LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ chetrf_aa()

subroutine chetrf_aa ( character uplo,
integer n,
complex, dimension( lda, * ) a,
integer lda,
integer, dimension( * ) ipiv,
complex, dimension( * ) work,
integer lwork,
integer info )

CHETRF_AA

Download CHETRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> CHETRF_AA computes the factorization of a complex hermitian matrix A
!> using the Aasen's algorithm.  The form of the factorization is
!>
!>    A = U**H*T*U  or  A = L*T*L**H
!>
!> where U (or L) is a product of permutation and unit upper (lower)
!> triangular matrices, and T is a hermitian tridiagonal matrix.
!>
!> This is the blocked version of the algorithm, calling Level 3 BLAS.
!> 
Parameters
[in]UPLO
!>          UPLO is CHARACTER*1
!>          = 'U':  Upper triangle of A is stored;
!>          = 'L':  Lower triangle of A is stored.
!> 
[in]N
!>          N is INTEGER
!>          The order of the matrix A.  N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the hermitian matrix A.  If UPLO = 'U', the leading
!>          N-by-N upper triangular part of A contains the upper
!>          triangular part of the matrix A, and the strictly lower
!>          triangular part of A is not referenced.  If UPLO = 'L', the
!>          leading N-by-N lower triangular part of A contains the lower
!>          triangular part of the matrix A, and the strictly upper
!>          triangular part of A is not referenced.
!>
!>          On exit, the tridiagonal matrix is stored in the diagonals
!>          and the subdiagonals of A just below (or above) the diagonals,
!>          and L is stored below (or above) the subdiagonals, when UPLO
!>          is 'L' (or 'U').
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,N).
!> 
[out]IPIV
!>          IPIV is INTEGER array, dimension (N)
!>          On exit, it contains the details of the interchanges, i.e.,
!>          the row and column k of A were interchanged with the
!>          row and column IPIV(k).
!> 
[out]WORK
!>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The length of WORK.
!>          LWORK >= 1, if N <= 1, and LWORK >= 2*N, otherwise.
!>          For optimum performance LWORK >= N*(1+NB), where NB is
!>          the optimal blocksize, returned by ILAENV.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file chetrf_aa.f.

133*
134* -- LAPACK computational routine --
135* -- LAPACK is a software package provided by Univ. of Tennessee, --
136* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
137*
138 IMPLICIT NONE
139*
140* .. Scalar Arguments ..
141 CHARACTER UPLO
142 INTEGER N, LDA, LWORK, INFO
143* ..
144* .. Array Arguments ..
145 INTEGER IPIV( * )
146 COMPLEX A( LDA, * ), WORK( * )
147* ..
148*
149* =====================================================================
150* .. Parameters ..
151 COMPLEX ZERO, ONE
152 parameter( zero = (0.0e+0, 0.0e+0), one = (1.0e+0, 0.0e+0) )
153*
154* .. Local Scalars ..
155 LOGICAL LQUERY, UPPER
156 INTEGER J, LWKMIN, LWKOPT
157 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
158 COMPLEX ALPHA
159* ..
160* .. External Functions ..
161 LOGICAL LSAME
162 INTEGER ILAENV
163 REAL SROUNDUP_LWORK
164 EXTERNAL lsame, ilaenv, sroundup_lwork
165* ..
166* .. External Subroutines ..
167 EXTERNAL clahef_aa, cgemm, ccopy, cswap, cscal,
168 $ xerbla
169* ..
170* .. Intrinsic Functions ..
171 INTRINSIC real, conjg, max
172* ..
173* .. Executable Statements ..
174*
175* Determine the block size
176*
177 nb = ilaenv( 1, 'CHETRF_AA', uplo, n, -1, -1, -1 )
178*
179* Test the input parameters.
180*
181 info = 0
182 upper = lsame( uplo, 'U' )
183 lquery = ( lwork.EQ.-1 )
184 IF( n.LE.1 ) THEN
185 lwkmin = 1
186 lwkopt = 1
187 ELSE
188 lwkmin = 2*n
189 lwkopt = (nb+1)*n
190 END IF
191*
192 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
193 info = -1
194 ELSE IF( n.LT.0 ) THEN
195 info = -2
196 ELSE IF( lda.LT.max( 1, n ) ) THEN
197 info = -4
198 ELSE IF( lwork.LT.lwkmin .AND. .NOT.lquery ) THEN
199 info = -7
200 END IF
201*
202 IF( info.EQ.0 ) THEN
203 work( 1 ) = sroundup_lwork( lwkopt )
204 END IF
205*
206 IF( info.NE.0 ) THEN
207 CALL xerbla( 'CHETRF_AA', -info )
208 RETURN
209 ELSE IF( lquery ) THEN
210 RETURN
211 END IF
212*
213* Quick return
214*
215 IF( n.EQ.0 ) THEN
216 RETURN
217 ENDIF
218 ipiv( 1 ) = 1
219 IF( n.EQ.1 ) THEN
220 a( 1, 1 ) = real( a( 1, 1 ) )
221 RETURN
222 END IF
223*
224* Adjust block size based on the workspace size
225*
226 IF( lwork.LT.((1+nb)*n) ) THEN
227 nb = ( lwork-n ) / n
228 END IF
229*
230 IF( upper ) THEN
231*
232* .....................................................
233* Factorize A as U**H*D*U using the upper triangle of A
234* .....................................................
235*
236* copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
237*
238 CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
239*
240* J is the main loop index, increasing from 1 to N in steps of
241* JB, where JB is the number of columns factorized by CLAHEF;
242* JB is either NB, or N-J+1 for the last block
243*
244 j = 0
245 10 CONTINUE
246 IF( j.GE.n )
247 $ GO TO 20
248*
249* each step of the main loop
250* J is the last column of the previous panel
251* J1 is the first column of the current panel
252* K1 identifies if the previous column of the panel has been
253* explicitly stored, e.g., K1=1 for the first panel, and
254* K1=0 for the rest
255*
256 j1 = j + 1
257 jb = min( n-j1+1, nb )
258 k1 = max(1, j)-j
259*
260* Panel factorization
261*
262 CALL clahef_aa( uplo, 2-k1, n-j, jb,
263 $ a( max(1, j), j+1 ), lda,
264 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
265*
266* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
267*
268 DO j2 = j+2, min(n, j+jb+1)
269 ipiv( j2 ) = ipiv( j2 ) + j
270 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
271 CALL cswap( j1-k1-2, a( 1, j2 ), 1,
272 $ a( 1, ipiv(j2) ), 1 )
273 END IF
274 END DO
275 j = j + jb
276*
277* Trailing submatrix update, where
278* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
279* WORK stores the current block of the auxiriarly matrix H
280*
281 IF( j.LT.n ) THEN
282*
283* if the first panel and JB=1 (NB=1), then nothing to do
284*
285 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
286*
287* Merge rank-1 update with BLAS-3 update
288*
289 alpha = conjg( a( j, j+1 ) )
290 a( j, j+1 ) = one
291 CALL ccopy( n-j, a( j-1, j+1 ), lda,
292 $ work( (j+1-j1+1)+jb*n ), 1 )
293 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
294*
295* K1 identifies if the previous column of the panel has been
296* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
297* and K1=1 and K2=0 for the rest
298*
299 IF( j1.GT.1 ) THEN
300*
301* Not first panel
302*
303 k2 = 1
304 ELSE
305*
306* First panel
307*
308 k2 = 0
309*
310* First update skips the first column
311*
312 jb = jb - 1
313 END IF
314*
315 DO j2 = j+1, n, nb
316 nj = min( nb, n-j2+1 )
317*
318* Update (J2, J2) diagonal block with CGEMV
319*
320 j3 = j2
321 DO mj = nj-1, 1, -1
322 CALL cgemm( 'Conjugate transpose', 'Transpose',
323 $ 1, mj, jb+1,
324 $ -one, a( j1-k2, j3 ), lda,
325 $ work( (j3-j1+1)+k1*n ), n,
326 $ one, a( j3, j3 ), lda )
327 j3 = j3 + 1
328 END DO
329*
330* Update off-diagonal block of J2-th block row with CGEMM
331*
332 CALL cgemm( 'Conjugate transpose', 'Transpose',
333 $ nj, n-j3+1, jb+1,
334 $ -one, a( j1-k2, j2 ), lda,
335 $ work( (j3-j1+1)+k1*n ), n,
336 $ one, a( j2, j3 ), lda )
337 END DO
338*
339* Recover T( J, J+1 )
340*
341 a( j, j+1 ) = conjg( alpha )
342 END IF
343*
344* WORK(J+1, 1) stores H(J+1, 1)
345*
346 CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
347 END IF
348 GO TO 10
349 ELSE
350*
351* .....................................................
352* Factorize A as L*D*L**H using the lower triangle of A
353* .....................................................
354*
355* copy first column A(1:N, 1) into H(1:N, 1)
356* (stored in WORK(1:N))
357*
358 CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
359*
360* J is the main loop index, increasing from 1 to N in steps of
361* JB, where JB is the number of columns factorized by CLAHEF;
362* JB is either NB, or N-J+1 for the last block
363*
364 j = 0
365 11 CONTINUE
366 IF( j.GE.n )
367 $ GO TO 20
368*
369* each step of the main loop
370* J is the last column of the previous panel
371* J1 is the first column of the current panel
372* K1 identifies if the previous column of the panel has been
373* explicitly stored, e.g., K1=1 for the first panel, and
374* K1=0 for the rest
375*
376 j1 = j+1
377 jb = min( n-j1+1, nb )
378 k1 = max(1, j)-j
379*
380* Panel factorization
381*
382 CALL clahef_aa( uplo, 2-k1, n-j, jb,
383 $ a( j+1, max(1, j) ), lda,
384 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
385*
386* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
387*
388 DO j2 = j+2, min(n, j+jb+1)
389 ipiv( j2 ) = ipiv( j2 ) + j
390 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
391 CALL cswap( j1-k1-2, a( j2, 1 ), lda,
392 $ a( ipiv(j2), 1 ), lda )
393 END IF
394 END DO
395 j = j + jb
396*
397* Trailing submatrix update, where
398* A(J2+1, J1-1) stores L(J2+1, J1) and
399* WORK(J2+1, 1) stores H(J2+1, 1)
400*
401 IF( j.LT.n ) THEN
402*
403* if the first panel and JB=1 (NB=1), then nothing to do
404*
405 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
406*
407* Merge rank-1 update with BLAS-3 update
408*
409 alpha = conjg( a( j+1, j ) )
410 a( j+1, j ) = one
411 CALL ccopy( n-j, a( j+1, j-1 ), 1,
412 $ work( (j+1-j1+1)+jb*n ), 1 )
413 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
414*
415* K1 identifies if the previous column of the panel has been
416* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
417* and K1=1 and K2=0 for the rest
418*
419 IF( j1.GT.1 ) THEN
420*
421* Not first panel
422*
423 k2 = 1
424 ELSE
425*
426* First panel
427*
428 k2 = 0
429*
430* First update skips the first column
431*
432 jb = jb - 1
433 END IF
434*
435 DO j2 = j+1, n, nb
436 nj = min( nb, n-j2+1 )
437*
438* Update (J2, J2) diagonal block with CGEMV
439*
440 j3 = j2
441 DO mj = nj-1, 1, -1
442 CALL cgemm( 'No transpose',
443 $ 'Conjugate transpose',
444 $ mj, 1, jb+1,
445 $ -one, work( (j3-j1+1)+k1*n ), n,
446 $ a( j3, j1-k2 ), lda,
447 $ one, a( j3, j3 ), lda )
448 j3 = j3 + 1
449 END DO
450*
451* Update off-diagonal block of J2-th block column with CGEMM
452*
453 CALL cgemm( 'No transpose', 'Conjugate transpose',
454 $ n-j3+1, nj, jb+1,
455 $ -one, work( (j3-j1+1)+k1*n ), n,
456 $ a( j2, j1-k2 ), lda,
457 $ one, a( j3, j2 ), lda )
458 END DO
459*
460* Recover T( J+1, J )
461*
462 a( j+1, j ) = conjg( alpha )
463 END IF
464*
465* WORK(J+1, 1) stores H(J+1, 1)
466*
467 CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
468 END IF
469 GO TO 11
470 END IF
471*
472 20 CONTINUE
473 work( 1 ) = sroundup_lwork( lwkopt )
474 RETURN
475*
476* End of CHETRF_AA
477*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:160
subroutine clahef_aa(uplo, j1, m, nb, a, lda, ipiv, h, ldh, work)
CLAHEF_AA
Definition clahef_aa.f:142
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine cscal(n, ca, cx, incx)
CSCAL
Definition cscal.f:78
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
Here is the call graph for this function:
Here is the caller graph for this function: