LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ chetrf_aa()

subroutine chetrf_aa ( character  uplo,
integer  n,
complex, dimension( lda, * )  a,
integer  lda,
integer, dimension( * )  ipiv,
complex, dimension( * )  work,
integer  lwork,
integer  info 
)

CHETRF_AA

Download CHETRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CHETRF_AA computes the factorization of a complex hermitian matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**H*T*U  or  A = L*T*L**H

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a hermitian tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is COMPLEX array, dimension (LDA,N)
          On entry, the hermitian matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiagonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is COMPLEX array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK.  LWORK >= 2*N. For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file chetrf_aa.f.

132*
133* -- LAPACK computational routine --
134* -- LAPACK is a software package provided by Univ. of Tennessee, --
135* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136*
137 IMPLICIT NONE
138*
139* .. Scalar Arguments ..
140 CHARACTER UPLO
141 INTEGER N, LDA, LWORK, INFO
142* ..
143* .. Array Arguments ..
144 INTEGER IPIV( * )
145 COMPLEX A( LDA, * ), WORK( * )
146* ..
147*
148* =====================================================================
149* .. Parameters ..
150 COMPLEX ZERO, ONE
151 parameter( zero = (0.0e+0, 0.0e+0), one = (1.0e+0, 0.0e+0) )
152*
153* .. Local Scalars ..
154 LOGICAL LQUERY, UPPER
155 INTEGER J, LWKOPT
156 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157 COMPLEX ALPHA
158* ..
159* .. External Functions ..
160 LOGICAL LSAME
161 INTEGER ILAENV
162 REAL SROUNDUP_LWORK
163 EXTERNAL lsame, ilaenv, sroundup_lwork
164* ..
165* .. External Subroutines ..
166 EXTERNAL clahef_aa, cgemm, ccopy, cswap, cscal, xerbla
167* ..
168* .. Intrinsic Functions ..
169 INTRINSIC real, conjg, max
170* ..
171* .. Executable Statements ..
172*
173* Determine the block size
174*
175 nb = ilaenv( 1, 'CHETRF_AA', uplo, n, -1, -1, -1 )
176*
177* Test the input parameters.
178*
179 info = 0
180 upper = lsame( uplo, 'U' )
181 lquery = ( lwork.EQ.-1 )
182 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
183 info = -1
184 ELSE IF( n.LT.0 ) THEN
185 info = -2
186 ELSE IF( lda.LT.max( 1, n ) ) THEN
187 info = -4
188 ELSE IF( lwork.LT.( 2*n ) .AND. .NOT.lquery ) THEN
189 info = -7
190 END IF
191*
192 IF( info.EQ.0 ) THEN
193 lwkopt = (nb+1)*n
194 work( 1 ) = sroundup_lwork(lwkopt)
195 END IF
196*
197 IF( info.NE.0 ) THEN
198 CALL xerbla( 'CHETRF_AA', -info )
199 RETURN
200 ELSE IF( lquery ) THEN
201 RETURN
202 END IF
203*
204* Quick return
205*
206 IF ( n.EQ.0 ) THEN
207 RETURN
208 ENDIF
209 ipiv( 1 ) = 1
210 IF ( n.EQ.1 ) THEN
211 a( 1, 1 ) = real( a( 1, 1 ) )
212 RETURN
213 END IF
214*
215* Adjust block size based on the workspace size
216*
217 IF( lwork.LT.((1+nb)*n) ) THEN
218 nb = ( lwork-n ) / n
219 END IF
220*
221 IF( upper ) THEN
222*
223* .....................................................
224* Factorize A as U**H*D*U using the upper triangle of A
225* .....................................................
226*
227* copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
228*
229 CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
230*
231* J is the main loop index, increasing from 1 to N in steps of
232* JB, where JB is the number of columns factorized by CLAHEF;
233* JB is either NB, or N-J+1 for the last block
234*
235 j = 0
236 10 CONTINUE
237 IF( j.GE.n )
238 $ GO TO 20
239*
240* each step of the main loop
241* J is the last column of the previous panel
242* J1 is the first column of the current panel
243* K1 identifies if the previous column of the panel has been
244* explicitly stored, e.g., K1=1 for the first panel, and
245* K1=0 for the rest
246*
247 j1 = j + 1
248 jb = min( n-j1+1, nb )
249 k1 = max(1, j)-j
250*
251* Panel factorization
252*
253 CALL clahef_aa( uplo, 2-k1, n-j, jb,
254 $ a( max(1, j), j+1 ), lda,
255 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
256*
257* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
258*
259 DO j2 = j+2, min(n, j+jb+1)
260 ipiv( j2 ) = ipiv( j2 ) + j
261 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
262 CALL cswap( j1-k1-2, a( 1, j2 ), 1,
263 $ a( 1, ipiv(j2) ), 1 )
264 END IF
265 END DO
266 j = j + jb
267*
268* Trailing submatrix update, where
269* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
270* WORK stores the current block of the auxiriarly matrix H
271*
272 IF( j.LT.n ) THEN
273*
274* if the first panel and JB=1 (NB=1), then nothing to do
275*
276 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
277*
278* Merge rank-1 update with BLAS-3 update
279*
280 alpha = conjg( a( j, j+1 ) )
281 a( j, j+1 ) = one
282 CALL ccopy( n-j, a( j-1, j+1 ), lda,
283 $ work( (j+1-j1+1)+jb*n ), 1 )
284 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
285*
286* K1 identifies if the previous column of the panel has been
287* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
288* and K1=1 and K2=0 for the rest
289*
290 IF( j1.GT.1 ) THEN
291*
292* Not first panel
293*
294 k2 = 1
295 ELSE
296*
297* First panel
298*
299 k2 = 0
300*
301* First update skips the first column
302*
303 jb = jb - 1
304 END IF
305*
306 DO j2 = j+1, n, nb
307 nj = min( nb, n-j2+1 )
308*
309* Update (J2, J2) diagonal block with CGEMV
310*
311 j3 = j2
312 DO mj = nj-1, 1, -1
313 CALL cgemm( 'Conjugate transpose', 'Transpose',
314 $ 1, mj, jb+1,
315 $ -one, a( j1-k2, j3 ), lda,
316 $ work( (j3-j1+1)+k1*n ), n,
317 $ one, a( j3, j3 ), lda )
318 j3 = j3 + 1
319 END DO
320*
321* Update off-diagonal block of J2-th block row with CGEMM
322*
323 CALL cgemm( 'Conjugate transpose', 'Transpose',
324 $ nj, n-j3+1, jb+1,
325 $ -one, a( j1-k2, j2 ), lda,
326 $ work( (j3-j1+1)+k1*n ), n,
327 $ one, a( j2, j3 ), lda )
328 END DO
329*
330* Recover T( J, J+1 )
331*
332 a( j, j+1 ) = conjg( alpha )
333 END IF
334*
335* WORK(J+1, 1) stores H(J+1, 1)
336*
337 CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
338 END IF
339 GO TO 10
340 ELSE
341*
342* .....................................................
343* Factorize A as L*D*L**H using the lower triangle of A
344* .....................................................
345*
346* copy first column A(1:N, 1) into H(1:N, 1)
347* (stored in WORK(1:N))
348*
349 CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
350*
351* J is the main loop index, increasing from 1 to N in steps of
352* JB, where JB is the number of columns factorized by CLAHEF;
353* JB is either NB, or N-J+1 for the last block
354*
355 j = 0
356 11 CONTINUE
357 IF( j.GE.n )
358 $ GO TO 20
359*
360* each step of the main loop
361* J is the last column of the previous panel
362* J1 is the first column of the current panel
363* K1 identifies if the previous column of the panel has been
364* explicitly stored, e.g., K1=1 for the first panel, and
365* K1=0 for the rest
366*
367 j1 = j+1
368 jb = min( n-j1+1, nb )
369 k1 = max(1, j)-j
370*
371* Panel factorization
372*
373 CALL clahef_aa( uplo, 2-k1, n-j, jb,
374 $ a( j+1, max(1, j) ), lda,
375 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
376*
377* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
378*
379 DO j2 = j+2, min(n, j+jb+1)
380 ipiv( j2 ) = ipiv( j2 ) + j
381 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
382 CALL cswap( j1-k1-2, a( j2, 1 ), lda,
383 $ a( ipiv(j2), 1 ), lda )
384 END IF
385 END DO
386 j = j + jb
387*
388* Trailing submatrix update, where
389* A(J2+1, J1-1) stores L(J2+1, J1) and
390* WORK(J2+1, 1) stores H(J2+1, 1)
391*
392 IF( j.LT.n ) THEN
393*
394* if the first panel and JB=1 (NB=1), then nothing to do
395*
396 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
397*
398* Merge rank-1 update with BLAS-3 update
399*
400 alpha = conjg( a( j+1, j ) )
401 a( j+1, j ) = one
402 CALL ccopy( n-j, a( j+1, j-1 ), 1,
403 $ work( (j+1-j1+1)+jb*n ), 1 )
404 CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
405*
406* K1 identifies if the previous column of the panel has been
407* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
408* and K1=1 and K2=0 for the rest
409*
410 IF( j1.GT.1 ) THEN
411*
412* Not first panel
413*
414 k2 = 1
415 ELSE
416*
417* First panel
418*
419 k2 = 0
420*
421* First update skips the first column
422*
423 jb = jb - 1
424 END IF
425*
426 DO j2 = j+1, n, nb
427 nj = min( nb, n-j2+1 )
428*
429* Update (J2, J2) diagonal block with CGEMV
430*
431 j3 = j2
432 DO mj = nj-1, 1, -1
433 CALL cgemm( 'No transpose', 'Conjugate transpose',
434 $ mj, 1, jb+1,
435 $ -one, work( (j3-j1+1)+k1*n ), n,
436 $ a( j3, j1-k2 ), lda,
437 $ one, a( j3, j3 ), lda )
438 j3 = j3 + 1
439 END DO
440*
441* Update off-diagonal block of J2-th block column with CGEMM
442*
443 CALL cgemm( 'No transpose', 'Conjugate transpose',
444 $ n-j3+1, nj, jb+1,
445 $ -one, work( (j3-j1+1)+k1*n ), n,
446 $ a( j2, j1-k2 ), lda,
447 $ one, a( j3, j2 ), lda )
448 END DO
449*
450* Recover T( J+1, J )
451*
452 a( j+1, j ) = conjg( alpha )
453 END IF
454*
455* WORK(J+1, 1) stores H(J+1, 1)
456*
457 CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
458 END IF
459 GO TO 11
460 END IF
461*
462 20 CONTINUE
463 work( 1 ) = sroundup_lwork(lwkopt)
464 RETURN
465*
466* End of CHETRF_AA
467*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine clahef_aa(uplo, j1, m, nb, a, lda, ipiv, h, ldh, work)
CLAHEF_AA
Definition clahef_aa.f:144
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine cscal(n, ca, cx, incx)
CSCAL
Definition cscal.f:78
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
Here is the call graph for this function:
Here is the caller graph for this function: