159 $ IPIV2, WORK, LWORK, INFO )
169 INTEGER N, LDA, LTB, LWORK, INFO
172 INTEGER IPIV( * ), IPIV2( * )
173 COMPLEX A( LDA, * ), TB( * ), WORK( * )
179 parameter( czero = ( 0.0e+0, 0.0e+0 ),
180 $ cone = ( 1.0e+0, 0.0e+0 ) )
183 LOGICAL UPPER, TQUERY, WQUERY
184 INTEGER I, J, K, I1, I2, TD
185 INTEGER LDTB, NB, KB, JB, NT, IINFO
192 EXTERNAL lsame, ilaenv, sroundup_lwork
206 upper = lsame( uplo,
'U' )
207 wquery = ( lwork.EQ.-1 )
208 tquery = ( ltb.EQ.-1 )
209 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
211 ELSE IF( n.LT.0 )
THEN
213 ELSE IF( lda.LT.max( 1, n ) )
THEN
215 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
217 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
222 CALL xerbla(
'CSYTRF_AA_2STAGE', -info )
228 nb = ilaenv( 1,
'CSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
234 work( 1 ) = sroundup_lwork(n*nb)
237 IF( tquery .OR. wquery )
THEN
250 IF( ldtb .LT. 3*nb+1 )
THEN
253 IF( lwork .LT. nb*n )
THEN
287 IF( i .EQ. (j-1) )
THEN
292 CALL cgemm(
'NoTranspose',
'NoTranspose',
294 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
295 $ a( (i-1)*nb+1, j*nb+1 ), lda,
296 $ czero, work( i*nb+1 ), n )
304 CALL cgemm(
'NoTranspose',
'NoTranspose',
306 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
308 $ a( (i-2)*nb+1, j*nb+1 ), lda,
309 $ czero, work( i*nb+1 ), n )
315 CALL clacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
316 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
319 CALL cgemm(
'Transpose',
'NoTranspose',
321 $ -cone, a( 1, j*nb+1 ), lda,
323 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
325 CALL cgemm(
'Transpose',
'NoTranspose',
327 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
328 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
329 $ czero, work( 1 ), n )
330 CALL cgemm(
'NoTranspose',
'NoTranspose',
332 $ -cone, work( 1 ), n,
333 $ a( (j-2)*nb+1, j*nb+1 ), lda,
334 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
341 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
342 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
349 CALL ctrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
350 $ a( (j-1)*nb+1, j*nb+1 ), lda,
351 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
352 CALL ctrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
353 $ a( (j-1)*nb+1, j*nb+1 ), lda,
354 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
363 CALL cgemm(
'NoTranspose',
'NoTranspose',
365 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
366 $ a( (j-1)*nb+1, j*nb+1 ), lda,
367 $ czero, work( j*nb+1 ), n )
369 CALL cgemm(
'NoTranspose',
'NoTranspose',
371 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
373 $ a( (j-2)*nb+1, j*nb+1 ), lda,
374 $ czero, work( j*nb+1 ), n )
379 CALL cgemm(
'Transpose',
'NoTranspose',
380 $ nb, n-(j+1)*nb, j*nb,
381 $ -cone, work( nb+1 ), n,
382 $ a( 1, (j+1)*nb+1 ), lda,
383 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
389 CALL ccopy( n-(j+1)*nb,
390 $ a( j*nb+k, (j+1)*nb+1 ), lda,
391 $ work( 1+(k-1)*n ), 1 )
396 CALL cgetrf( n-(j+1)*nb, nb,
398 $ ipiv( (j+1)*nb+1 ), iinfo )
406 CALL ccopy( n-(j+1)*nb,
407 $ work( 1+(k-1)*n ), 1,
408 $ a( j*nb+k, (j+1)*nb+1 ), lda )
413 kb = min(nb, n-(j+1)*nb)
414 CALL claset(
'Full', kb, nb, czero, czero,
415 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
416 CALL clacpy(
'Upper', kb, nb,
418 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
420 CALL ctrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
421 $ a( (j-1)*nb+1, j*nb+1 ), lda,
422 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
430 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
431 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
434 CALL claset(
'Lower', kb, nb, czero, cone,
435 $ a( j*nb+1, (j+1)*nb+1), lda )
441 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
444 i2 = ipiv( (j+1)*nb+k )
447 CALL cswap( k-1, a( (j+1)*nb+1, i1 ), 1,
448 $ a( (j+1)*nb+1, i2 ), 1 )
451 $
CALL cswap( i2-i1-1, a( i1, i1+1 ), lda,
455 $
CALL cswap( n-i2, a( i1, i2+1 ), lda,
456 $ a( i2, i2+1 ), lda )
459 a( i1, i1 ) = a( i2, i2 )
463 CALL cswap( j*nb, a( 1, i1 ), 1,
484 IF( i .EQ. (j-1) )
THEN
489 CALL cgemm(
'NoTranspose',
'Transpose',
491 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
492 $ a( j*nb+1, (i-1)*nb+1 ), lda,
493 $ czero, work( i*nb+1 ), n )
496 IF( i .EQ. (j-1) )
THEN
501 CALL cgemm(
'NoTranspose',
'Transpose',
503 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
505 $ a( j*nb+1, (i-2)*nb+1 ), lda,
506 $ czero, work( i*nb+1 ), n )
512 CALL clacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
513 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
516 CALL cgemm(
'NoTranspose',
'NoTranspose',
518 $ -cone, a( j*nb+1, 1 ), lda,
520 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
522 CALL cgemm(
'NoTranspose',
'NoTranspose',
524 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
525 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
526 $ czero, work( 1 ), n )
527 CALL cgemm(
'NoTranspose',
'Transpose',
529 $ -cone, work( 1 ), n,
530 $ a( j*nb+1, (j-2)*nb+1 ), lda,
531 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
538 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
539 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
546 CALL ctrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
547 $ a( j*nb+1, (j-1)*nb+1 ), lda,
548 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
549 CALL ctrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
550 $ a( j*nb+1, (j-1)*nb+1 ), lda,
551 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
558 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
559 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
569 CALL cgemm(
'NoTranspose',
'Transpose',
571 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
572 $ a( j*nb+1, (j-1)*nb+1 ), lda,
573 $ czero, work( j*nb+1 ), n )
575 CALL cgemm(
'NoTranspose',
'Transpose',
577 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
579 $ a( j*nb+1, (j-2)*nb+1 ), lda,
580 $ czero, work( j*nb+1 ), n )
585 CALL cgemm(
'NoTranspose',
'NoTranspose',
586 $ n-(j+1)*nb, nb, j*nb,
587 $ -cone, a( (j+1)*nb+1, 1 ), lda,
589 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
594 CALL cgetrf( n-(j+1)*nb, nb,
595 $ a( (j+1)*nb+1, j*nb+1 ), lda,
596 $ ipiv( (j+1)*nb+1 ), iinfo )
603 kb = min(nb, n-(j+1)*nb)
604 CALL claset(
'Full', kb, nb, czero, czero,
605 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
606 CALL clacpy(
'Upper', kb, nb,
607 $ a( (j+1)*nb+1, j*nb+1 ), lda,
608 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
610 CALL ctrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
611 $ a( j*nb+1, (j-1)*nb+1 ), lda,
612 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
620 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
621 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
624 CALL claset(
'Upper', kb, nb, czero, cone,
625 $ a( (j+1)*nb+1, j*nb+1 ), lda )
631 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
634 i2 = ipiv( (j+1)*nb+k )
637 CALL cswap( k-1, a( i1, (j+1)*nb+1 ), lda,
638 $ a( i2, (j+1)*nb+1 ), lda )
641 $
CALL cswap( i2-i1-1, a( i1+1, i1 ), 1,
642 $ a( i2, i1+1 ), lda )
645 $
CALL cswap( n-i2, a( i2+1, i1 ), 1,
649 a( i1, i1 ) = a( i2, i2 )
653 CALL cswap( j*nb, a( i1, 1 ), lda,
668 CALL cgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
subroutine xerbla(srname, info)
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
subroutine cgbtrf(m, n, kl, ku, ab, ldab, ipiv, info)
CGBTRF
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
subroutine cgetrf(m, n, a, lda, ipiv, info)
CGETRF
subroutine csytrf_aa_2stage(uplo, n, a, lda, tb, ltb, ipiv, ipiv2, work, lwork, info)
CSYTRF_AA_2STAGE
subroutine clacpy(uplo, m, n, a, lda, b, ldb)
CLACPY copies all or part of one two-dimensional array to another.
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
subroutine ctrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRSM