157 $ IPIV2, WORK, LWORK, INFO )
167 INTEGER N, LDA, LTB, LWORK, INFO
170 INTEGER IPIV( * ), IPIV2( * )
171 COMPLEX A( LDA, * ), TB( * ), WORK( * )
177 parameter( czero = ( 0.0e+0, 0.0e+0 ),
178 $ cone = ( 1.0e+0, 0.0e+0 ) )
181 LOGICAL UPPER, TQUERY, WQUERY
182 INTEGER I, J, K, I1, I2, TD
183 INTEGER LDTB, NB, KB, JB, NT, IINFO
190 EXTERNAL lsame, ilaenv, sroundup_lwork
205 upper = lsame( uplo,
'U' )
206 wquery = ( lwork.EQ.-1 )
207 tquery = ( ltb.EQ.-1 )
208 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
210 ELSE IF( n.LT.0 )
THEN
212 ELSE IF( lda.LT.max( 1, n ) )
THEN
214 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
216 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
221 CALL xerbla(
'CSYTRF_AA_2STAGE', -info )
227 nb = ilaenv( 1,
'CSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
230 tb( 1 ) = cmplx( (3*nb+1)*n )
233 work( 1 ) = sroundup_lwork(n*nb)
236 IF( tquery .OR. wquery )
THEN
249 IF( ldtb .LT. 3*nb+1 )
THEN
252 IF( lwork .LT. nb*n )
THEN
270 tb( 1 ) = cmplx( nb )
286 IF( i .EQ. (j-1) )
THEN
291 CALL cgemm(
'NoTranspose',
'NoTranspose',
293 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
294 $ a( (i-1)*nb+1, j*nb+1 ), lda,
295 $ czero, work( i*nb+1 ), n )
303 CALL cgemm(
'NoTranspose',
'NoTranspose',
305 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
307 $ a( (i-2)*nb+1, j*nb+1 ), lda,
308 $ czero, work( i*nb+1 ), n )
314 CALL clacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
315 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
318 CALL cgemm(
'Transpose',
'NoTranspose',
320 $ -cone, a( 1, j*nb+1 ), lda,
322 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
324 CALL cgemm(
'Transpose',
'NoTranspose',
326 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
327 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
328 $ czero, work( 1 ), n )
329 CALL cgemm(
'NoTranspose',
'NoTranspose',
331 $ -cone, work( 1 ), n,
332 $ a( (j-2)*nb+1, j*nb+1 ), lda,
333 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
340 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
341 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
348 CALL ctrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
349 $ a( (j-1)*nb+1, j*nb+1 ), lda,
350 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
351 CALL ctrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
352 $ a( (j-1)*nb+1, j*nb+1 ), lda,
353 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
362 CALL cgemm(
'NoTranspose',
'NoTranspose',
364 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
365 $ a( (j-1)*nb+1, j*nb+1 ), lda,
366 $ czero, work( j*nb+1 ), n )
368 CALL cgemm(
'NoTranspose',
'NoTranspose',
370 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
372 $ a( (j-2)*nb+1, j*nb+1 ), lda,
373 $ czero, work( j*nb+1 ), n )
378 CALL cgemm(
'Transpose',
'NoTranspose',
379 $ nb, n-(j+1)*nb, j*nb,
380 $ -cone, work( nb+1 ), n,
381 $ a( 1, (j+1)*nb+1 ), lda,
382 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
388 CALL ccopy( n-(j+1)*nb,
389 $ a( j*nb+k, (j+1)*nb+1 ), lda,
390 $ work( 1+(k-1)*n ), 1 )
395 CALL cgetrf( n-(j+1)*nb, nb,
397 $ ipiv( (j+1)*nb+1 ), iinfo )
405 CALL ccopy( n-(j+1)*nb,
406 $ work( 1+(k-1)*n ), 1,
407 $ a( j*nb+k, (j+1)*nb+1 ), lda )
412 kb = min(nb, n-(j+1)*nb)
413 CALL claset(
'Full', kb, nb, czero, czero,
414 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
415 CALL clacpy(
'Upper', kb, nb,
417 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
419 CALL ctrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
420 $ a( (j-1)*nb+1, j*nb+1 ), lda,
421 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
429 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
430 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
433 CALL claset(
'Lower', kb, nb, czero, cone,
434 $ a( j*nb+1, (j+1)*nb+1), lda )
440 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
443 i2 = ipiv( (j+1)*nb+k )
446 CALL cswap( k-1, a( (j+1)*nb+1, i1 ), 1,
447 $ a( (j+1)*nb+1, i2 ), 1 )
450 $
CALL cswap( i2-i1-1, a( i1, i1+1 ), lda,
454 $
CALL cswap( n-i2, a( i1, i2+1 ), lda,
455 $ a( i2, i2+1 ), lda )
458 a( i1, i1 ) = a( i2, i2 )
462 CALL cswap( j*nb, a( 1, i1 ), 1,
483 IF( i .EQ. (j-1) )
THEN
488 CALL cgemm(
'NoTranspose',
'Transpose',
490 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
491 $ a( j*nb+1, (i-1)*nb+1 ), lda,
492 $ czero, work( i*nb+1 ), n )
495 IF( i .EQ. (j-1) )
THEN
500 CALL cgemm(
'NoTranspose',
'Transpose',
502 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
504 $ a( j*nb+1, (i-2)*nb+1 ), lda,
505 $ czero, work( i*nb+1 ), n )
511 CALL clacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
512 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
515 CALL cgemm(
'NoTranspose',
'NoTranspose',
517 $ -cone, a( j*nb+1, 1 ), lda,
519 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
521 CALL cgemm(
'NoTranspose',
'NoTranspose',
523 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
524 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
525 $ czero, work( 1 ), n )
526 CALL cgemm(
'NoTranspose',
'Transpose',
528 $ -cone, work( 1 ), n,
529 $ a( j*nb+1, (j-2)*nb+1 ), lda,
530 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
537 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
538 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
545 CALL ctrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
546 $ a( j*nb+1, (j-1)*nb+1 ), lda,
547 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
548 CALL ctrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
549 $ a( j*nb+1, (j-1)*nb+1 ), lda,
550 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
557 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
558 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
568 CALL cgemm(
'NoTranspose',
'Transpose',
570 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
571 $ a( j*nb+1, (j-1)*nb+1 ), lda,
572 $ czero, work( j*nb+1 ), n )
574 CALL cgemm(
'NoTranspose',
'Transpose',
576 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
578 $ a( j*nb+1, (j-2)*nb+1 ), lda,
579 $ czero, work( j*nb+1 ), n )
584 CALL cgemm(
'NoTranspose',
'NoTranspose',
585 $ n-(j+1)*nb, nb, j*nb,
586 $ -cone, a( (j+1)*nb+1, 1 ), lda,
588 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
593 CALL cgetrf( n-(j+1)*nb, nb,
594 $ a( (j+1)*nb+1, j*nb+1 ), lda,
595 $ ipiv( (j+1)*nb+1 ), iinfo )
602 kb = min(nb, n-(j+1)*nb)
603 CALL claset(
'Full', kb, nb, czero, czero,
604 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
605 CALL clacpy(
'Upper', kb, nb,
606 $ a( (j+1)*nb+1, j*nb+1 ), lda,
607 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
609 CALL ctrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
610 $ a( j*nb+1, (j-1)*nb+1 ), lda,
611 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
619 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
620 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
623 CALL claset(
'Upper', kb, nb, czero, cone,
624 $ a( (j+1)*nb+1, j*nb+1 ), lda )
630 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
633 i2 = ipiv( (j+1)*nb+k )
636 CALL cswap( k-1, a( i1, (j+1)*nb+1 ), lda,
637 $ a( i2, (j+1)*nb+1 ), lda )
640 $
CALL cswap( i2-i1-1, a( i1+1, i1 ), 1,
641 $ a( i2, i1+1 ), lda )
644 $
CALL cswap( n-i2, a( i2+1, i1 ), 1,
648 a( i1, i1 ) = a( i2, i2 )
652 CALL cswap( j*nb, a( i1, 1 ), lda,
667 CALL cgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )