157 $ IPIV2, WORK, LWORK, INFO )
167 INTEGER N, LDA, LTB, LWORK, INFO
170 INTEGER IPIV( * ), IPIV2( * )
171 REAL A( LDA, * ), TB( * ), WORK( * )
177 parameter( zero = 0.0e+0, one = 1.0e+0 )
180 LOGICAL UPPER, TQUERY, WQUERY
181 INTEGER I, J, K, I1, I2, TD
182 INTEGER LDTB, NB, KB, JB, NT, IINFO
189 EXTERNAL lsame, ilaenv, sroundup_lwork
204 upper = lsame( uplo,
'U' )
205 wquery = ( lwork.EQ.-1 )
206 tquery = ( ltb.EQ.-1 )
207 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
209 ELSE IF( n.LT.0 )
THEN
211 ELSE IF( lda.LT.max( 1, n ) )
THEN
213 ELSE IF( ltb.LT.max( 1, 4*n ) .AND. .NOT.tquery )
THEN
215 ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.wquery )
THEN
220 CALL xerbla(
'SSYTRF_AA_2STAGE', -info )
226 nb = ilaenv( 1,
'SSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
229 tb( 1 ) = sroundup_lwork( max( 1, (3*nb+1)*n ) )
232 work( 1 ) = sroundup_lwork( max( 1, n*nb ) )
235 IF( tquery .OR. wquery )
THEN
248 IF( ldtb .LT. 3*nb+1 )
THEN
251 IF( lwork .LT. nb*n )
THEN
285 IF( i .EQ. (j-1) )
THEN
290 CALL sgemm(
'NoTranspose',
'NoTranspose',
292 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
293 $ a( (i-1)*nb+1, j*nb+1 ), lda,
294 $ zero, work( i*nb+1 ), n )
302 CALL sgemm(
'NoTranspose',
'NoTranspose',
304 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
306 $ a( (i-2)*nb+1, j*nb+1 ), lda,
307 $ zero, work( i*nb+1 ), n )
313 CALL slacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
314 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
317 CALL sgemm(
'Transpose',
'NoTranspose',
319 $ -one, a( 1, j*nb+1 ), lda,
321 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
323 CALL sgemm(
'Transpose',
'NoTranspose',
325 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
326 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
327 $ zero, work( 1 ), n )
328 CALL sgemm(
'NoTranspose',
'NoTranspose',
330 $ -one, work( 1 ), n,
331 $ a( (j-2)*nb+1, j*nb+1 ), lda,
332 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
335 CALL ssygst( 1,
'Upper', kb,
336 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
337 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
344 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
345 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
355 CALL sgemm(
'NoTranspose',
'NoTranspose',
357 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
358 $ a( (j-1)*nb+1, j*nb+1 ), lda,
359 $ zero, work( j*nb+1 ), n )
361 CALL sgemm(
'NoTranspose',
'NoTranspose',
363 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
365 $ a( (j-2)*nb+1, j*nb+1 ), lda,
366 $ zero, work( j*nb+1 ), n )
371 CALL sgemm(
'Transpose',
'NoTranspose',
372 $ nb, n-(j+1)*nb, j*nb,
373 $ -one, work( nb+1 ), n,
374 $ a( 1, (j+1)*nb+1 ), lda,
375 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
381 CALL scopy( n-(j+1)*nb,
382 $ a( j*nb+k, (j+1)*nb+1 ), lda,
383 $ work( 1+(k-1)*n ), 1 )
388 CALL sgetrf( n-(j+1)*nb, nb,
390 $ ipiv( (j+1)*nb+1 ), iinfo )
398 CALL scopy( n-(j+1)*nb,
399 $ work( 1+(k-1)*n ), 1,
400 $ a( j*nb+k, (j+1)*nb+1 ), lda )
405 kb = min(nb, n-(j+1)*nb)
406 CALL slaset(
'Full', kb, nb, zero, zero,
407 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
408 CALL slacpy(
'Upper', kb, nb,
410 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
412 CALL strsm(
'R',
'U',
'N',
'U', kb, nb, one,
413 $ a( (j-1)*nb+1, j*nb+1 ), lda,
414 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
422 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
423 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
426 CALL slaset(
'Lower', kb, nb, zero, one,
427 $ a( j*nb+1, (j+1)*nb+1), lda )
433 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
436 i2 = ipiv( (j+1)*nb+k )
439 CALL sswap( k-1, a( (j+1)*nb+1, i1 ), 1,
440 $ a( (j+1)*nb+1, i2 ), 1 )
443 $
CALL sswap( i2-i1-1, a( i1, i1+1 ), lda,
447 $
CALL sswap( n-i2, a( i1, i2+1 ), lda,
448 $ a( i2, i2+1 ), lda )
451 a( i1, i1 ) = a( i2, i2 )
455 CALL sswap( j*nb, a( 1, i1 ), 1,
476 IF( i .EQ. (j-1) )
THEN
481 CALL sgemm(
'NoTranspose',
'Transpose',
483 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
484 $ a( j*nb+1, (i-1)*nb+1 ), lda,
485 $ zero, work( i*nb+1 ), n )
493 CALL sgemm(
'NoTranspose',
'Transpose',
495 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
497 $ a( j*nb+1, (i-2)*nb+1 ), lda,
498 $ zero, work( i*nb+1 ), n )
504 CALL slacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
505 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
508 CALL sgemm(
'NoTranspose',
'NoTranspose',
510 $ -one, a( j*nb+1, 1 ), lda,
512 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
514 CALL sgemm(
'NoTranspose',
'NoTranspose',
516 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
517 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
518 $ zero, work( 1 ), n )
519 CALL sgemm(
'NoTranspose',
'Transpose',
521 $ -one, work( 1 ), n,
522 $ a( j*nb+1, (j-2)*nb+1 ), lda,
523 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
526 CALL ssygst( 1,
'Lower', kb,
527 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
528 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
535 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
536 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
546 CALL sgemm(
'NoTranspose',
'Transpose',
548 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
549 $ a( j*nb+1, (j-1)*nb+1 ), lda,
550 $ zero, work( j*nb+1 ), n )
552 CALL sgemm(
'NoTranspose',
'Transpose',
554 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
556 $ a( j*nb+1, (j-2)*nb+1 ), lda,
557 $ zero, work( j*nb+1 ), n )
562 CALL sgemm(
'NoTranspose',
'NoTranspose',
563 $ n-(j+1)*nb, nb, j*nb,
564 $ -one, a( (j+1)*nb+1, 1 ), lda,
566 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
571 CALL sgetrf( n-(j+1)*nb, nb,
572 $ a( (j+1)*nb+1, j*nb+1 ), lda,
573 $ ipiv( (j+1)*nb+1 ), iinfo )
580 kb = min(nb, n-(j+1)*nb)
581 CALL slaset(
'Full', kb, nb, zero, zero,
582 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
583 CALL slacpy(
'Upper', kb, nb,
584 $ a( (j+1)*nb+1, j*nb+1 ), lda,
585 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
587 CALL strsm(
'R',
'L',
'T',
'U', kb, nb, one,
588 $ a( j*nb+1, (j-1)*nb+1 ), lda,
589 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
597 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
598 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
601 CALL slaset(
'Upper', kb, nb, zero, one,
602 $ a( (j+1)*nb+1, j*nb+1), lda )
608 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
611 i2 = ipiv( (j+1)*nb+k )
614 CALL sswap( k-1, a( i1, (j+1)*nb+1 ), lda,
615 $ a( i2, (j+1)*nb+1 ), lda )
618 $
CALL sswap( i2-i1-1, a( i1+1, i1 ), 1,
619 $ a( i2, i1+1 ), lda )
622 $
CALL sswap( n-i2, a( i2+1, i1 ), 1,
626 a( i1, i1 ) = a( i2, i2 )
630 CALL sswap( j*nb, a( i1, 1 ), lda,
645 CALL sgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )