158
159
160
161
162
163 IMPLICIT NONE
164
165
166 CHARACTER UPLO
167 INTEGER N, LDA, LTB, LWORK, INFO
168
169
170 INTEGER IPIV( * ), IPIV2( * )
171 REAL A( LDA, * ), TB( * ), WORK( * )
172
173
174
175
176 REAL ZERO, ONE
177 parameter( zero = 0.0e+0, one = 1.0e+0 )
178
179
180 LOGICAL UPPER, TQUERY, WQUERY
181 INTEGER I, J, K, I1, I2, TD
182 INTEGER LDTB, NB, KB, JB, NT, IINFO
183 REAL PIV
184
185
186 LOGICAL LSAME
187 INTEGER ILAENV
188 REAL SROUNDUP_LWORK
190
191
195
196
197 INTRINSIC min, max
198
199
200
201
202
203 info = 0
204 upper =
lsame( uplo,
'U' )
205 wquery = ( lwork.EQ.-1 )
206 tquery = ( ltb.EQ.-1 )
207 IF( .NOT.upper .AND. .NOT.
lsame( uplo,
'L' ) )
THEN
208 info = -1
209 ELSE IF( n.LT.0 ) THEN
210 info = -2
211 ELSE IF( lda.LT.max( 1, n ) ) THEN
212 info = -4
213 ELSE IF( ltb.LT.max( 1, 4*n ) .AND. .NOT.tquery ) THEN
214 info = -6
215 ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.wquery ) THEN
216 info = -10
217 END IF
218
219 IF( info.NE.0 ) THEN
220 CALL xerbla(
'SSYTRF_AA_2STAGE', -info )
221 RETURN
222 END IF
223
224
225
226 nb =
ilaenv( 1,
'SSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
227 IF( info.EQ.0 ) THEN
228 IF( tquery ) THEN
230 END IF
231 IF( wquery ) THEN
233 END IF
234 END IF
235 IF( tquery .OR. wquery ) THEN
236 RETURN
237 END IF
238
239
240
241 IF( n.EQ.0 ) THEN
242 RETURN
243 ENDIF
244
245
246
247 ldtb = ltb/n
248 IF( ldtb .LT. 3*nb+1 ) THEN
249 nb = (ldtb-1)/3
250 END IF
251 IF( lwork .LT. nb*n ) THEN
252 nb = lwork/n
253 END IF
254
255
256
257 nt = (n+nb-1)/nb
258 td = 2*nb
259 kb = min(nb, n)
260
261
262
263 DO j = 1, kb
264 ipiv( j ) = j
265 END DO
266
267
268
269 tb( 1 ) = real( nb )
270
271 IF( upper ) THEN
272
273
274
275
276
277 DO j = 0, nt-1
278
279
280
281 kb = min(nb, n-j*nb)
282 DO i = 1, j-1
283 IF( i.EQ.1 ) THEN
284
285 IF( i .EQ. (j-1) ) THEN
286 jb = nb+kb
287 ELSE
288 jb = 2*nb
289 END IF
290 CALL sgemm(
'NoTranspose',
'NoTranspose',
291 $ nb, kb, jb,
292 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
293 $ a( (i-1)*nb+1, j*nb+1 ), lda,
294 $ zero, work( i*nb+1 ), n )
295 ELSE
296
297 IF( i .EQ. j-1) THEN
298 jb = 2*nb+kb
299 ELSE
300 jb = 3*nb
301 END IF
302 CALL sgemm(
'NoTranspose',
'NoTranspose',
303 $ nb, kb, jb,
304 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
305 $ ldtb-1,
306 $ a( (i-2)*nb+1, j*nb+1 ), lda,
307 $ zero, work( i*nb+1 ), n )
308 END IF
309 END DO
310
311
312
313 CALL slacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
314 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
315 IF( j.GT.1 ) THEN
316
317 CALL sgemm(
'Transpose',
'NoTranspose',
318 $ kb, kb, (j-1)*nb,
319 $ -one, a( 1, j*nb+1 ), lda,
320 $ work( nb+1 ), n,
321 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
322
323 CALL sgemm(
'Transpose',
'NoTranspose',
324 $ kb, nb, kb,
325 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
326 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
327 $ zero, work( 1 ), n )
328 CALL sgemm(
'NoTranspose',
'NoTranspose',
329 $ kb, kb, nb,
330 $ -one, work( 1 ), n,
331 $ a( (j-2)*nb+1, j*nb+1 ), lda,
332 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
333 END IF
334 IF( j.GT.0 ) THEN
335 CALL ssygst( 1,
'Upper', kb,
336 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
337 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
338 END IF
339
340
341
342 DO i = 1, kb
343 DO k = i+1, kb
344 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
345 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
346 END DO
347 END DO
348
349 IF( j.LT.nt-1 ) THEN
350 IF( j.GT.0 ) THEN
351
352
353
354 IF( j.EQ.1 ) THEN
355 CALL sgemm(
'NoTranspose',
'NoTranspose',
356 $ kb, kb, kb,
357 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
358 $ a( (j-1)*nb+1, j*nb+1 ), lda,
359 $ zero, work( j*nb+1 ), n )
360 ELSE
361 CALL sgemm(
'NoTranspose',
'NoTranspose',
362 $ kb, kb, nb+kb,
363 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
364 $ ldtb-1,
365 $ a( (j-2)*nb+1, j*nb+1 ), lda,
366 $ zero, work( j*nb+1 ), n )
367 END IF
368
369
370
371 CALL sgemm(
'Transpose',
'NoTranspose',
372 $ nb, n-(j+1)*nb, j*nb,
373 $ -one, work( nb+1 ), n,
374 $ a( 1, (j+1)*nb+1 ), lda,
375 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
376 END IF
377
378
379
380 DO k = 1, nb
381 CALL scopy( n-(j+1)*nb,
382 $ a( j*nb+k, (j+1)*nb+1 ), lda,
383 $ work( 1+(k-1)*n ), 1 )
384 END DO
385
386
387
388 CALL sgetrf( n-(j+1)*nb, nb,
389 $ work, n,
390 $ ipiv( (j+1)*nb+1 ), iinfo )
391
392
393
394
395
396
397 DO k = 1, nb
398 CALL scopy( n-(j+1)*nb,
399 $ work( 1+(k-1)*n ), 1,
400 $ a( j*nb+k, (j+1)*nb+1 ), lda )
401 END DO
402
403
404
405 kb = min(nb, n-(j+1)*nb)
406 CALL slaset(
'Full', kb, nb, zero, zero,
407 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
408 CALL slacpy(
'Upper', kb, nb,
409 $ work, n,
410 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
411 IF( j.GT.0 ) THEN
412 CALL strsm(
'R',
'U',
'N',
'U', kb, nb, one,
413 $ a( (j-1)*nb+1, j*nb+1 ), lda,
414 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
415 END IF
416
417
418
419
420 DO k = 1, nb
421 DO i = 1, kb
422 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
423 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
424 END DO
425 END DO
426 CALL slaset(
'Lower', kb, nb, zero, one,
427 $ a( j*nb+1, (j+1)*nb+1), lda )
428
429
430
431 DO k = 1, kb
432
433 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
434
435 i1 = (j+1)*nb+k
436 i2 = ipiv( (j+1)*nb+k )
437 IF( i1.NE.i2 ) THEN
438
439 CALL sswap( k-1, a( (j+1)*nb+1, i1 ), 1,
440 $ a( (j+1)*nb+1, i2 ), 1 )
441
442 IF( i2.GT.(i1+1) )
443 $
CALL sswap( i2-i1-1, a( i1, i1+1 ), lda,
444 $ a( i1+1, i2 ), 1 )
445
446 IF( i2.LT.n )
447 $
CALL sswap( n-i2, a( i1, i2+1 ), lda,
448 $ a( i2, i2+1 ), lda )
449
450 piv = a( i1, i1 )
451 a( i1, i1 ) = a( i2, i2 )
452 a( i2, i2 ) = piv
453
454 IF( j.GT.0 ) THEN
455 CALL sswap( j*nb, a( 1, i1 ), 1,
456 $ a( 1, i2 ), 1 )
457 END IF
458 ENDIF
459 END DO
460 END IF
461 END DO
462 ELSE
463
464
465
466
467
468 DO j = 0, nt-1
469
470
471
472 kb = min(nb, n-j*nb)
473 DO i = 1, j-1
474 IF( i.EQ.1 ) THEN
475
476 IF( i .EQ. (j-1) ) THEN
477 jb = nb+kb
478 ELSE
479 jb = 2*nb
480 END IF
481 CALL sgemm(
'NoTranspose',
'Transpose',
482 $ nb, kb, jb,
483 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
484 $ a( j*nb+1, (i-1)*nb+1 ), lda,
485 $ zero, work( i*nb+1 ), n )
486 ELSE
487
488 IF( i .EQ. j-1) THEN
489 jb = 2*nb+kb
490 ELSE
491 jb = 3*nb
492 END IF
493 CALL sgemm(
'NoTranspose',
'Transpose',
494 $ nb, kb, jb,
495 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
496 $ ldtb-1,
497 $ a( j*nb+1, (i-2)*nb+1 ), lda,
498 $ zero, work( i*nb+1 ), n )
499 END IF
500 END DO
501
502
503
504 CALL slacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
505 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
506 IF( j.GT.1 ) THEN
507
508 CALL sgemm(
'NoTranspose',
'NoTranspose',
509 $ kb, kb, (j-1)*nb,
510 $ -one, a( j*nb+1, 1 ), lda,
511 $ work( nb+1 ), n,
512 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
513
514 CALL sgemm(
'NoTranspose',
'NoTranspose',
515 $ kb, nb, kb,
516 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
517 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
518 $ zero, work( 1 ), n )
519 CALL sgemm(
'NoTranspose',
'Transpose',
520 $ kb, kb, nb,
521 $ -one, work( 1 ), n,
522 $ a( j*nb+1, (j-2)*nb+1 ), lda,
523 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
524 END IF
525 IF( j.GT.0 ) THEN
526 CALL ssygst( 1,
'Lower', kb,
527 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
528 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
529 END IF
530
531
532
533 DO i = 1, kb
534 DO k = i+1, kb
535 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
536 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
537 END DO
538 END DO
539
540 IF( j.LT.nt-1 ) THEN
541 IF( j.GT.0 ) THEN
542
543
544
545 IF( j.EQ.1 ) THEN
546 CALL sgemm(
'NoTranspose',
'Transpose',
547 $ kb, kb, kb,
548 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
549 $ a( j*nb+1, (j-1)*nb+1 ), lda,
550 $ zero, work( j*nb+1 ), n )
551 ELSE
552 CALL sgemm(
'NoTranspose',
'Transpose',
553 $ kb, kb, nb+kb,
554 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
555 $ ldtb-1,
556 $ a( j*nb+1, (j-2)*nb+1 ), lda,
557 $ zero, work( j*nb+1 ), n )
558 END IF
559
560
561
562 CALL sgemm(
'NoTranspose',
'NoTranspose',
563 $ n-(j+1)*nb, nb, j*nb,
564 $ -one, a( (j+1)*nb+1, 1 ), lda,
565 $ work( nb+1 ), n,
566 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
567 END IF
568
569
570
571 CALL sgetrf( n-(j+1)*nb, nb,
572 $ a( (j+1)*nb+1, j*nb+1 ), lda,
573 $ ipiv( (j+1)*nb+1 ), iinfo )
574
575
576
577
578
579
580 kb = min(nb, n-(j+1)*nb)
581 CALL slaset(
'Full', kb, nb, zero, zero,
582 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
583 CALL slacpy(
'Upper', kb, nb,
584 $ a( (j+1)*nb+1, j*nb+1 ), lda,
585 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
586 IF( j.GT.0 ) THEN
587 CALL strsm(
'R',
'L',
'T',
'U', kb, nb, one,
588 $ a( j*nb+1, (j-1)*nb+1 ), lda,
589 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
590 END IF
591
592
593
594
595 DO k = 1, nb
596 DO i = 1, kb
597 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
598 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
599 END DO
600 END DO
601 CALL slaset(
'Upper', kb, nb, zero, one,
602 $ a( (j+1)*nb+1, j*nb+1), lda )
603
604
605
606 DO k = 1, kb
607
608 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
609
610 i1 = (j+1)*nb+k
611 i2 = ipiv( (j+1)*nb+k )
612 IF( i1.NE.i2 ) THEN
613
614 CALL sswap( k-1, a( i1, (j+1)*nb+1 ), lda,
615 $ a( i2, (j+1)*nb+1 ), lda )
616
617 IF( i2.GT.(i1+1) )
618 $
CALL sswap( i2-i1-1, a( i1+1, i1 ), 1,
619 $ a( i2, i1+1 ), lda )
620
621 IF( i2.LT.n )
622 $
CALL sswap( n-i2, a( i2+1, i1 ), 1,
623 $ a( i2+1, i2 ), 1 )
624
625 piv = a( i1, i1 )
626 a( i1, i1 ) = a( i2, i2 )
627 a( i2, i2 ) = piv
628
629 IF( j.GT.0 ) THEN
630 CALL sswap( j*nb, a( i1, 1 ), lda,
631 $ a( i2, 1 ), lda )
632 END IF
633 ENDIF
634 END DO
635
636
637
638
639
640 END IF
641 END DO
642 END IF
643
644
645 CALL sgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
646
647 RETURN
648
649
650
subroutine xerbla(srname, info)
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
subroutine sgbtrf(m, n, kl, ku, ab, ldab, ipiv, info)
SGBTRF
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
subroutine sgetrf(m, n, a, lda, ipiv, info)
SGETRF
subroutine ssygst(itype, uplo, n, a, lda, b, ldb, info)
SSYGST
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine sswap(n, sx, incx, sy, incy)
SSWAP
subroutine strsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRSM