160
161
162
163
164
165 IMPLICIT NONE
166
167
168 CHARACTER UPLO
169 INTEGER N, LDA, LTB, LWORK, INFO
170
171
172 INTEGER IPIV( * ), IPIV2( * )
173 COMPLEX A( LDA, * ), TB( * ), WORK( * )
174
175
176
177
178 COMPLEX CZERO, CONE
179 parameter( czero = ( 0.0e+0, 0.0e+0 ),
180 $ cone = ( 1.0e+0, 0.0e+0 ) )
181
182
183 LOGICAL UPPER, TQUERY, WQUERY
184 INTEGER I, J, K, I1, I2, TD
185 INTEGER LDTB, NB, KB, JB, NT, IINFO
186 COMPLEX PIV
187
188
189 LOGICAL LSAME
190 INTEGER ILAENV
191 REAL SROUNDUP_LWORK
193
194
197
198
199 INTRINSIC min, max
200
201
202
203
204
205 info = 0
206 upper =
lsame( uplo,
'U' )
207 wquery = ( lwork.EQ.-1 )
208 tquery = ( ltb.EQ.-1 )
209 IF( .NOT.upper .AND. .NOT.
lsame( uplo,
'L' ) )
THEN
210 info = -1
211 ELSE IF( n.LT.0 ) THEN
212 info = -2
213 ELSE IF( lda.LT.max( 1, n ) ) THEN
214 info = -4
215 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery ) THEN
216 info = -6
217 ELSE IF ( lwork .LT. n .AND. .NOT.wquery ) THEN
218 info = -10
219 END IF
220
221 IF( info.NE.0 ) THEN
222 CALL xerbla(
'CSYTRF_AA_2STAGE', -info )
223 RETURN
224 END IF
225
226
227
228 nb =
ilaenv( 1,
'CSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
229 IF( info.EQ.0 ) THEN
230 IF( tquery ) THEN
231 tb( 1 ) = (3*nb+1)*n
232 END IF
233 IF( wquery ) THEN
235 END IF
236 END IF
237 IF( tquery .OR. wquery ) THEN
238 RETURN
239 END IF
240
241
242
243 IF ( n.EQ.0 ) THEN
244 RETURN
245 ENDIF
246
247
248
249 ldtb = ltb/n
250 IF( ldtb .LT. 3*nb+1 ) THEN
251 nb = (ldtb-1)/3
252 END IF
253 IF( lwork .LT. nb*n ) THEN
254 nb = lwork/n
255 END IF
256
257
258
259 nt = (n+nb-1)/nb
260 td = 2*nb
261 kb = min(nb, n)
262
263
264
265 DO j = 1, kb
266 ipiv( j ) = j
267 END DO
268
269
270
271 tb( 1 ) = nb
272
273 IF( upper ) THEN
274
275
276
277
278
279 DO j = 0, nt-1
280
281
282
283 kb = min(nb, n-j*nb)
284 DO i = 1, j-1
285 IF( i.EQ.1 ) THEN
286
287 IF( i .EQ. (j-1) ) THEN
288 jb = nb+kb
289 ELSE
290 jb = 2*nb
291 END IF
292 CALL cgemm(
'NoTranspose',
'NoTranspose',
293 $ nb, kb, jb,
294 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
295 $ a( (i-1)*nb+1, j*nb+1 ), lda,
296 $ czero, work( i*nb+1 ), n )
297 ELSE
298
299 IF( i .EQ. j-1) THEN
300 jb = 2*nb+kb
301 ELSE
302 jb = 3*nb
303 END IF
304 CALL cgemm(
'NoTranspose',
'NoTranspose',
305 $ nb, kb, jb,
306 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
307 $ ldtb-1,
308 $ a( (i-2)*nb+1, j*nb+1 ), lda,
309 $ czero, work( i*nb+1 ), n )
310 END IF
311 END DO
312
313
314
315 CALL clacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
316 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
317 IF( j.GT.1 ) THEN
318
319 CALL cgemm(
'Transpose',
'NoTranspose',
320 $ kb, kb, (j-1)*nb,
321 $ -cone, a( 1, j*nb+1 ), lda,
322 $ work( nb+1 ), n,
323 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
324
325 CALL cgemm(
'Transpose',
'NoTranspose',
326 $ kb, nb, kb,
327 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
328 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
329 $ czero, work( 1 ), n )
330 CALL cgemm(
'NoTranspose',
'NoTranspose',
331 $ kb, kb, nb,
332 $ -cone, work( 1 ), n,
333 $ a( (j-2)*nb+1, j*nb+1 ), lda,
334 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
335 END IF
336
337
338
339 DO i = 1, kb
340 DO k = i+1, kb
341 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
342 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
343 END DO
344 END DO
345 IF( j.GT.0 ) THEN
346
347
348
349 CALL ctrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
350 $ a( (j-1)*nb+1, j*nb+1 ), lda,
351 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
352 CALL ctrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
353 $ a( (j-1)*nb+1, j*nb+1 ), lda,
354 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
355 END IF
356
357 IF( j.LT.nt-1 ) THEN
358 IF( j.GT.0 ) THEN
359
360
361
362 IF( j.EQ.1 ) THEN
363 CALL cgemm(
'NoTranspose',
'NoTranspose',
364 $ kb, kb, kb,
365 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
366 $ a( (j-1)*nb+1, j*nb+1 ), lda,
367 $ czero, work( j*nb+1 ), n )
368 ELSE
369 CALL cgemm(
'NoTranspose',
'NoTranspose',
370 $ kb, kb, nb+kb,
371 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
372 $ ldtb-1,
373 $ a( (j-2)*nb+1, j*nb+1 ), lda,
374 $ czero, work( j*nb+1 ), n )
375 END IF
376
377
378
379 CALL cgemm(
'Transpose',
'NoTranspose',
380 $ nb, n-(j+1)*nb, j*nb,
381 $ -cone, work( nb+1 ), n,
382 $ a( 1, (j+1)*nb+1 ), lda,
383 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
384 END IF
385
386
387
388 DO k = 1, nb
389 CALL ccopy( n-(j+1)*nb,
390 $ a( j*nb+k, (j+1)*nb+1 ), lda,
391 $ work( 1+(k-1)*n ), 1 )
392 END DO
393
394
395
396 CALL cgetrf( n-(j+1)*nb, nb,
397 $ work, n,
398 $ ipiv( (j+1)*nb+1 ), iinfo )
399
400
401
402
403
404
405 DO k = 1, nb
406 CALL ccopy( n-(j+1)*nb,
407 $ work( 1+(k-1)*n ), 1,
408 $ a( j*nb+k, (j+1)*nb+1 ), lda )
409 END DO
410
411
412
413 kb = min(nb, n-(j+1)*nb)
414 CALL claset(
'Full', kb, nb, czero, czero,
415 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
416 CALL clacpy(
'Upper', kb, nb,
417 $ work, n,
418 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
419 IF( j.GT.0 ) THEN
420 CALL ctrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
421 $ a( (j-1)*nb+1, j*nb+1 ), lda,
422 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
423 END IF
424
425
426
427
428 DO k = 1, nb
429 DO i = 1, kb
430 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
431 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
432 END DO
433 END DO
434 CALL claset(
'Lower', kb, nb, czero, cone,
435 $ a( j*nb+1, (j+1)*nb+1), lda )
436
437
438
439 DO k = 1, kb
440
441 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
442
443 i1 = (j+1)*nb+k
444 i2 = ipiv( (j+1)*nb+k )
445 IF( i1.NE.i2 ) THEN
446
447 CALL cswap( k-1, a( (j+1)*nb+1, i1 ), 1,
448 $ a( (j+1)*nb+1, i2 ), 1 )
449
450 IF( i2.GT.(i1+1) )
451 $
CALL cswap( i2-i1-1, a( i1, i1+1 ), lda,
452 $ a( i1+1, i2 ), 1 )
453
454 IF( i2.LT.n )
455 $
CALL cswap( n-i2, a( i1, i2+1 ), lda,
456 $ a( i2, i2+1 ), lda )
457
458 piv = a( i1, i1 )
459 a( i1, i1 ) = a( i2, i2 )
460 a( i2, i2 ) = piv
461
462 IF( j.GT.0 ) THEN
463 CALL cswap( j*nb, a( 1, i1 ), 1,
464 $ a( 1, i2 ), 1 )
465 END IF
466 ENDIF
467 END DO
468 END IF
469 END DO
470 ELSE
471
472
473
474
475
476 DO j = 0, nt-1
477
478
479
480 kb = min(nb, n-j*nb)
481 DO i = 1, j-1
482 IF( i.EQ.1 ) THEN
483
484 IF( i .EQ. (j-1) ) THEN
485 jb = nb+kb
486 ELSE
487 jb = 2*nb
488 END IF
489 CALL cgemm(
'NoTranspose',
'Transpose',
490 $ nb, kb, jb,
491 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
492 $ a( j*nb+1, (i-1)*nb+1 ), lda,
493 $ czero, work( i*nb+1 ), n )
494 ELSE
495
496 IF( i .EQ. (j-1) ) THEN
497 jb = 2*nb+kb
498 ELSE
499 jb = 3*nb
500 END IF
501 CALL cgemm(
'NoTranspose',
'Transpose',
502 $ nb, kb, jb,
503 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
504 $ ldtb-1,
505 $ a( j*nb+1, (i-2)*nb+1 ), lda,
506 $ czero, work( i*nb+1 ), n )
507 END IF
508 END DO
509
510
511
512 CALL clacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
513 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
514 IF( j.GT.1 ) THEN
515
516 CALL cgemm(
'NoTranspose',
'NoTranspose',
517 $ kb, kb, (j-1)*nb,
518 $ -cone, a( j*nb+1, 1 ), lda,
519 $ work( nb+1 ), n,
520 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
521
522 CALL cgemm(
'NoTranspose',
'NoTranspose',
523 $ kb, nb, kb,
524 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
525 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
526 $ czero, work( 1 ), n )
527 CALL cgemm(
'NoTranspose',
'Transpose',
528 $ kb, kb, nb,
529 $ -cone, work( 1 ), n,
530 $ a( j*nb+1, (j-2)*nb+1 ), lda,
531 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
532 END IF
533
534
535
536 DO i = 1, kb
537 DO k = i+1, kb
538 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
539 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
540 END DO
541 END DO
542 IF( j.GT.0 ) THEN
543
544
545
546 CALL ctrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
547 $ a( j*nb+1, (j-1)*nb+1 ), lda,
548 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
549 CALL ctrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
550 $ a( j*nb+1, (j-1)*nb+1 ), lda,
551 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
552 END IF
553
554
555
556 DO i = 1, kb
557 DO k = i+1, kb
558 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
559 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
560 END DO
561 END DO
562
563 IF( j.LT.nt-1 ) THEN
564 IF( j.GT.0 ) THEN
565
566
567
568 IF( j.EQ.1 ) THEN
569 CALL cgemm(
'NoTranspose',
'Transpose',
570 $ kb, kb, kb,
571 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
572 $ a( j*nb+1, (j-1)*nb+1 ), lda,
573 $ czero, work( j*nb+1 ), n )
574 ELSE
575 CALL cgemm(
'NoTranspose',
'Transpose',
576 $ kb, kb, nb+kb,
577 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
578 $ ldtb-1,
579 $ a( j*nb+1, (j-2)*nb+1 ), lda,
580 $ czero, work( j*nb+1 ), n )
581 END IF
582
583
584
585 CALL cgemm(
'NoTranspose',
'NoTranspose',
586 $ n-(j+1)*nb, nb, j*nb,
587 $ -cone, a( (j+1)*nb+1, 1 ), lda,
588 $ work( nb+1 ), n,
589 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
590 END IF
591
592
593
594 CALL cgetrf( n-(j+1)*nb, nb,
595 $ a( (j+1)*nb+1, j*nb+1 ), lda,
596 $ ipiv( (j+1)*nb+1 ), iinfo )
597
598
599
600
601
602
603 kb = min(nb, n-(j+1)*nb)
604 CALL claset(
'Full', kb, nb, czero, czero,
605 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
606 CALL clacpy(
'Upper', kb, nb,
607 $ a( (j+1)*nb+1, j*nb+1 ), lda,
608 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
609 IF( j.GT.0 ) THEN
610 CALL ctrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
611 $ a( j*nb+1, (j-1)*nb+1 ), lda,
612 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
613 END IF
614
615
616
617
618 DO k = 1, nb
619 DO i = 1, kb
620 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
621 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
622 END DO
623 END DO
624 CALL claset(
'Upper', kb, nb, czero, cone,
625 $ a( (j+1)*nb+1, j*nb+1 ), lda )
626
627
628
629 DO k = 1, kb
630
631 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
632
633 i1 = (j+1)*nb+k
634 i2 = ipiv( (j+1)*nb+k )
635 IF( i1.NE.i2 ) THEN
636
637 CALL cswap( k-1, a( i1, (j+1)*nb+1 ), lda,
638 $ a( i2, (j+1)*nb+1 ), lda )
639
640 IF( i2.GT.(i1+1) )
641 $
CALL cswap( i2-i1-1, a( i1+1, i1 ), 1,
642 $ a( i2, i1+1 ), lda )
643
644 IF( i2.LT.n )
645 $
CALL cswap( n-i2, a( i2+1, i1 ), 1,
646 $ a( i2+1, i2 ), 1 )
647
648 piv = a( i1, i1 )
649 a( i1, i1 ) = a( i2, i2 )
650 a( i2, i2 ) = piv
651
652 IF( j.GT.0 ) THEN
653 CALL cswap( j*nb, a( i1, 1 ), lda,
654 $ a( i2, 1 ), lda )
655 END IF
656 ENDIF
657 END DO
658
659
660
661
662
663 END IF
664 END DO
665 END IF
666
667
668 CALL cgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
669
670 RETURN
671
672
673
subroutine xerbla(srname, info)
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
subroutine cgbtrf(m, n, kl, ku, ab, ldab, ipiv, info)
CGBTRF
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
subroutine cgetrf(m, n, a, lda, ipiv, info)
CGETRF
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
subroutine clacpy(uplo, m, n, a, lda, b, ldb)
CLACPY copies all or part of one two-dimensional array to another.
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
logical function lsame(ca, cb)
LSAME
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
subroutine ctrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRSM