LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ clahef_rook()

subroutine clahef_rook ( character  uplo,
integer  n,
integer  nb,
integer  kb,
complex, dimension( lda, * )  a,
integer  lda,
integer, dimension( * )  ipiv,
complex, dimension( ldw, * )  w,
integer  ldw,
integer  info 
)

Download CLAHEF_ROOK + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CLAHEF_ROOK computes a partial factorization of a complex Hermitian
 matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting
 method. The partial factorization has the form:

 A  =  ( I  U12 ) ( A11  0  ) (  I      0     )  if UPLO = 'U', or:
       ( 0  U22 ) (  0   D  ) ( U12**H U22**H )

 A  =  ( L11  0 ) (  D   0  ) ( L11**H L21**H )  if UPLO = 'L'
       ( L21  I ) (  0  A22 ) (  0      I     )

 where the order of D is at most NB. The actual order is returned in
 the argument KB, and is either NB or NB-1, or N if N <= NB.
 Note that U**H denotes the conjugate transpose of U.

 CLAHEF_ROOK is an auxiliary routine called by CHETRF_ROOK. It uses
 blocked code (calling Level 3 BLAS) to update the submatrix
 A11 (if UPLO = 'U') or A22 (if UPLO = 'L').
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          Specifies whether the upper or lower triangular part of the
          Hermitian matrix A is stored:
          = 'U':  Upper triangular
          = 'L':  Lower triangular
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in]NB
          NB is INTEGER
          The maximum number of columns of the matrix A that should be
          factored.  NB should be at least 2 to allow for 2-by-2 pivot
          blocks.
[out]KB
          KB is INTEGER
          The number of columns of A that were actually factored.
          KB is either NB-1 or NB, or N if N <= NB.
[in,out]A
          A is COMPLEX array, dimension (LDA,N)
          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
          n-by-n upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading n-by-n lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.
          On exit, A contains details of the partial factorization.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          Details of the interchanges and the block structure of D.

          If UPLO = 'U':
             Only the last KB elements of IPIV are set.

             If IPIV(k) > 0, then rows and columns k and IPIV(k) were
             interchanged and D(k,k) is a 1-by-1 diagonal block.

             If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and
             columns k and -IPIV(k) were interchanged and rows and
             columns k-1 and -IPIV(k-1) were inerchaged,
             D(k-1:k,k-1:k) is a 2-by-2 diagonal block.

          If UPLO = 'L':
             Only the first KB elements of IPIV are set.

             If IPIV(k) > 0, then rows and columns k and IPIV(k)
             were interchanged and D(k,k) is a 1-by-1 diagonal block.

             If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and
             columns k and -IPIV(k) were interchanged and rows and
             columns k+1 and -IPIV(k+1) were inerchaged,
             D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
[out]W
          W is COMPLEX array, dimension (LDW,NB)
[in]LDW
          LDW is INTEGER
          The leading dimension of the array W.  LDW >= max(1,N).
[out]INFO
          INFO is INTEGER
          = 0: successful exit
          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
               has been completed, but the block diagonal matrix D is
               exactly singular.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
  November 2013, Igor Kozachenko,
                  Computer Science Division,
                  University of California, Berkeley

  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas,
                  School of Mathematics,
                  University of Manchester

Definition at line 182 of file clahef_rook.f.

184*
185* -- LAPACK computational routine --
186* -- LAPACK is a software package provided by Univ. of Tennessee, --
187* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
188*
189* .. Scalar Arguments ..
190 CHARACTER UPLO
191 INTEGER INFO, KB, LDA, LDW, N, NB
192* ..
193* .. Array Arguments ..
194 INTEGER IPIV( * )
195 COMPLEX A( LDA, * ), W( LDW, * )
196* ..
197*
198* =====================================================================
199*
200* .. Parameters ..
201 REAL ZERO, ONE
202 parameter( zero = 0.0e+0, one = 1.0e+0 )
203 COMPLEX CONE
204 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
205 REAL EIGHT, SEVTEN
206 parameter( eight = 8.0e+0, sevten = 17.0e+0 )
207* ..
208* .. Local Scalars ..
209 LOGICAL DONE
210 INTEGER IMAX, ITEMP, II, J, JB, JJ, JMAX, JP1, JP2, K,
211 $ KK, KKW, KP, KSTEP, KW, P
212 REAL ABSAKK, ALPHA, COLMAX, STEMP, R1, ROWMAX, T,
213 $ SFMIN
214 COMPLEX D11, D21, D22, Z
215* ..
216* .. External Functions ..
217 LOGICAL LSAME
218 INTEGER ICAMAX
219 REAL SLAMCH
220 EXTERNAL lsame, icamax, slamch
221* ..
222* .. External Subroutines ..
223 EXTERNAL ccopy, csscal, cgemm, cgemv, clacgv, cswap
224* ..
225* .. Intrinsic Functions ..
226 INTRINSIC abs, conjg, aimag, max, min, real, sqrt
227* ..
228* .. Statement Functions ..
229 REAL CABS1
230* ..
231* .. Statement Function definitions ..
232 cabs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
233* ..
234* .. Executable Statements ..
235*
236 info = 0
237*
238* Initialize ALPHA for use in choosing pivot block size.
239*
240 alpha = ( one+sqrt( sevten ) ) / eight
241*
242* Compute machine safe minimum
243*
244 sfmin = slamch( 'S' )
245*
246 IF( lsame( uplo, 'U' ) ) THEN
247*
248* Factorize the trailing columns of A using the upper triangle
249* of A and working backwards, and compute the matrix W = U12*D
250* for use in updating A11 (note that conjg(W) is actually stored)
251*
252* K is the main loop index, decreasing from N in steps of 1 or 2
253*
254 k = n
255 10 CONTINUE
256*
257* KW is the column of W which corresponds to column K of A
258*
259 kw = nb + k - n
260*
261* Exit from loop
262*
263 IF( ( k.LE.n-nb+1 .AND. nb.LT.n ) .OR. k.LT.1 )
264 $ GO TO 30
265*
266 kstep = 1
267 p = k
268*
269* Copy column K of A to column KW of W and update it
270*
271 IF( k.GT.1 )
272 $ CALL ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 )
273 w( k, kw ) = real( a( k, k ) )
274 IF( k.LT.n ) THEN
275 CALL cgemv( 'No transpose', k, n-k, -cone, a( 1, k+1 ), lda,
276 $ w( k, kw+1 ), ldw, cone, w( 1, kw ), 1 )
277 w( k, kw ) = real( w( k, kw ) )
278 END IF
279*
280* Determine rows and columns to be interchanged and whether
281* a 1-by-1 or 2-by-2 pivot block will be used
282*
283 absakk = abs( real( w( k, kw ) ) )
284*
285* IMAX is the row-index of the largest off-diagonal element in
286* column K, and COLMAX is its absolute value.
287* Determine both COLMAX and IMAX.
288*
289 IF( k.GT.1 ) THEN
290 imax = icamax( k-1, w( 1, kw ), 1 )
291 colmax = cabs1( w( imax, kw ) )
292 ELSE
293 colmax = zero
294 END IF
295*
296 IF( max( absakk, colmax ).EQ.zero ) THEN
297*
298* Column K is zero or underflow: set INFO and continue
299*
300 IF( info.EQ.0 )
301 $ info = k
302 kp = k
303 a( k, k ) = real( w( k, kw ) )
304 IF( k.GT.1 )
305 $ CALL ccopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 )
306 ELSE
307*
308* ============================================================
309*
310* BEGIN pivot search
311*
312* Case(1)
313* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
314* (used to handle NaN and Inf)
315 IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
316*
317* no interchange, use 1-by-1 pivot block
318*
319 kp = k
320*
321 ELSE
322*
323* Lop until pivot found
324*
325 done = .false.
326*
327 12 CONTINUE
328*
329* BEGIN pivot search loop body
330*
331*
332* Copy column IMAX to column KW-1 of W and update it
333*
334 IF( imax.GT.1 )
335 $ CALL ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),
336 $ 1 )
337 w( imax, kw-1 ) = real( a( imax, imax ) )
338*
339 CALL ccopy( k-imax, a( imax, imax+1 ), lda,
340 $ w( imax+1, kw-1 ), 1 )
341 CALL clacgv( k-imax, w( imax+1, kw-1 ), 1 )
342*
343 IF( k.LT.n ) THEN
344 CALL cgemv( 'No transpose', k, n-k, -cone,
345 $ a( 1, k+1 ), lda, w( imax, kw+1 ), ldw,
346 $ cone, w( 1, kw-1 ), 1 )
347 w( imax, kw-1 ) = real( w( imax, kw-1 ) )
348 END IF
349*
350* JMAX is the column-index of the largest off-diagonal
351* element in row IMAX, and ROWMAX is its absolute value.
352* Determine both ROWMAX and JMAX.
353*
354 IF( imax.NE.k ) THEN
355 jmax = imax + icamax( k-imax, w( imax+1, kw-1 ),
356 $ 1 )
357 rowmax = cabs1( w( jmax, kw-1 ) )
358 ELSE
359 rowmax = zero
360 END IF
361*
362 IF( imax.GT.1 ) THEN
363 itemp = icamax( imax-1, w( 1, kw-1 ), 1 )
364 stemp = cabs1( w( itemp, kw-1 ) )
365 IF( stemp.GT.rowmax ) THEN
366 rowmax = stemp
367 jmax = itemp
368 END IF
369 END IF
370*
371* Case(2)
372* Equivalent to testing for
373* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX
374* (used to handle NaN and Inf)
375*
376 IF( .NOT.( abs( real( w( imax,kw-1 ) ) )
377 $ .LT.alpha*rowmax ) ) THEN
378*
379* interchange rows and columns K and IMAX,
380* use 1-by-1 pivot block
381*
382 kp = imax
383*
384* copy column KW-1 of W to column KW of W
385*
386 CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
387*
388 done = .true.
389*
390* Case(3)
391* Equivalent to testing for ROWMAX.EQ.COLMAX,
392* (used to handle NaN and Inf)
393*
394 ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
395 $ THEN
396*
397* interchange rows and columns K-1 and IMAX,
398* use 2-by-2 pivot block
399*
400 kp = imax
401 kstep = 2
402 done = .true.
403*
404* Case(4)
405 ELSE
406*
407* Pivot not found: set params and repeat
408*
409 p = imax
410 colmax = rowmax
411 imax = jmax
412*
413* Copy updated JMAXth (next IMAXth) column to Kth of W
414*
415 CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
416*
417 END IF
418*
419*
420* END pivot search loop body
421*
422 IF( .NOT.done ) GOTO 12
423*
424 END IF
425*
426* END pivot search
427*
428* ============================================================
429*
430* KK is the column of A where pivoting step stopped
431*
432 kk = k - kstep + 1
433*
434* KKW is the column of W which corresponds to column KK of A
435*
436 kkw = nb + kk - n
437*
438* Interchange rows and columns P and K.
439* Updated column P is already stored in column KW of W.
440*
441 IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
442*
443* Copy non-updated column K to column P of submatrix A
444* at step K. No need to copy element into columns
445* K and K-1 of A for 2-by-2 pivot, since these columns
446* will be later overwritten.
447*
448 a( p, p ) = real( a( k, k ) )
449 CALL ccopy( k-1-p, a( p+1, k ), 1, a( p, p+1 ),
450 $ lda )
451 CALL clacgv( k-1-p, a( p, p+1 ), lda )
452 IF( p.GT.1 )
453 $ CALL ccopy( p-1, a( 1, k ), 1, a( 1, p ), 1 )
454*
455* Interchange rows K and P in the last K+1 to N columns of A
456* (columns K and K-1 of A for 2-by-2 pivot will be
457* later overwritten). Interchange rows K and P
458* in last KKW to NB columns of W.
459*
460 IF( k.LT.n )
461 $ CALL cswap( n-k, a( k, k+1 ), lda, a( p, k+1 ),
462 $ lda )
463 CALL cswap( n-kk+1, w( k, kkw ), ldw, w( p, kkw ),
464 $ ldw )
465 END IF
466*
467* Interchange rows and columns KP and KK.
468* Updated column KP is already stored in column KKW of W.
469*
470 IF( kp.NE.kk ) THEN
471*
472* Copy non-updated column KK to column KP of submatrix A
473* at step K. No need to copy element into column K
474* (or K and K-1 for 2-by-2 pivot) of A, since these columns
475* will be later overwritten.
476*
477 a( kp, kp ) = real( a( kk, kk ) )
478 CALL ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),
479 $ lda )
480 CALL clacgv( kk-1-kp, a( kp, kp+1 ), lda )
481 IF( kp.GT.1 )
482 $ CALL ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 )
483*
484* Interchange rows KK and KP in last K+1 to N columns of A
485* (columns K (or K and K-1 for 2-by-2 pivot) of A will be
486* later overwritten). Interchange rows KK and KP
487* in last KKW to NB columns of W.
488*
489 IF( k.LT.n )
490 $ CALL cswap( n-k, a( kk, k+1 ), lda, a( kp, k+1 ),
491 $ lda )
492 CALL cswap( n-kk+1, w( kk, kkw ), ldw, w( kp, kkw ),
493 $ ldw )
494 END IF
495*
496 IF( kstep.EQ.1 ) THEN
497*
498* 1-by-1 pivot block D(k): column kw of W now holds
499*
500* W(kw) = U(k)*D(k),
501*
502* where U(k) is the k-th column of U
503*
504* (1) Store subdiag. elements of column U(k)
505* and 1-by-1 block D(k) in column k of A.
506* (NOTE: Diagonal element U(k,k) is a UNIT element
507* and not stored)
508* A(k,k) := D(k,k) = W(k,kw)
509* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
510*
511* (NOTE: No need to use for Hermitian matrix
512* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
513* element D(k,k) from W (potentially saves only one load))
514 CALL ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 )
515 IF( k.GT.1 ) THEN
516*
517* (NOTE: No need to check if A(k,k) is NOT ZERO,
518* since that was ensured earlier in pivot search:
519* case A(k,k) = 0 falls into 2x2 pivot case(3))
520*
521* Handle division by a small number
522*
523 t = real( a( k, k ) )
524 IF( abs( t ).GE.sfmin ) THEN
525 r1 = one / t
526 CALL csscal( k-1, r1, a( 1, k ), 1 )
527 ELSE
528 DO 14 ii = 1, k-1
529 a( ii, k ) = a( ii, k ) / t
530 14 CONTINUE
531 END IF
532*
533* (2) Conjugate column W(kw)
534*
535 CALL clacgv( k-1, w( 1, kw ), 1 )
536 END IF
537*
538 ELSE
539*
540* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
541*
542* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
543*
544* where U(k) and U(k-1) are the k-th and (k-1)-th columns
545* of U
546*
547* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
548* block D(k-1:k,k-1:k) in columns k-1 and k of A.
549* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
550* block and not stored)
551* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
552* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
553* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
554*
555 IF( k.GT.2 ) THEN
556*
557* Factor out the columns of the inverse of 2-by-2 pivot
558* block D, so that each column contains 1, to reduce the
559* number of FLOPS when we multiply panel
560* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
561*
562* D**(-1) = ( d11 cj(d21) )**(-1) =
563* ( d21 d22 )
564*
565* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
566* ( (-d21) ( d11 ) )
567*
568* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
569*
570* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
571* ( ( -1 ) ( d11/conj(d21) ) )
572*
573* = 1/(|d21|**2) * 1/(D22*D11-1) *
574*
575* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
576* ( ( -1 ) ( D22 ) )
577*
578* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
579* ( ( -1 ) ( D22 ) )
580*
581* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
582* ( ( -1 ) ( D22 ) )
583*
584* Handle division by a small number. (NOTE: order of
585* operations is important)
586*
587* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
588* ( (( -1 ) ) (( D22 ) ) ),
589*
590* where D11 = d22/d21,
591* D22 = d11/conj(d21),
592* D21 = d21,
593* T = 1/(D22*D11-1).
594*
595* (NOTE: No need to check for division by ZERO,
596* since that was ensured earlier in pivot search:
597* (a) d21 != 0 in 2x2 pivot case(4),
598* since |d21| should be larger than |d11| and |d22|;
599* (b) (D22*D11 - 1) != 0, since from (a),
600* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
601*
602 d21 = w( k-1, kw )
603 d11 = w( k, kw ) / conjg( d21 )
604 d22 = w( k-1, kw-1 ) / d21
605 t = one / ( real( d11*d22 )-one )
606*
607* Update elements in columns A(k-1) and A(k) as
608* dot products of rows of ( W(kw-1) W(kw) ) and columns
609* of D**(-1)
610*
611 DO 20 j = 1, k - 2
612 a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /
613 $ d21 )
614 a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /
615 $ conjg( d21 ) )
616 20 CONTINUE
617 END IF
618*
619* Copy D(k) to A
620*
621 a( k-1, k-1 ) = w( k-1, kw-1 )
622 a( k-1, k ) = w( k-1, kw )
623 a( k, k ) = w( k, kw )
624*
625* (2) Conjugate columns W(kw) and W(kw-1)
626*
627 CALL clacgv( k-1, w( 1, kw ), 1 )
628 CALL clacgv( k-2, w( 1, kw-1 ), 1 )
629*
630 END IF
631*
632 END IF
633*
634* Store details of the interchanges in IPIV
635*
636 IF( kstep.EQ.1 ) THEN
637 ipiv( k ) = kp
638 ELSE
639 ipiv( k ) = -p
640 ipiv( k-1 ) = -kp
641 END IF
642*
643* Decrease K and return to the start of the main loop
644*
645 k = k - kstep
646 GO TO 10
647*
648 30 CONTINUE
649*
650* Update the upper triangle of A11 (= A(1:k,1:k)) as
651*
652* A11 := A11 - U12*D*U12**H = A11 - U12*W**H
653*
654* computing blocks of NB columns at a time (note that conjg(W) is
655* actually stored)
656*
657 DO 50 j = ( ( k-1 ) / nb )*nb + 1, 1, -nb
658 jb = min( nb, k-j+1 )
659*
660* Update the upper triangle of the diagonal block
661*
662 DO 40 jj = j, j + jb - 1
663 a( jj, jj ) = real( a( jj, jj ) )
664 CALL cgemv( 'No transpose', jj-j+1, n-k, -cone,
665 $ a( j, k+1 ), lda, w( jj, kw+1 ), ldw, cone,
666 $ a( j, jj ), 1 )
667 a( jj, jj ) = real( a( jj, jj ) )
668 40 CONTINUE
669*
670* Update the rectangular superdiagonal block
671*
672 IF( j.GE.2 )
673 $ CALL cgemm( 'No transpose', 'Transpose', j-1, jb, n-k,
674 $ -cone, a( 1, k+1 ), lda, w( j, kw+1 ), ldw,
675 $ cone, a( 1, j ), lda )
676 50 CONTINUE
677*
678* Put U12 in standard form by partially undoing the interchanges
679* in of rows in columns k+1:n looping backwards from k+1 to n
680*
681 j = k + 1
682 60 CONTINUE
683*
684* Undo the interchanges (if any) of rows J and JP2
685* (or J and JP2, and J+1 and JP1) at each step J
686*
687 kstep = 1
688 jp1 = 1
689* (Here, J is a diagonal index)
690 jj = j
691 jp2 = ipiv( j )
692 IF( jp2.LT.0 ) THEN
693 jp2 = -jp2
694* (Here, J is a diagonal index)
695 j = j + 1
696 jp1 = -ipiv( j )
697 kstep = 2
698 END IF
699* (NOTE: Here, J is used to determine row length. Length N-J+1
700* of the rows to swap back doesn't include diagonal element)
701 j = j + 1
702 IF( jp2.NE.jj .AND. j.LE.n )
703 $ CALL cswap( n-j+1, a( jp2, j ), lda, a( jj, j ), lda )
704 jj = jj + 1
705 IF( kstep.EQ.2 .AND. jp1.NE.jj .AND. j.LE.n )
706 $ CALL cswap( n-j+1, a( jp1, j ), lda, a( jj, j ), lda )
707 IF( j.LT.n )
708 $ GO TO 60
709*
710* Set KB to the number of columns factorized
711*
712 kb = n - k
713*
714 ELSE
715*
716* Factorize the leading columns of A using the lower triangle
717* of A and working forwards, and compute the matrix W = L21*D
718* for use in updating A22 (note that conjg(W) is actually stored)
719*
720* K is the main loop index, increasing from 1 in steps of 1 or 2
721*
722 k = 1
723 70 CONTINUE
724*
725* Exit from loop
726*
727 IF( ( k.GE.nb .AND. nb.LT.n ) .OR. k.GT.n )
728 $ GO TO 90
729*
730 kstep = 1
731 p = k
732*
733* Copy column K of A to column K of W and update column K of W
734*
735 w( k, k ) = real( a( k, k ) )
736 IF( k.LT.n )
737 $ CALL ccopy( n-k, a( k+1, k ), 1, w( k+1, k ), 1 )
738 IF( k.GT.1 ) THEN
739 CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ),
740 $ lda, w( k, 1 ), ldw, cone, w( k, k ), 1 )
741 w( k, k ) = real( w( k, k ) )
742 END IF
743*
744* Determine rows and columns to be interchanged and whether
745* a 1-by-1 or 2-by-2 pivot block will be used
746*
747 absakk = abs( real( w( k, k ) ) )
748*
749* IMAX is the row-index of the largest off-diagonal element in
750* column K, and COLMAX is its absolute value.
751* Determine both COLMAX and IMAX.
752*
753 IF( k.LT.n ) THEN
754 imax = k + icamax( n-k, w( k+1, k ), 1 )
755 colmax = cabs1( w( imax, k ) )
756 ELSE
757 colmax = zero
758 END IF
759*
760 IF( max( absakk, colmax ).EQ.zero ) THEN
761*
762* Column K is zero or underflow: set INFO and continue
763*
764 IF( info.EQ.0 )
765 $ info = k
766 kp = k
767 a( k, k ) = real( w( k, k ) )
768 IF( k.LT.n )
769 $ CALL ccopy( n-k, w( k+1, k ), 1, a( k+1, k ), 1 )
770 ELSE
771*
772* ============================================================
773*
774* BEGIN pivot search
775*
776* Case(1)
777* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
778* (used to handle NaN and Inf)
779*
780 IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
781*
782* no interchange, use 1-by-1 pivot block
783*
784 kp = k
785*
786 ELSE
787*
788 done = .false.
789*
790* Loop until pivot found
791*
792 72 CONTINUE
793*
794* BEGIN pivot search loop body
795*
796*
797* Copy column IMAX to column k+1 of W and update it
798*
799 CALL ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1)
800 CALL clacgv( imax-k, w( k, k+1 ), 1 )
801 w( imax, k+1 ) = real( a( imax, imax ) )
802*
803 IF( imax.LT.n )
804 $ CALL ccopy( n-imax, a( imax+1, imax ), 1,
805 $ w( imax+1, k+1 ), 1 )
806*
807 IF( k.GT.1 ) THEN
808 CALL cgemv( 'No transpose', n-k+1, k-1, -cone,
809 $ a( k, 1 ), lda, w( imax, 1 ), ldw,
810 $ cone, w( k, k+1 ), 1 )
811 w( imax, k+1 ) = real( w( imax, k+1 ) )
812 END IF
813*
814* JMAX is the column-index of the largest off-diagonal
815* element in row IMAX, and ROWMAX is its absolute value.
816* Determine both ROWMAX and JMAX.
817*
818 IF( imax.NE.k ) THEN
819 jmax = k - 1 + icamax( imax-k, w( k, k+1 ), 1 )
820 rowmax = cabs1( w( jmax, k+1 ) )
821 ELSE
822 rowmax = zero
823 END IF
824*
825 IF( imax.LT.n ) THEN
826 itemp = imax + icamax( n-imax, w( imax+1, k+1 ), 1)
827 stemp = cabs1( w( itemp, k+1 ) )
828 IF( stemp.GT.rowmax ) THEN
829 rowmax = stemp
830 jmax = itemp
831 END IF
832 END IF
833*
834* Case(2)
835* Equivalent to testing for
836* ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX
837* (used to handle NaN and Inf)
838*
839 IF( .NOT.( abs( real( w( imax,k+1 ) ) )
840 $ .LT.alpha*rowmax ) ) THEN
841*
842* interchange rows and columns K and IMAX,
843* use 1-by-1 pivot block
844*
845 kp = imax
846*
847* copy column K+1 of W to column K of W
848*
849 CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
850*
851 done = .true.
852*
853* Case(3)
854* Equivalent to testing for ROWMAX.EQ.COLMAX,
855* (used to handle NaN and Inf)
856*
857 ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
858 $ THEN
859*
860* interchange rows and columns K+1 and IMAX,
861* use 2-by-2 pivot block
862*
863 kp = imax
864 kstep = 2
865 done = .true.
866*
867* Case(4)
868 ELSE
869*
870* Pivot not found: set params and repeat
871*
872 p = imax
873 colmax = rowmax
874 imax = jmax
875*
876* Copy updated JMAXth (next IMAXth) column to Kth of W
877*
878 CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
879*
880 END IF
881*
882*
883* End pivot search loop body
884*
885 IF( .NOT.done ) GOTO 72
886*
887 END IF
888*
889* END pivot search
890*
891* ============================================================
892*
893* KK is the column of A where pivoting step stopped
894*
895 kk = k + kstep - 1
896*
897* Interchange rows and columns P and K (only for 2-by-2 pivot).
898* Updated column P is already stored in column K of W.
899*
900 IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
901*
902* Copy non-updated column KK-1 to column P of submatrix A
903* at step K. No need to copy element into columns
904* K and K+1 of A for 2-by-2 pivot, since these columns
905* will be later overwritten.
906*
907 a( p, p ) = real( a( k, k ) )
908 CALL ccopy( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda )
909 CALL clacgv( p-k-1, a( p, k+1 ), lda )
910 IF( p.LT.n )
911 $ CALL ccopy( n-p, a( p+1, k ), 1, a( p+1, p ), 1 )
912*
913* Interchange rows K and P in first K-1 columns of A
914* (columns K and K+1 of A for 2-by-2 pivot will be
915* later overwritten). Interchange rows K and P
916* in first KK columns of W.
917*
918 IF( k.GT.1 )
919 $ CALL cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda )
920 CALL cswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw )
921 END IF
922*
923* Interchange rows and columns KP and KK.
924* Updated column KP is already stored in column KK of W.
925*
926 IF( kp.NE.kk ) THEN
927*
928* Copy non-updated column KK to column KP of submatrix A
929* at step K. No need to copy element into column K
930* (or K and K+1 for 2-by-2 pivot) of A, since these columns
931* will be later overwritten.
932*
933 a( kp, kp ) = real( a( kk, kk ) )
934 CALL ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),
935 $ lda )
936 CALL clacgv( kp-kk-1, a( kp, kk+1 ), lda )
937 IF( kp.LT.n )
938 $ CALL ccopy( n-kp, a( kp+1, kk ), 1, a( kp+1, kp ), 1 )
939*
940* Interchange rows KK and KP in first K-1 columns of A
941* (column K (or K and K+1 for 2-by-2 pivot) of A will be
942* later overwritten). Interchange rows KK and KP
943* in first KK columns of W.
944*
945 IF( k.GT.1 )
946 $ CALL cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda )
947 CALL cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw )
948 END IF
949*
950 IF( kstep.EQ.1 ) THEN
951*
952* 1-by-1 pivot block D(k): column k of W now holds
953*
954* W(k) = L(k)*D(k),
955*
956* where L(k) is the k-th column of L
957*
958* (1) Store subdiag. elements of column L(k)
959* and 1-by-1 block D(k) in column k of A.
960* (NOTE: Diagonal element L(k,k) is a UNIT element
961* and not stored)
962* A(k,k) := D(k,k) = W(k,k)
963* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
964*
965* (NOTE: No need to use for Hermitian matrix
966* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
967* element D(k,k) from W (potentially saves only one load))
968 CALL ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 )
969 IF( k.LT.n ) THEN
970*
971* (NOTE: No need to check if A(k,k) is NOT ZERO,
972* since that was ensured earlier in pivot search:
973* case A(k,k) = 0 falls into 2x2 pivot case(3))
974*
975* Handle division by a small number
976*
977 t = real( a( k, k ) )
978 IF( abs( t ).GE.sfmin ) THEN
979 r1 = one / t
980 CALL csscal( n-k, r1, a( k+1, k ), 1 )
981 ELSE
982 DO 74 ii = k + 1, n
983 a( ii, k ) = a( ii, k ) / t
984 74 CONTINUE
985 END IF
986*
987* (2) Conjugate column W(k)
988*
989 CALL clacgv( n-k, w( k+1, k ), 1 )
990 END IF
991*
992 ELSE
993*
994* 2-by-2 pivot block D(k): columns k and k+1 of W now hold
995*
996* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
997*
998* where L(k) and L(k+1) are the k-th and (k+1)-th columns
999* of L
1000*
1001* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
1002* block D(k:k+1,k:k+1) in columns k and k+1 of A.
1003* NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
1004* block and not stored.
1005* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
1006* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
1007* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
1008*
1009 IF( k.LT.n-1 ) THEN
1010*
1011* Factor out the columns of the inverse of 2-by-2 pivot
1012* block D, so that each column contains 1, to reduce the
1013* number of FLOPS when we multiply panel
1014* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
1015*
1016* D**(-1) = ( d11 cj(d21) )**(-1) =
1017* ( d21 d22 )
1018*
1019* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
1020* ( (-d21) ( d11 ) )
1021*
1022* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
1023*
1024* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
1025* ( ( -1 ) ( d11/conj(d21) ) )
1026*
1027* = 1/(|d21|**2) * 1/(D22*D11-1) *
1028*
1029* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1030* ( ( -1 ) ( D22 ) )
1031*
1032* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1033* ( ( -1 ) ( D22 ) )
1034*
1035* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
1036* ( ( -1 ) ( D22 ) )
1037*
1038* Handle division by a small number. (NOTE: order of
1039* operations is important)
1040*
1041* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
1042* ( (( -1 ) ) (( D22 ) ) ),
1043*
1044* where D11 = d22/d21,
1045* D22 = d11/conj(d21),
1046* D21 = d21,
1047* T = 1/(D22*D11-1).
1048*
1049* (NOTE: No need to check for division by ZERO,
1050* since that was ensured earlier in pivot search:
1051* (a) d21 != 0 in 2x2 pivot case(4),
1052* since |d21| should be larger than |d11| and |d22|;
1053* (b) (D22*D11 - 1) != 0, since from (a),
1054* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
1055*
1056 d21 = w( k+1, k )
1057 d11 = w( k+1, k+1 ) / d21
1058 d22 = w( k, k ) / conjg( d21 )
1059 t = one / ( real( d11*d22 )-one )
1060*
1061* Update elements in columns A(k) and A(k+1) as
1062* dot products of rows of ( W(k) W(k+1) ) and columns
1063* of D**(-1)
1064*
1065 DO 80 j = k + 2, n
1066 a( j, k ) = t*( ( d11*w( j, k )-w( j, k+1 ) ) /
1067 $ conjg( d21 ) )
1068 a( j, k+1 ) = t*( ( d22*w( j, k+1 )-w( j, k ) ) /
1069 $ d21 )
1070 80 CONTINUE
1071 END IF
1072*
1073* Copy D(k) to A
1074*
1075 a( k, k ) = w( k, k )
1076 a( k+1, k ) = w( k+1, k )
1077 a( k+1, k+1 ) = w( k+1, k+1 )
1078*
1079* (2) Conjugate columns W(k) and W(k+1)
1080*
1081 CALL clacgv( n-k, w( k+1, k ), 1 )
1082 CALL clacgv( n-k-1, w( k+2, k+1 ), 1 )
1083*
1084 END IF
1085*
1086 END IF
1087*
1088* Store details of the interchanges in IPIV
1089*
1090 IF( kstep.EQ.1 ) THEN
1091 ipiv( k ) = kp
1092 ELSE
1093 ipiv( k ) = -p
1094 ipiv( k+1 ) = -kp
1095 END IF
1096*
1097* Increase K and return to the start of the main loop
1098*
1099 k = k + kstep
1100 GO TO 70
1101*
1102 90 CONTINUE
1103*
1104* Update the lower triangle of A22 (= A(k:n,k:n)) as
1105*
1106* A22 := A22 - L21*D*L21**H = A22 - L21*W**H
1107*
1108* computing blocks of NB columns at a time (note that conjg(W) is
1109* actually stored)
1110*
1111 DO 110 j = k, n, nb
1112 jb = min( nb, n-j+1 )
1113*
1114* Update the lower triangle of the diagonal block
1115*
1116 DO 100 jj = j, j + jb - 1
1117 a( jj, jj ) = real( a( jj, jj ) )
1118 CALL cgemv( 'No transpose', j+jb-jj, k-1, -cone,
1119 $ a( jj, 1 ), lda, w( jj, 1 ), ldw, cone,
1120 $ a( jj, jj ), 1 )
1121 a( jj, jj ) = real( a( jj, jj ) )
1122 100 CONTINUE
1123*
1124* Update the rectangular subdiagonal block
1125*
1126 IF( j+jb.LE.n )
1127 $ CALL cgemm( 'No transpose', 'Transpose', n-j-jb+1, jb,
1128 $ k-1, -cone, a( j+jb, 1 ), lda, w( j, 1 ),
1129 $ ldw, cone, a( j+jb, j ), lda )
1130 110 CONTINUE
1131*
1132* Put L21 in standard form by partially undoing the interchanges
1133* of rows in columns 1:k-1 looping backwards from k-1 to 1
1134*
1135 j = k - 1
1136 120 CONTINUE
1137*
1138* Undo the interchanges (if any) of rows J and JP2
1139* (or J and JP2, and J-1 and JP1) at each step J
1140*
1141 kstep = 1
1142 jp1 = 1
1143* (Here, J is a diagonal index)
1144 jj = j
1145 jp2 = ipiv( j )
1146 IF( jp2.LT.0 ) THEN
1147 jp2 = -jp2
1148* (Here, J is a diagonal index)
1149 j = j - 1
1150 jp1 = -ipiv( j )
1151 kstep = 2
1152 END IF
1153* (NOTE: Here, J is used to determine row length. Length J
1154* of the rows to swap back doesn't include diagonal element)
1155 j = j - 1
1156 IF( jp2.NE.jj .AND. j.GE.1 )
1157 $ CALL cswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda )
1158 jj = jj -1
1159 IF( kstep.EQ.2 .AND. jp1.NE.jj .AND. j.GE.1 )
1160 $ CALL cswap( j, a( jp1, 1 ), lda, a( jj, 1 ), lda )
1161 IF( j.GT.1 )
1162 $ GO TO 120
1163*
1164* Set KB to the number of columns factorized
1165*
1166 kb = k - 1
1167*
1168 END IF
1169 RETURN
1170*
1171* End of CLAHEF_ROOK
1172*
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine cgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
CGEMV
Definition cgemv.f:160
integer function icamax(n, cx, incx)
ICAMAX
Definition icamax.f:71
subroutine clacgv(n, x, incx)
CLACGV conjugates a complex vector.
Definition clacgv.f:74
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
Here is the call graph for this function:
Here is the caller graph for this function: