LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ clahef_rook()

subroutine clahef_rook ( character  UPLO,
integer  N,
integer  NB,
integer  KB,
complex, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
complex, dimension( ldw, * )  W,
integer  LDW,
integer  INFO 
)

Download CLAHEF_ROOK + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CLAHEF_ROOK computes a partial factorization of a complex Hermitian
 matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting
 method. The partial factorization has the form:

 A  =  ( I  U12 ) ( A11  0  ) (  I      0     )  if UPLO = 'U', or:
       ( 0  U22 ) (  0   D  ) ( U12**H U22**H )

 A  =  ( L11  0 ) (  D   0  ) ( L11**H L21**H )  if UPLO = 'L'
       ( L21  I ) (  0  A22 ) (  0      I     )

 where the order of D is at most NB. The actual order is returned in
 the argument KB, and is either NB or NB-1, or N if N <= NB.
 Note that U**H denotes the conjugate transpose of U.

 CLAHEF_ROOK is an auxiliary routine called by CHETRF_ROOK. It uses
 blocked code (calling Level 3 BLAS) to update the submatrix
 A11 (if UPLO = 'U') or A22 (if UPLO = 'L').
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          Specifies whether the upper or lower triangular part of the
          Hermitian matrix A is stored:
          = 'U':  Upper triangular
          = 'L':  Lower triangular
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in]NB
          NB is INTEGER
          The maximum number of columns of the matrix A that should be
          factored.  NB should be at least 2 to allow for 2-by-2 pivot
          blocks.
[out]KB
          KB is INTEGER
          The number of columns of A that were actually factored.
          KB is either NB-1 or NB, or N if N <= NB.
[in,out]A
          A is COMPLEX array, dimension (LDA,N)
          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
          n-by-n upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading n-by-n lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.
          On exit, A contains details of the partial factorization.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          Details of the interchanges and the block structure of D.

          If UPLO = 'U':
             Only the last KB elements of IPIV are set.

             If IPIV(k) > 0, then rows and columns k and IPIV(k) were
             interchanged and D(k,k) is a 1-by-1 diagonal block.

             If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and
             columns k and -IPIV(k) were interchanged and rows and
             columns k-1 and -IPIV(k-1) were inerchaged,
             D(k-1:k,k-1:k) is a 2-by-2 diagonal block.

          If UPLO = 'L':
             Only the first KB elements of IPIV are set.

             If IPIV(k) > 0, then rows and columns k and IPIV(k)
             were interchanged and D(k,k) is a 1-by-1 diagonal block.

             If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and
             columns k and -IPIV(k) were interchanged and rows and
             columns k+1 and -IPIV(k+1) were inerchaged,
             D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
[out]W
          W is COMPLEX array, dimension (LDW,NB)
[in]LDW
          LDW is INTEGER
          The leading dimension of the array W.  LDW >= max(1,N).
[out]INFO
          INFO is INTEGER
          = 0: successful exit
          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
               has been completed, but the block diagonal matrix D is
               exactly singular.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
  November 2013, Igor Kozachenko,
                  Computer Science Division,
                  University of California, Berkeley

  September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas,
                  School of Mathematics,
                  University of Manchester

Definition at line 182 of file clahef_rook.f.

184 *
185 * -- LAPACK computational routine --
186 * -- LAPACK is a software package provided by Univ. of Tennessee, --
187 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
188 *
189 * .. Scalar Arguments ..
190  CHARACTER UPLO
191  INTEGER INFO, KB, LDA, LDW, N, NB
192 * ..
193 * .. Array Arguments ..
194  INTEGER IPIV( * )
195  COMPLEX A( LDA, * ), W( LDW, * )
196 * ..
197 *
198 * =====================================================================
199 *
200 * .. Parameters ..
201  REAL ZERO, ONE
202  parameter( zero = 0.0e+0, one = 1.0e+0 )
203  COMPLEX CONE
204  parameter( cone = ( 1.0e+0, 0.0e+0 ) )
205  REAL EIGHT, SEVTEN
206  parameter( eight = 8.0e+0, sevten = 17.0e+0 )
207 * ..
208 * .. Local Scalars ..
209  LOGICAL DONE
210  INTEGER IMAX, ITEMP, II, J, JB, JJ, JMAX, JP1, JP2, K,
211  $ KK, KKW, KP, KSTEP, KW, P
212  REAL ABSAKK, ALPHA, COLMAX, STEMP, R1, ROWMAX, T,
213  $ SFMIN
214  COMPLEX D11, D21, D22, Z
215 * ..
216 * .. External Functions ..
217  LOGICAL LSAME
218  INTEGER ICAMAX
219  REAL SLAMCH
220  EXTERNAL lsame, icamax, slamch
221 * ..
222 * .. External Subroutines ..
223  EXTERNAL ccopy, csscal, cgemm, cgemv, clacgv, cswap
224 * ..
225 * .. Intrinsic Functions ..
226  INTRINSIC abs, conjg, aimag, max, min, real, sqrt
227 * ..
228 * .. Statement Functions ..
229  REAL CABS1
230 * ..
231 * .. Statement Function definitions ..
232  cabs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
233 * ..
234 * .. Executable Statements ..
235 *
236  info = 0
237 *
238 * Initialize ALPHA for use in choosing pivot block size.
239 *
240  alpha = ( one+sqrt( sevten ) ) / eight
241 *
242 * Compute machine safe minimum
243 *
244  sfmin = slamch( 'S' )
245 *
246  IF( lsame( uplo, 'U' ) ) THEN
247 *
248 * Factorize the trailing columns of A using the upper triangle
249 * of A and working backwards, and compute the matrix W = U12*D
250 * for use in updating A11 (note that conjg(W) is actually stored)
251 *
252 * K is the main loop index, decreasing from N in steps of 1 or 2
253 *
254  k = n
255  10 CONTINUE
256 *
257 * KW is the column of W which corresponds to column K of A
258 *
259  kw = nb + k - n
260 *
261 * Exit from loop
262 *
263  IF( ( k.LE.n-nb+1 .AND. nb.LT.n ) .OR. k.LT.1 )
264  $ GO TO 30
265 *
266  kstep = 1
267  p = k
268 *
269 * Copy column K of A to column KW of W and update it
270 *
271  IF( k.GT.1 )
272  $ CALL ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 )
273  w( k, kw ) = real( a( k, k ) )
274  IF( k.LT.n ) THEN
275  CALL cgemv( 'No transpose', k, n-k, -cone, a( 1, k+1 ), lda,
276  $ w( k, kw+1 ), ldw, cone, w( 1, kw ), 1 )
277  w( k, kw ) = real( w( k, kw ) )
278  END IF
279 *
280 * Determine rows and columns to be interchanged and whether
281 * a 1-by-1 or 2-by-2 pivot block will be used
282 *
283  absakk = abs( real( w( k, kw ) ) )
284 *
285 * IMAX is the row-index of the largest off-diagonal element in
286 * column K, and COLMAX is its absolute value.
287 * Determine both COLMAX and IMAX.
288 *
289  IF( k.GT.1 ) THEN
290  imax = icamax( k-1, w( 1, kw ), 1 )
291  colmax = cabs1( w( imax, kw ) )
292  ELSE
293  colmax = zero
294  END IF
295 *
296  IF( max( absakk, colmax ).EQ.zero ) THEN
297 *
298 * Column K is zero or underflow: set INFO and continue
299 *
300  IF( info.EQ.0 )
301  $ info = k
302  kp = k
303  a( k, k ) = real( w( k, kw ) )
304  IF( k.GT.1 )
305  $ CALL ccopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 )
306  ELSE
307 *
308 * ============================================================
309 *
310 * BEGIN pivot search
311 *
312 * Case(1)
313 * Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
314 * (used to handle NaN and Inf)
315  IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
316 *
317 * no interchange, use 1-by-1 pivot block
318 *
319  kp = k
320 *
321  ELSE
322 *
323 * Lop until pivot found
324 *
325  done = .false.
326 *
327  12 CONTINUE
328 *
329 * BEGIN pivot search loop body
330 *
331 *
332 * Copy column IMAX to column KW-1 of W and update it
333 *
334  IF( imax.GT.1 )
335  $ CALL ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),
336  $ 1 )
337  w( imax, kw-1 ) = real( a( imax, imax ) )
338 *
339  CALL ccopy( k-imax, a( imax, imax+1 ), lda,
340  $ w( imax+1, kw-1 ), 1 )
341  CALL clacgv( k-imax, w( imax+1, kw-1 ), 1 )
342 *
343  IF( k.LT.n ) THEN
344  CALL cgemv( 'No transpose', k, n-k, -cone,
345  $ a( 1, k+1 ), lda, w( imax, kw+1 ), ldw,
346  $ cone, w( 1, kw-1 ), 1 )
347  w( imax, kw-1 ) = real( w( imax, kw-1 ) )
348  END IF
349 *
350 * JMAX is the column-index of the largest off-diagonal
351 * element in row IMAX, and ROWMAX is its absolute value.
352 * Determine both ROWMAX and JMAX.
353 *
354  IF( imax.NE.k ) THEN
355  jmax = imax + icamax( k-imax, w( imax+1, kw-1 ),
356  $ 1 )
357  rowmax = cabs1( w( jmax, kw-1 ) )
358  ELSE
359  rowmax = zero
360  END IF
361 *
362  IF( imax.GT.1 ) THEN
363  itemp = icamax( imax-1, w( 1, kw-1 ), 1 )
364  stemp = cabs1( w( itemp, kw-1 ) )
365  IF( stemp.GT.rowmax ) THEN
366  rowmax = stemp
367  jmax = itemp
368  END IF
369  END IF
370 *
371 * Case(2)
372 * Equivalent to testing for
373 * ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX
374 * (used to handle NaN and Inf)
375 *
376  IF( .NOT.( abs( real( w( imax,kw-1 ) ) )
377  $ .LT.alpha*rowmax ) ) THEN
378 *
379 * interchange rows and columns K and IMAX,
380 * use 1-by-1 pivot block
381 *
382  kp = imax
383 *
384 * copy column KW-1 of W to column KW of W
385 *
386  CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
387 *
388  done = .true.
389 *
390 * Case(3)
391 * Equivalent to testing for ROWMAX.EQ.COLMAX,
392 * (used to handle NaN and Inf)
393 *
394  ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
395  $ THEN
396 *
397 * interchange rows and columns K-1 and IMAX,
398 * use 2-by-2 pivot block
399 *
400  kp = imax
401  kstep = 2
402  done = .true.
403 *
404 * Case(4)
405  ELSE
406 *
407 * Pivot not found: set params and repeat
408 *
409  p = imax
410  colmax = rowmax
411  imax = jmax
412 *
413 * Copy updated JMAXth (next IMAXth) column to Kth of W
414 *
415  CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
416 *
417  END IF
418 *
419 *
420 * END pivot search loop body
421 *
422  IF( .NOT.done ) GOTO 12
423 *
424  END IF
425 *
426 * END pivot search
427 *
428 * ============================================================
429 *
430 * KK is the column of A where pivoting step stopped
431 *
432  kk = k - kstep + 1
433 *
434 * KKW is the column of W which corresponds to column KK of A
435 *
436  kkw = nb + kk - n
437 *
438 * Interchange rows and columns P and K.
439 * Updated column P is already stored in column KW of W.
440 *
441  IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
442 *
443 * Copy non-updated column K to column P of submatrix A
444 * at step K. No need to copy element into columns
445 * K and K-1 of A for 2-by-2 pivot, since these columns
446 * will be later overwritten.
447 *
448  a( p, p ) = real( a( k, k ) )
449  CALL ccopy( k-1-p, a( p+1, k ), 1, a( p, p+1 ),
450  $ lda )
451  CALL clacgv( k-1-p, a( p, p+1 ), lda )
452  IF( p.GT.1 )
453  $ CALL ccopy( p-1, a( 1, k ), 1, a( 1, p ), 1 )
454 *
455 * Interchange rows K and P in the last K+1 to N columns of A
456 * (columns K and K-1 of A for 2-by-2 pivot will be
457 * later overwritten). Interchange rows K and P
458 * in last KKW to NB columns of W.
459 *
460  IF( k.LT.n )
461  $ CALL cswap( n-k, a( k, k+1 ), lda, a( p, k+1 ),
462  $ lda )
463  CALL cswap( n-kk+1, w( k, kkw ), ldw, w( p, kkw ),
464  $ ldw )
465  END IF
466 *
467 * Interchange rows and columns KP and KK.
468 * Updated column KP is already stored in column KKW of W.
469 *
470  IF( kp.NE.kk ) THEN
471 *
472 * Copy non-updated column KK to column KP of submatrix A
473 * at step K. No need to copy element into column K
474 * (or K and K-1 for 2-by-2 pivot) of A, since these columns
475 * will be later overwritten.
476 *
477  a( kp, kp ) = real( a( kk, kk ) )
478  CALL ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),
479  $ lda )
480  CALL clacgv( kk-1-kp, a( kp, kp+1 ), lda )
481  IF( kp.GT.1 )
482  $ CALL ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 )
483 *
484 * Interchange rows KK and KP in last K+1 to N columns of A
485 * (columns K (or K and K-1 for 2-by-2 pivot) of A will be
486 * later overwritten). Interchange rows KK and KP
487 * in last KKW to NB columns of W.
488 *
489  IF( k.LT.n )
490  $ CALL cswap( n-k, a( kk, k+1 ), lda, a( kp, k+1 ),
491  $ lda )
492  CALL cswap( n-kk+1, w( kk, kkw ), ldw, w( kp, kkw ),
493  $ ldw )
494  END IF
495 *
496  IF( kstep.EQ.1 ) THEN
497 *
498 * 1-by-1 pivot block D(k): column kw of W now holds
499 *
500 * W(kw) = U(k)*D(k),
501 *
502 * where U(k) is the k-th column of U
503 *
504 * (1) Store subdiag. elements of column U(k)
505 * and 1-by-1 block D(k) in column k of A.
506 * (NOTE: Diagonal element U(k,k) is a UNIT element
507 * and not stored)
508 * A(k,k) := D(k,k) = W(k,kw)
509 * A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
510 *
511 * (NOTE: No need to use for Hermitian matrix
512 * A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
513 * element D(k,k) from W (potentially saves only one load))
514  CALL ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 )
515  IF( k.GT.1 ) THEN
516 *
517 * (NOTE: No need to check if A(k,k) is NOT ZERO,
518 * since that was ensured earlier in pivot search:
519 * case A(k,k) = 0 falls into 2x2 pivot case(3))
520 *
521 * Handle division by a small number
522 *
523  t = real( a( k, k ) )
524  IF( abs( t ).GE.sfmin ) THEN
525  r1 = one / t
526  CALL csscal( k-1, r1, a( 1, k ), 1 )
527  ELSE
528  DO 14 ii = 1, k-1
529  a( ii, k ) = a( ii, k ) / t
530  14 CONTINUE
531  END IF
532 *
533 * (2) Conjugate column W(kw)
534 *
535  CALL clacgv( k-1, w( 1, kw ), 1 )
536  END IF
537 *
538  ELSE
539 *
540 * 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
541 *
542 * ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
543 *
544 * where U(k) and U(k-1) are the k-th and (k-1)-th columns
545 * of U
546 *
547 * (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
548 * block D(k-1:k,k-1:k) in columns k-1 and k of A.
549 * (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
550 * block and not stored)
551 * A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
552 * A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
553 * = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
554 *
555  IF( k.GT.2 ) THEN
556 *
557 * Factor out the columns of the inverse of 2-by-2 pivot
558 * block D, so that each column contains 1, to reduce the
559 * number of FLOPS when we multiply panel
560 * ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
561 *
562 * D**(-1) = ( d11 cj(d21) )**(-1) =
563 * ( d21 d22 )
564 *
565 * = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
566 * ( (-d21) ( d11 ) )
567 *
568 * = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
569 *
570 * * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
571 * ( ( -1 ) ( d11/conj(d21) ) )
572 *
573 * = 1/(|d21|**2) * 1/(D22*D11-1) *
574 *
575 * * ( d21*( D11 ) conj(d21)*( -1 ) ) =
576 * ( ( -1 ) ( D22 ) )
577 *
578 * = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
579 * ( ( -1 ) ( D22 ) )
580 *
581 * = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
582 * ( ( -1 ) ( D22 ) )
583 *
584 * Handle division by a small number. (NOTE: order of
585 * operations is important)
586 *
587 * = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
588 * ( (( -1 ) ) (( D22 ) ) ),
589 *
590 * where D11 = d22/d21,
591 * D22 = d11/conj(d21),
592 * D21 = d21,
593 * T = 1/(D22*D11-1).
594 *
595 * (NOTE: No need to check for division by ZERO,
596 * since that was ensured earlier in pivot search:
597 * (a) d21 != 0 in 2x2 pivot case(4),
598 * since |d21| should be larger than |d11| and |d22|;
599 * (b) (D22*D11 - 1) != 0, since from (a),
600 * both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
601 *
602  d21 = w( k-1, kw )
603  d11 = w( k, kw ) / conjg( d21 )
604  d22 = w( k-1, kw-1 ) / d21
605  t = one / ( real( d11*d22 )-one )
606 *
607 * Update elements in columns A(k-1) and A(k) as
608 * dot products of rows of ( W(kw-1) W(kw) ) and columns
609 * of D**(-1)
610 *
611  DO 20 j = 1, k - 2
612  a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /
613  $ d21 )
614  a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /
615  $ conjg( d21 ) )
616  20 CONTINUE
617  END IF
618 *
619 * Copy D(k) to A
620 *
621  a( k-1, k-1 ) = w( k-1, kw-1 )
622  a( k-1, k ) = w( k-1, kw )
623  a( k, k ) = w( k, kw )
624 *
625 * (2) Conjugate columns W(kw) and W(kw-1)
626 *
627  CALL clacgv( k-1, w( 1, kw ), 1 )
628  CALL clacgv( k-2, w( 1, kw-1 ), 1 )
629 *
630  END IF
631 *
632  END IF
633 *
634 * Store details of the interchanges in IPIV
635 *
636  IF( kstep.EQ.1 ) THEN
637  ipiv( k ) = kp
638  ELSE
639  ipiv( k ) = -p
640  ipiv( k-1 ) = -kp
641  END IF
642 *
643 * Decrease K and return to the start of the main loop
644 *
645  k = k - kstep
646  GO TO 10
647 *
648  30 CONTINUE
649 *
650 * Update the upper triangle of A11 (= A(1:k,1:k)) as
651 *
652 * A11 := A11 - U12*D*U12**H = A11 - U12*W**H
653 *
654 * computing blocks of NB columns at a time (note that conjg(W) is
655 * actually stored)
656 *
657  DO 50 j = ( ( k-1 ) / nb )*nb + 1, 1, -nb
658  jb = min( nb, k-j+1 )
659 *
660 * Update the upper triangle of the diagonal block
661 *
662  DO 40 jj = j, j + jb - 1
663  a( jj, jj ) = real( a( jj, jj ) )
664  CALL cgemv( 'No transpose', jj-j+1, n-k, -cone,
665  $ a( j, k+1 ), lda, w( jj, kw+1 ), ldw, cone,
666  $ a( j, jj ), 1 )
667  a( jj, jj ) = real( a( jj, jj ) )
668  40 CONTINUE
669 *
670 * Update the rectangular superdiagonal block
671 *
672  IF( j.GE.2 )
673  $ CALL cgemm( 'No transpose', 'Transpose', j-1, jb, n-k,
674  $ -cone, a( 1, k+1 ), lda, w( j, kw+1 ), ldw,
675  $ cone, a( 1, j ), lda )
676  50 CONTINUE
677 *
678 * Put U12 in standard form by partially undoing the interchanges
679 * in of rows in columns k+1:n looping backwards from k+1 to n
680 *
681  j = k + 1
682  60 CONTINUE
683 *
684 * Undo the interchanges (if any) of rows J and JP2
685 * (or J and JP2, and J+1 and JP1) at each step J
686 *
687  kstep = 1
688  jp1 = 1
689 * (Here, J is a diagonal index)
690  jj = j
691  jp2 = ipiv( j )
692  IF( jp2.LT.0 ) THEN
693  jp2 = -jp2
694 * (Here, J is a diagonal index)
695  j = j + 1
696  jp1 = -ipiv( j )
697  kstep = 2
698  END IF
699 * (NOTE: Here, J is used to determine row length. Length N-J+1
700 * of the rows to swap back doesn't include diagonal element)
701  j = j + 1
702  IF( jp2.NE.jj .AND. j.LE.n )
703  $ CALL cswap( n-j+1, a( jp2, j ), lda, a( jj, j ), lda )
704  jj = jj + 1
705  IF( kstep.EQ.2 .AND. jp1.NE.jj .AND. j.LE.n )
706  $ CALL cswap( n-j+1, a( jp1, j ), lda, a( jj, j ), lda )
707  IF( j.LT.n )
708  $ GO TO 60
709 *
710 * Set KB to the number of columns factorized
711 *
712  kb = n - k
713 *
714  ELSE
715 *
716 * Factorize the leading columns of A using the lower triangle
717 * of A and working forwards, and compute the matrix W = L21*D
718 * for use in updating A22 (note that conjg(W) is actually stored)
719 *
720 * K is the main loop index, increasing from 1 in steps of 1 or 2
721 *
722  k = 1
723  70 CONTINUE
724 *
725 * Exit from loop
726 *
727  IF( ( k.GE.nb .AND. nb.LT.n ) .OR. k.GT.n )
728  $ GO TO 90
729 *
730  kstep = 1
731  p = k
732 *
733 * Copy column K of A to column K of W and update column K of W
734 *
735  w( k, k ) = real( a( k, k ) )
736  IF( k.LT.n )
737  $ CALL ccopy( n-k, a( k+1, k ), 1, w( k+1, k ), 1 )
738  IF( k.GT.1 ) THEN
739  CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ),
740  $ lda, w( k, 1 ), ldw, cone, w( k, k ), 1 )
741  w( k, k ) = real( w( k, k ) )
742  END IF
743 *
744 * Determine rows and columns to be interchanged and whether
745 * a 1-by-1 or 2-by-2 pivot block will be used
746 *
747  absakk = abs( real( w( k, k ) ) )
748 *
749 * IMAX is the row-index of the largest off-diagonal element in
750 * column K, and COLMAX is its absolute value.
751 * Determine both COLMAX and IMAX.
752 *
753  IF( k.LT.n ) THEN
754  imax = k + icamax( n-k, w( k+1, k ), 1 )
755  colmax = cabs1( w( imax, k ) )
756  ELSE
757  colmax = zero
758  END IF
759 *
760  IF( max( absakk, colmax ).EQ.zero ) THEN
761 *
762 * Column K is zero or underflow: set INFO and continue
763 *
764  IF( info.EQ.0 )
765  $ info = k
766  kp = k
767  a( k, k ) = real( w( k, k ) )
768  IF( k.LT.n )
769  $ CALL ccopy( n-k, w( k+1, k ), 1, a( k+1, k ), 1 )
770  ELSE
771 *
772 * ============================================================
773 *
774 * BEGIN pivot search
775 *
776 * Case(1)
777 * Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
778 * (used to handle NaN and Inf)
779 *
780  IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
781 *
782 * no interchange, use 1-by-1 pivot block
783 *
784  kp = k
785 *
786  ELSE
787 *
788  done = .false.
789 *
790 * Loop until pivot found
791 *
792  72 CONTINUE
793 *
794 * BEGIN pivot search loop body
795 *
796 *
797 * Copy column IMAX to column k+1 of W and update it
798 *
799  CALL ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1)
800  CALL clacgv( imax-k, w( k, k+1 ), 1 )
801  w( imax, k+1 ) = real( a( imax, imax ) )
802 *
803  IF( imax.LT.n )
804  $ CALL ccopy( n-imax, a( imax+1, imax ), 1,
805  $ w( imax+1, k+1 ), 1 )
806 *
807  IF( k.GT.1 ) THEN
808  CALL cgemv( 'No transpose', n-k+1, k-1, -cone,
809  $ a( k, 1 ), lda, w( imax, 1 ), ldw,
810  $ cone, w( k, k+1 ), 1 )
811  w( imax, k+1 ) = real( w( imax, k+1 ) )
812  END IF
813 *
814 * JMAX is the column-index of the largest off-diagonal
815 * element in row IMAX, and ROWMAX is its absolute value.
816 * Determine both ROWMAX and JMAX.
817 *
818  IF( imax.NE.k ) THEN
819  jmax = k - 1 + icamax( imax-k, w( k, k+1 ), 1 )
820  rowmax = cabs1( w( jmax, k+1 ) )
821  ELSE
822  rowmax = zero
823  END IF
824 *
825  IF( imax.LT.n ) THEN
826  itemp = imax + icamax( n-imax, w( imax+1, k+1 ), 1)
827  stemp = cabs1( w( itemp, k+1 ) )
828  IF( stemp.GT.rowmax ) THEN
829  rowmax = stemp
830  jmax = itemp
831  END IF
832  END IF
833 *
834 * Case(2)
835 * Equivalent to testing for
836 * ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX
837 * (used to handle NaN and Inf)
838 *
839  IF( .NOT.( abs( real( w( imax,k+1 ) ) )
840  $ .LT.alpha*rowmax ) ) THEN
841 *
842 * interchange rows and columns K and IMAX,
843 * use 1-by-1 pivot block
844 *
845  kp = imax
846 *
847 * copy column K+1 of W to column K of W
848 *
849  CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
850 *
851  done = .true.
852 *
853 * Case(3)
854 * Equivalent to testing for ROWMAX.EQ.COLMAX,
855 * (used to handle NaN and Inf)
856 *
857  ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
858  $ THEN
859 *
860 * interchange rows and columns K+1 and IMAX,
861 * use 2-by-2 pivot block
862 *
863  kp = imax
864  kstep = 2
865  done = .true.
866 *
867 * Case(4)
868  ELSE
869 *
870 * Pivot not found: set params and repeat
871 *
872  p = imax
873  colmax = rowmax
874  imax = jmax
875 *
876 * Copy updated JMAXth (next IMAXth) column to Kth of W
877 *
878  CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
879 *
880  END IF
881 *
882 *
883 * End pivot search loop body
884 *
885  IF( .NOT.done ) GOTO 72
886 *
887  END IF
888 *
889 * END pivot search
890 *
891 * ============================================================
892 *
893 * KK is the column of A where pivoting step stopped
894 *
895  kk = k + kstep - 1
896 *
897 * Interchange rows and columns P and K (only for 2-by-2 pivot).
898 * Updated column P is already stored in column K of W.
899 *
900  IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
901 *
902 * Copy non-updated column KK-1 to column P of submatrix A
903 * at step K. No need to copy element into columns
904 * K and K+1 of A for 2-by-2 pivot, since these columns
905 * will be later overwritten.
906 *
907  a( p, p ) = real( a( k, k ) )
908  CALL ccopy( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda )
909  CALL clacgv( p-k-1, a( p, k+1 ), lda )
910  IF( p.LT.n )
911  $ CALL ccopy( n-p, a( p+1, k ), 1, a( p+1, p ), 1 )
912 *
913 * Interchange rows K and P in first K-1 columns of A
914 * (columns K and K+1 of A for 2-by-2 pivot will be
915 * later overwritten). Interchange rows K and P
916 * in first KK columns of W.
917 *
918  IF( k.GT.1 )
919  $ CALL cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda )
920  CALL cswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw )
921  END IF
922 *
923 * Interchange rows and columns KP and KK.
924 * Updated column KP is already stored in column KK of W.
925 *
926  IF( kp.NE.kk ) THEN
927 *
928 * Copy non-updated column KK to column KP of submatrix A
929 * at step K. No need to copy element into column K
930 * (or K and K+1 for 2-by-2 pivot) of A, since these columns
931 * will be later overwritten.
932 *
933  a( kp, kp ) = real( a( kk, kk ) )
934  CALL ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),
935  $ lda )
936  CALL clacgv( kp-kk-1, a( kp, kk+1 ), lda )
937  IF( kp.LT.n )
938  $ CALL ccopy( n-kp, a( kp+1, kk ), 1, a( kp+1, kp ), 1 )
939 *
940 * Interchange rows KK and KP in first K-1 columns of A
941 * (column K (or K and K+1 for 2-by-2 pivot) of A will be
942 * later overwritten). Interchange rows KK and KP
943 * in first KK columns of W.
944 *
945  IF( k.GT.1 )
946  $ CALL cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda )
947  CALL cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw )
948  END IF
949 *
950  IF( kstep.EQ.1 ) THEN
951 *
952 * 1-by-1 pivot block D(k): column k of W now holds
953 *
954 * W(k) = L(k)*D(k),
955 *
956 * where L(k) is the k-th column of L
957 *
958 * (1) Store subdiag. elements of column L(k)
959 * and 1-by-1 block D(k) in column k of A.
960 * (NOTE: Diagonal element L(k,k) is a UNIT element
961 * and not stored)
962 * A(k,k) := D(k,k) = W(k,k)
963 * A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
964 *
965 * (NOTE: No need to use for Hermitian matrix
966 * A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
967 * element D(k,k) from W (potentially saves only one load))
968  CALL ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 )
969  IF( k.LT.n ) THEN
970 *
971 * (NOTE: No need to check if A(k,k) is NOT ZERO,
972 * since that was ensured earlier in pivot search:
973 * case A(k,k) = 0 falls into 2x2 pivot case(3))
974 *
975 * Handle division by a small number
976 *
977  t = real( a( k, k ) )
978  IF( abs( t ).GE.sfmin ) THEN
979  r1 = one / t
980  CALL csscal( n-k, r1, a( k+1, k ), 1 )
981  ELSE
982  DO 74 ii = k + 1, n
983  a( ii, k ) = a( ii, k ) / t
984  74 CONTINUE
985  END IF
986 *
987 * (2) Conjugate column W(k)
988 *
989  CALL clacgv( n-k, w( k+1, k ), 1 )
990  END IF
991 *
992  ELSE
993 *
994 * 2-by-2 pivot block D(k): columns k and k+1 of W now hold
995 *
996 * ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
997 *
998 * where L(k) and L(k+1) are the k-th and (k+1)-th columns
999 * of L
1000 *
1001 * (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
1002 * block D(k:k+1,k:k+1) in columns k and k+1 of A.
1003 * NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
1004 * block and not stored.
1005 * A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
1006 * A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
1007 * = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
1008 *
1009  IF( k.LT.n-1 ) THEN
1010 *
1011 * Factor out the columns of the inverse of 2-by-2 pivot
1012 * block D, so that each column contains 1, to reduce the
1013 * number of FLOPS when we multiply panel
1014 * ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
1015 *
1016 * D**(-1) = ( d11 cj(d21) )**(-1) =
1017 * ( d21 d22 )
1018 *
1019 * = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
1020 * ( (-d21) ( d11 ) )
1021 *
1022 * = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
1023 *
1024 * * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
1025 * ( ( -1 ) ( d11/conj(d21) ) )
1026 *
1027 * = 1/(|d21|**2) * 1/(D22*D11-1) *
1028 *
1029 * * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1030 * ( ( -1 ) ( D22 ) )
1031 *
1032 * = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1033 * ( ( -1 ) ( D22 ) )
1034 *
1035 * = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
1036 * ( ( -1 ) ( D22 ) )
1037 *
1038 * Handle division by a small number. (NOTE: order of
1039 * operations is important)
1040 *
1041 * = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
1042 * ( (( -1 ) ) (( D22 ) ) ),
1043 *
1044 * where D11 = d22/d21,
1045 * D22 = d11/conj(d21),
1046 * D21 = d21,
1047 * T = 1/(D22*D11-1).
1048 *
1049 * (NOTE: No need to check for division by ZERO,
1050 * since that was ensured earlier in pivot search:
1051 * (a) d21 != 0 in 2x2 pivot case(4),
1052 * since |d21| should be larger than |d11| and |d22|;
1053 * (b) (D22*D11 - 1) != 0, since from (a),
1054 * both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
1055 *
1056  d21 = w( k+1, k )
1057  d11 = w( k+1, k+1 ) / d21
1058  d22 = w( k, k ) / conjg( d21 )
1059  t = one / ( real( d11*d22 )-one )
1060 *
1061 * Update elements in columns A(k) and A(k+1) as
1062 * dot products of rows of ( W(k) W(k+1) ) and columns
1063 * of D**(-1)
1064 *
1065  DO 80 j = k + 2, n
1066  a( j, k ) = t*( ( d11*w( j, k )-w( j, k+1 ) ) /
1067  $ conjg( d21 ) )
1068  a( j, k+1 ) = t*( ( d22*w( j, k+1 )-w( j, k ) ) /
1069  $ d21 )
1070  80 CONTINUE
1071  END IF
1072 *
1073 * Copy D(k) to A
1074 *
1075  a( k, k ) = w( k, k )
1076  a( k+1, k ) = w( k+1, k )
1077  a( k+1, k+1 ) = w( k+1, k+1 )
1078 *
1079 * (2) Conjugate columns W(k) and W(k+1)
1080 *
1081  CALL clacgv( n-k, w( k+1, k ), 1 )
1082  CALL clacgv( n-k-1, w( k+2, k+1 ), 1 )
1083 *
1084  END IF
1085 *
1086  END IF
1087 *
1088 * Store details of the interchanges in IPIV
1089 *
1090  IF( kstep.EQ.1 ) THEN
1091  ipiv( k ) = kp
1092  ELSE
1093  ipiv( k ) = -p
1094  ipiv( k+1 ) = -kp
1095  END IF
1096 *
1097 * Increase K and return to the start of the main loop
1098 *
1099  k = k + kstep
1100  GO TO 70
1101 *
1102  90 CONTINUE
1103 *
1104 * Update the lower triangle of A22 (= A(k:n,k:n)) as
1105 *
1106 * A22 := A22 - L21*D*L21**H = A22 - L21*W**H
1107 *
1108 * computing blocks of NB columns at a time (note that conjg(W) is
1109 * actually stored)
1110 *
1111  DO 110 j = k, n, nb
1112  jb = min( nb, n-j+1 )
1113 *
1114 * Update the lower triangle of the diagonal block
1115 *
1116  DO 100 jj = j, j + jb - 1
1117  a( jj, jj ) = real( a( jj, jj ) )
1118  CALL cgemv( 'No transpose', j+jb-jj, k-1, -cone,
1119  $ a( jj, 1 ), lda, w( jj, 1 ), ldw, cone,
1120  $ a( jj, jj ), 1 )
1121  a( jj, jj ) = real( a( jj, jj ) )
1122  100 CONTINUE
1123 *
1124 * Update the rectangular subdiagonal block
1125 *
1126  IF( j+jb.LE.n )
1127  $ CALL cgemm( 'No transpose', 'Transpose', n-j-jb+1, jb,
1128  $ k-1, -cone, a( j+jb, 1 ), lda, w( j, 1 ),
1129  $ ldw, cone, a( j+jb, j ), lda )
1130  110 CONTINUE
1131 *
1132 * Put L21 in standard form by partially undoing the interchanges
1133 * of rows in columns 1:k-1 looping backwards from k-1 to 1
1134 *
1135  j = k - 1
1136  120 CONTINUE
1137 *
1138 * Undo the interchanges (if any) of rows J and JP2
1139 * (or J and JP2, and J-1 and JP1) at each step J
1140 *
1141  kstep = 1
1142  jp1 = 1
1143 * (Here, J is a diagonal index)
1144  jj = j
1145  jp2 = ipiv( j )
1146  IF( jp2.LT.0 ) THEN
1147  jp2 = -jp2
1148 * (Here, J is a diagonal index)
1149  j = j - 1
1150  jp1 = -ipiv( j )
1151  kstep = 2
1152  END IF
1153 * (NOTE: Here, J is used to determine row length. Length J
1154 * of the rows to swap back doesn't include diagonal element)
1155  j = j - 1
1156  IF( jp2.NE.jj .AND. j.GE.1 )
1157  $ CALL cswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda )
1158  jj = jj -1
1159  IF( kstep.EQ.2 .AND. jp1.NE.jj .AND. j.GE.1 )
1160  $ CALL cswap( j, a( jp1, 1 ), lda, a( jj, 1 ), lda )
1161  IF( j.GT.1 )
1162  $ GO TO 120
1163 *
1164 * Set KB to the number of columns factorized
1165 *
1166  kb = k - 1
1167 *
1168  END IF
1169  RETURN
1170 *
1171 * End of CLAHEF_ROOK
1172 *
integer function icamax(N, CX, INCX)
ICAMAX
Definition: icamax.f:71
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:81
subroutine csscal(N, SA, CX, INCX)
CSSCAL
Definition: csscal.f:78
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:81
subroutine cgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
CGEMV
Definition: cgemv.f:158
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:187
subroutine clacgv(N, X, INCX)
CLACGV conjugates a complex vector.
Definition: clacgv.f:74
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:68
Here is the call graph for this function:
Here is the caller graph for this function: