LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ clahef()

subroutine clahef ( character  UPLO,
integer  N,
integer  NB,
integer  KB,
complex, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
complex, dimension( ldw, * )  W,
integer  LDW,
integer  INFO 
)

CLAHEF computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kaufman diagonal pivoting method (blocked algorithm, calling Level 3 BLAS).

Download CLAHEF + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CLAHEF computes a partial factorization of a complex Hermitian
 matrix A using the Bunch-Kaufman diagonal pivoting method. The
 partial factorization has the form:

 A  =  ( I  U12 ) ( A11  0  ) (  I      0     )  if UPLO = 'U', or:
       ( 0  U22 ) (  0   D  ) ( U12**H U22**H )

 A  =  ( L11  0 ) (  D   0  ) ( L11**H L21**H )  if UPLO = 'L'
       ( L21  I ) (  0  A22 ) (  0      I     )

 where the order of D is at most NB. The actual order is returned in
 the argument KB, and is either NB or NB-1, or N if N <= NB.
 Note that U**H denotes the conjugate transpose of U.

 CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code
 (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
 A22 (if UPLO = 'L').
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          Specifies whether the upper or lower triangular part of the
          Hermitian matrix A is stored:
          = 'U':  Upper triangular
          = 'L':  Lower triangular
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in]NB
          NB is INTEGER
          The maximum number of columns of the matrix A that should be
          factored.  NB should be at least 2 to allow for 2-by-2 pivot
          blocks.
[out]KB
          KB is INTEGER
          The number of columns of A that were actually factored.
          KB is either NB-1 or NB, or N if N <= NB.
[in,out]A
          A is COMPLEX array, dimension (LDA,N)
          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
          n-by-n upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading n-by-n lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.
          On exit, A contains details of the partial factorization.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          Details of the interchanges and the block structure of D.

          If UPLO = 'U':
             Only the last KB elements of IPIV are set.

             If IPIV(k) > 0, then rows and columns k and IPIV(k) were
             interchanged and D(k,k) is a 1-by-1 diagonal block.

             If IPIV(k) = IPIV(k-1) < 0, then rows and columns
             k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
             is a 2-by-2 diagonal block.

          If UPLO = 'L':
             Only the first KB elements of IPIV are set.

             If IPIV(k) > 0, then rows and columns k and IPIV(k) were
             interchanged and D(k,k) is a 1-by-1 diagonal block.

             If IPIV(k) = IPIV(k+1) < 0, then rows and columns
             k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1)
             is a 2-by-2 diagonal block.
[out]W
          W is COMPLEX array, dimension (LDW,NB)
[in]LDW
          LDW is INTEGER
          The leading dimension of the array W.  LDW >= max(1,N).
[out]INFO
          INFO is INTEGER
          = 0: successful exit
          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
               has been completed, but the block diagonal matrix D is
               exactly singular.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
  November 2013,  Igor Kozachenko,
                  Computer Science Division,
                  University of California, Berkeley

Definition at line 176 of file clahef.f.

177 *
178 * -- LAPACK computational routine --
179 * -- LAPACK is a software package provided by Univ. of Tennessee, --
180 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
181 *
182 * .. Scalar Arguments ..
183  CHARACTER UPLO
184  INTEGER INFO, KB, LDA, LDW, N, NB
185 * ..
186 * .. Array Arguments ..
187  INTEGER IPIV( * )
188  COMPLEX A( LDA, * ), W( LDW, * )
189 * ..
190 *
191 * =====================================================================
192 *
193 * .. Parameters ..
194  REAL ZERO, ONE
195  parameter( zero = 0.0e+0, one = 1.0e+0 )
196  COMPLEX CONE
197  parameter( cone = ( 1.0e+0, 0.0e+0 ) )
198  REAL EIGHT, SEVTEN
199  parameter( eight = 8.0e+0, sevten = 17.0e+0 )
200 * ..
201 * .. Local Scalars ..
202  INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
203  $ KSTEP, KW
204  REAL ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T
205  COMPLEX D11, D21, D22, Z
206 * ..
207 * .. External Functions ..
208  LOGICAL LSAME
209  INTEGER ICAMAX
210  EXTERNAL lsame, icamax
211 * ..
212 * .. External Subroutines ..
213  EXTERNAL ccopy, cgemm, cgemv, clacgv, csscal, cswap
214 * ..
215 * .. Intrinsic Functions ..
216  INTRINSIC abs, aimag, conjg, max, min, real, sqrt
217 * ..
218 * .. Statement Functions ..
219  REAL CABS1
220 * ..
221 * .. Statement Function definitions ..
222  cabs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
223 * ..
224 * .. Executable Statements ..
225 *
226  info = 0
227 *
228 * Initialize ALPHA for use in choosing pivot block size.
229 *
230  alpha = ( one+sqrt( sevten ) ) / eight
231 *
232  IF( lsame( uplo, 'U' ) ) THEN
233 *
234 * Factorize the trailing columns of A using the upper triangle
235 * of A and working backwards, and compute the matrix W = U12*D
236 * for use in updating A11 (note that conjg(W) is actually stored)
237 *
238 * K is the main loop index, decreasing from N in steps of 1 or 2
239 *
240  k = n
241  10 CONTINUE
242 *
243 * KW is the column of W which corresponds to column K of A
244 *
245  kw = nb + k - n
246 *
247 * Exit from loop
248 *
249  IF( ( k.LE.n-nb+1 .AND. nb.LT.n ) .OR. k.LT.1 )
250  $ GO TO 30
251 *
252  kstep = 1
253 *
254 * Copy column K of A to column KW of W and update it
255 *
256  CALL ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 )
257  w( k, kw ) = real( a( k, k ) )
258  IF( k.LT.n ) THEN
259  CALL cgemv( 'No transpose', k, n-k, -cone, a( 1, k+1 ), lda,
260  $ w( k, kw+1 ), ldw, cone, w( 1, kw ), 1 )
261  w( k, kw ) = real( w( k, kw ) )
262  END IF
263 *
264 * Determine rows and columns to be interchanged and whether
265 * a 1-by-1 or 2-by-2 pivot block will be used
266 *
267  absakk = abs( real( w( k, kw ) ) )
268 *
269 * IMAX is the row-index of the largest off-diagonal element in
270 * column K, and COLMAX is its absolute value.
271 * Determine both COLMAX and IMAX.
272 *
273  IF( k.GT.1 ) THEN
274  imax = icamax( k-1, w( 1, kw ), 1 )
275  colmax = cabs1( w( imax, kw ) )
276  ELSE
277  colmax = zero
278  END IF
279 *
280  IF( max( absakk, colmax ).EQ.zero ) THEN
281 *
282 * Column K is zero or underflow: set INFO and continue
283 *
284  IF( info.EQ.0 )
285  $ info = k
286  kp = k
287  a( k, k ) = real( a( k, k ) )
288  ELSE
289 *
290 * ============================================================
291 *
292 * BEGIN pivot search
293 *
294 * Case(1)
295  IF( absakk.GE.alpha*colmax ) THEN
296 *
297 * no interchange, use 1-by-1 pivot block
298 *
299  kp = k
300  ELSE
301 *
302 * BEGIN pivot search along IMAX row
303 *
304 *
305 * Copy column IMAX to column KW-1 of W and update it
306 *
307  CALL ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ), 1 )
308  w( imax, kw-1 ) = real( a( imax, imax ) )
309  CALL ccopy( k-imax, a( imax, imax+1 ), lda,
310  $ w( imax+1, kw-1 ), 1 )
311  CALL clacgv( k-imax, w( imax+1, kw-1 ), 1 )
312  IF( k.LT.n ) THEN
313  CALL cgemv( 'No transpose', k, n-k, -cone,
314  $ a( 1, k+1 ), lda, w( imax, kw+1 ), ldw,
315  $ cone, w( 1, kw-1 ), 1 )
316  w( imax, kw-1 ) = real( w( imax, kw-1 ) )
317  END IF
318 *
319 * JMAX is the column-index of the largest off-diagonal
320 * element in row IMAX, and ROWMAX is its absolute value.
321 * Determine only ROWMAX.
322 *
323  jmax = imax + icamax( k-imax, w( imax+1, kw-1 ), 1 )
324  rowmax = cabs1( w( jmax, kw-1 ) )
325  IF( imax.GT.1 ) THEN
326  jmax = icamax( imax-1, w( 1, kw-1 ), 1 )
327  rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) )
328  END IF
329 *
330 * Case(2)
331  IF( absakk.GE.alpha*colmax*( colmax / rowmax ) ) THEN
332 *
333 * no interchange, use 1-by-1 pivot block
334 *
335  kp = k
336 *
337 * Case(3)
338  ELSE IF( abs( real( w( imax, kw-1 ) ) ).GE.alpha*rowmax )
339  $ THEN
340 *
341 * interchange rows and columns K and IMAX, use 1-by-1
342 * pivot block
343 *
344  kp = imax
345 *
346 * copy column KW-1 of W to column KW of W
347 *
348  CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
349 *
350 * Case(4)
351  ELSE
352 *
353 * interchange rows and columns K-1 and IMAX, use 2-by-2
354 * pivot block
355 *
356  kp = imax
357  kstep = 2
358  END IF
359 *
360 *
361 * END pivot search along IMAX row
362 *
363  END IF
364 *
365 * END pivot search
366 *
367 * ============================================================
368 *
369 * KK is the column of A where pivoting step stopped
370 *
371  kk = k - kstep + 1
372 *
373 * KKW is the column of W which corresponds to column KK of A
374 *
375  kkw = nb + kk - n
376 *
377 * Interchange rows and columns KP and KK.
378 * Updated column KP is already stored in column KKW of W.
379 *
380  IF( kp.NE.kk ) THEN
381 *
382 * Copy non-updated column KK to column KP of submatrix A
383 * at step K. No need to copy element into column K
384 * (or K and K-1 for 2-by-2 pivot) of A, since these columns
385 * will be later overwritten.
386 *
387  a( kp, kp ) = real( a( kk, kk ) )
388  CALL ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),
389  $ lda )
390  CALL clacgv( kk-1-kp, a( kp, kp+1 ), lda )
391  IF( kp.GT.1 )
392  $ CALL ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 )
393 *
394 * Interchange rows KK and KP in last K+1 to N columns of A
395 * (columns K (or K and K-1 for 2-by-2 pivot) of A will be
396 * later overwritten). Interchange rows KK and KP
397 * in last KKW to NB columns of W.
398 *
399  IF( k.LT.n )
400  $ CALL cswap( n-k, a( kk, k+1 ), lda, a( kp, k+1 ),
401  $ lda )
402  CALL cswap( n-kk+1, w( kk, kkw ), ldw, w( kp, kkw ),
403  $ ldw )
404  END IF
405 *
406  IF( kstep.EQ.1 ) THEN
407 *
408 * 1-by-1 pivot block D(k): column kw of W now holds
409 *
410 * W(kw) = U(k)*D(k),
411 *
412 * where U(k) is the k-th column of U
413 *
414 * (1) Store subdiag. elements of column U(k)
415 * and 1-by-1 block D(k) in column k of A.
416 * (NOTE: Diagonal element U(k,k) is a UNIT element
417 * and not stored)
418 * A(k,k) := D(k,k) = W(k,kw)
419 * A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
420 *
421 * (NOTE: No need to use for Hermitian matrix
422 * A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
423 * element D(k,k) from W (potentially saves only one load))
424  CALL ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 )
425  IF( k.GT.1 ) THEN
426 *
427 * (NOTE: No need to check if A(k,k) is NOT ZERO,
428 * since that was ensured earlier in pivot search:
429 * case A(k,k) = 0 falls into 2x2 pivot case(4))
430 *
431  r1 = one / real( a( k, k ) )
432  CALL csscal( k-1, r1, a( 1, k ), 1 )
433 *
434 * (2) Conjugate column W(kw)
435 *
436  CALL clacgv( k-1, w( 1, kw ), 1 )
437  END IF
438 *
439  ELSE
440 *
441 * 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
442 *
443 * ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
444 *
445 * where U(k) and U(k-1) are the k-th and (k-1)-th columns
446 * of U
447 *
448 * (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
449 * block D(k-1:k,k-1:k) in columns k-1 and k of A.
450 * (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
451 * block and not stored)
452 * A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
453 * A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
454 * = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
455 *
456  IF( k.GT.2 ) THEN
457 *
458 * Factor out the columns of the inverse of 2-by-2 pivot
459 * block D, so that each column contains 1, to reduce the
460 * number of FLOPS when we multiply panel
461 * ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
462 *
463 * D**(-1) = ( d11 cj(d21) )**(-1) =
464 * ( d21 d22 )
465 *
466 * = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
467 * ( (-d21) ( d11 ) )
468 *
469 * = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
470 *
471 * * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
472 * ( ( -1 ) ( d11/conj(d21) ) )
473 *
474 * = 1/(|d21|**2) * 1/(D22*D11-1) *
475 *
476 * * ( d21*( D11 ) conj(d21)*( -1 ) ) =
477 * ( ( -1 ) ( D22 ) )
478 *
479 * = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
480 * ( ( -1 ) ( D22 ) )
481 *
482 * = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
483 * ( ( -1 ) ( D22 ) )
484 *
485 * = ( conj(D21)*( D11 ) D21*( -1 ) )
486 * ( ( -1 ) ( D22 ) ),
487 *
488 * where D11 = d22/d21,
489 * D22 = d11/conj(d21),
490 * D21 = T/d21,
491 * T = 1/(D22*D11-1).
492 *
493 * (NOTE: No need to check for division by ZERO,
494 * since that was ensured earlier in pivot search:
495 * (a) d21 != 0, since in 2x2 pivot case(4)
496 * |d21| should be larger than |d11| and |d22|;
497 * (b) (D22*D11 - 1) != 0, since from (a),
498 * both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
499 *
500  d21 = w( k-1, kw )
501  d11 = w( k, kw ) / conjg( d21 )
502  d22 = w( k-1, kw-1 ) / d21
503  t = one / ( real( d11*d22 )-one )
504  d21 = t / d21
505 *
506 * Update elements in columns A(k-1) and A(k) as
507 * dot products of rows of ( W(kw-1) W(kw) ) and columns
508 * of D**(-1)
509 *
510  DO 20 j = 1, k - 2
511  a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) )
512  a( j, k ) = conjg( d21 )*
513  $ ( d22*w( j, kw )-w( j, kw-1 ) )
514  20 CONTINUE
515  END IF
516 *
517 * Copy D(k) to A
518 *
519  a( k-1, k-1 ) = w( k-1, kw-1 )
520  a( k-1, k ) = w( k-1, kw )
521  a( k, k ) = w( k, kw )
522 *
523 * (2) Conjugate columns W(kw) and W(kw-1)
524 *
525  CALL clacgv( k-1, w( 1, kw ), 1 )
526  CALL clacgv( k-2, w( 1, kw-1 ), 1 )
527 *
528  END IF
529 *
530  END IF
531 *
532 * Store details of the interchanges in IPIV
533 *
534  IF( kstep.EQ.1 ) THEN
535  ipiv( k ) = kp
536  ELSE
537  ipiv( k ) = -kp
538  ipiv( k-1 ) = -kp
539  END IF
540 *
541 * Decrease K and return to the start of the main loop
542 *
543  k = k - kstep
544  GO TO 10
545 *
546  30 CONTINUE
547 *
548 * Update the upper triangle of A11 (= A(1:k,1:k)) as
549 *
550 * A11 := A11 - U12*D*U12**H = A11 - U12*W**H
551 *
552 * computing blocks of NB columns at a time (note that conjg(W) is
553 * actually stored)
554 *
555  DO 50 j = ( ( k-1 ) / nb )*nb + 1, 1, -nb
556  jb = min( nb, k-j+1 )
557 *
558 * Update the upper triangle of the diagonal block
559 *
560  DO 40 jj = j, j + jb - 1
561  a( jj, jj ) = real( a( jj, jj ) )
562  CALL cgemv( 'No transpose', jj-j+1, n-k, -cone,
563  $ a( j, k+1 ), lda, w( jj, kw+1 ), ldw, cone,
564  $ a( j, jj ), 1 )
565  a( jj, jj ) = real( a( jj, jj ) )
566  40 CONTINUE
567 *
568 * Update the rectangular superdiagonal block
569 *
570  CALL cgemm( 'No transpose', 'Transpose', j-1, jb, n-k,
571  $ -cone, a( 1, k+1 ), lda, w( j, kw+1 ), ldw,
572  $ cone, a( 1, j ), lda )
573  50 CONTINUE
574 *
575 * Put U12 in standard form by partially undoing the interchanges
576 * in of rows in columns k+1:n looping backwards from k+1 to n
577 *
578  j = k + 1
579  60 CONTINUE
580 *
581 * Undo the interchanges (if any) of rows J and JP
582 * at each step J
583 *
584 * (Here, J is a diagonal index)
585  jj = j
586  jp = ipiv( j )
587  IF( jp.LT.0 ) THEN
588  jp = -jp
589 * (Here, J is a diagonal index)
590  j = j + 1
591  END IF
592 * (NOTE: Here, J is used to determine row length. Length N-J+1
593 * of the rows to swap back doesn't include diagonal element)
594  j = j + 1
595  IF( jp.NE.jj .AND. j.LE.n )
596  $ CALL cswap( n-j+1, a( jp, j ), lda, a( jj, j ), lda )
597  IF( j.LE.n )
598  $ GO TO 60
599 *
600 * Set KB to the number of columns factorized
601 *
602  kb = n - k
603 *
604  ELSE
605 *
606 * Factorize the leading columns of A using the lower triangle
607 * of A and working forwards, and compute the matrix W = L21*D
608 * for use in updating A22 (note that conjg(W) is actually stored)
609 *
610 * K is the main loop index, increasing from 1 in steps of 1 or 2
611 *
612  k = 1
613  70 CONTINUE
614 *
615 * Exit from loop
616 *
617  IF( ( k.GE.nb .AND. nb.LT.n ) .OR. k.GT.n )
618  $ GO TO 90
619 *
620  kstep = 1
621 *
622 * Copy column K of A to column K of W and update it
623 *
624  w( k, k ) = real( a( k, k ) )
625  IF( k.LT.n )
626  $ CALL ccopy( n-k, a( k+1, k ), 1, w( k+1, k ), 1 )
627  CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ), lda,
628  $ w( k, 1 ), ldw, cone, w( k, k ), 1 )
629  w( k, k ) = real( w( k, k ) )
630 *
631 * Determine rows and columns to be interchanged and whether
632 * a 1-by-1 or 2-by-2 pivot block will be used
633 *
634  absakk = abs( real( w( k, k ) ) )
635 *
636 * IMAX is the row-index of the largest off-diagonal element in
637 * column K, and COLMAX is its absolute value.
638 * Determine both COLMAX and IMAX.
639 *
640  IF( k.LT.n ) THEN
641  imax = k + icamax( n-k, w( k+1, k ), 1 )
642  colmax = cabs1( w( imax, k ) )
643  ELSE
644  colmax = zero
645  END IF
646 *
647  IF( max( absakk, colmax ).EQ.zero ) THEN
648 *
649 * Column K is zero or underflow: set INFO and continue
650 *
651  IF( info.EQ.0 )
652  $ info = k
653  kp = k
654  a( k, k ) = real( a( k, k ) )
655  ELSE
656 *
657 * ============================================================
658 *
659 * BEGIN pivot search
660 *
661 * Case(1)
662  IF( absakk.GE.alpha*colmax ) THEN
663 *
664 * no interchange, use 1-by-1 pivot block
665 *
666  kp = k
667  ELSE
668 *
669 * BEGIN pivot search along IMAX row
670 *
671 *
672 * Copy column IMAX to column K+1 of W and update it
673 *
674  CALL ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 )
675  CALL clacgv( imax-k, w( k, k+1 ), 1 )
676  w( imax, k+1 ) = real( a( imax, imax ) )
677  IF( imax.LT.n )
678  $ CALL ccopy( n-imax, a( imax+1, imax ), 1,
679  $ w( imax+1, k+1 ), 1 )
680  CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ),
681  $ lda, w( imax, 1 ), ldw, cone, w( k, k+1 ),
682  $ 1 )
683  w( imax, k+1 ) = real( w( imax, k+1 ) )
684 *
685 * JMAX is the column-index of the largest off-diagonal
686 * element in row IMAX, and ROWMAX is its absolute value.
687 * Determine only ROWMAX.
688 *
689  jmax = k - 1 + icamax( imax-k, w( k, k+1 ), 1 )
690  rowmax = cabs1( w( jmax, k+1 ) )
691  IF( imax.LT.n ) THEN
692  jmax = imax + icamax( n-imax, w( imax+1, k+1 ), 1 )
693  rowmax = max( rowmax, cabs1( w( jmax, k+1 ) ) )
694  END IF
695 *
696 * Case(2)
697  IF( absakk.GE.alpha*colmax*( colmax / rowmax ) ) THEN
698 *
699 * no interchange, use 1-by-1 pivot block
700 *
701  kp = k
702 *
703 * Case(3)
704  ELSE IF( abs( real( w( imax, k+1 ) ) ).GE.alpha*rowmax )
705  $ THEN
706 *
707 * interchange rows and columns K and IMAX, use 1-by-1
708 * pivot block
709 *
710  kp = imax
711 *
712 * copy column K+1 of W to column K of W
713 *
714  CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
715 *
716 * Case(4)
717  ELSE
718 *
719 * interchange rows and columns K+1 and IMAX, use 2-by-2
720 * pivot block
721 *
722  kp = imax
723  kstep = 2
724  END IF
725 *
726 *
727 * END pivot search along IMAX row
728 *
729  END IF
730 *
731 * END pivot search
732 *
733 * ============================================================
734 *
735 * KK is the column of A where pivoting step stopped
736 *
737  kk = k + kstep - 1
738 *
739 * Interchange rows and columns KP and KK.
740 * Updated column KP is already stored in column KK of W.
741 *
742  IF( kp.NE.kk ) THEN
743 *
744 * Copy non-updated column KK to column KP of submatrix A
745 * at step K. No need to copy element into column K
746 * (or K and K+1 for 2-by-2 pivot) of A, since these columns
747 * will be later overwritten.
748 *
749  a( kp, kp ) = real( a( kk, kk ) )
750  CALL ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),
751  $ lda )
752  CALL clacgv( kp-kk-1, a( kp, kk+1 ), lda )
753  IF( kp.LT.n )
754  $ CALL ccopy( n-kp, a( kp+1, kk ), 1, a( kp+1, kp ), 1 )
755 *
756 * Interchange rows KK and KP in first K-1 columns of A
757 * (columns K (or K and K+1 for 2-by-2 pivot) of A will be
758 * later overwritten). Interchange rows KK and KP
759 * in first KK columns of W.
760 *
761  IF( k.GT.1 )
762  $ CALL cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda )
763  CALL cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw )
764  END IF
765 *
766  IF( kstep.EQ.1 ) THEN
767 *
768 * 1-by-1 pivot block D(k): column k of W now holds
769 *
770 * W(k) = L(k)*D(k),
771 *
772 * where L(k) is the k-th column of L
773 *
774 * (1) Store subdiag. elements of column L(k)
775 * and 1-by-1 block D(k) in column k of A.
776 * (NOTE: Diagonal element L(k,k) is a UNIT element
777 * and not stored)
778 * A(k,k) := D(k,k) = W(k,k)
779 * A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
780 *
781 * (NOTE: No need to use for Hermitian matrix
782 * A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
783 * element D(k,k) from W (potentially saves only one load))
784  CALL ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 )
785  IF( k.LT.n ) THEN
786 *
787 * (NOTE: No need to check if A(k,k) is NOT ZERO,
788 * since that was ensured earlier in pivot search:
789 * case A(k,k) = 0 falls into 2x2 pivot case(4))
790 *
791  r1 = one / real( a( k, k ) )
792  CALL csscal( n-k, r1, a( k+1, k ), 1 )
793 *
794 * (2) Conjugate column W(k)
795 *
796  CALL clacgv( n-k, w( k+1, k ), 1 )
797  END IF
798 *
799  ELSE
800 *
801 * 2-by-2 pivot block D(k): columns k and k+1 of W now hold
802 *
803 * ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
804 *
805 * where L(k) and L(k+1) are the k-th and (k+1)-th columns
806 * of L
807 *
808 * (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
809 * block D(k:k+1,k:k+1) in columns k and k+1 of A.
810 * (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
811 * block and not stored)
812 * A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
813 * A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
814 * = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
815 *
816  IF( k.LT.n-1 ) THEN
817 *
818 * Factor out the columns of the inverse of 2-by-2 pivot
819 * block D, so that each column contains 1, to reduce the
820 * number of FLOPS when we multiply panel
821 * ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
822 *
823 * D**(-1) = ( d11 cj(d21) )**(-1) =
824 * ( d21 d22 )
825 *
826 * = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
827 * ( (-d21) ( d11 ) )
828 *
829 * = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
830 *
831 * * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
832 * ( ( -1 ) ( d11/conj(d21) ) )
833 *
834 * = 1/(|d21|**2) * 1/(D22*D11-1) *
835 *
836 * * ( d21*( D11 ) conj(d21)*( -1 ) ) =
837 * ( ( -1 ) ( D22 ) )
838 *
839 * = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
840 * ( ( -1 ) ( D22 ) )
841 *
842 * = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
843 * ( ( -1 ) ( D22 ) )
844 *
845 * = ( conj(D21)*( D11 ) D21*( -1 ) )
846 * ( ( -1 ) ( D22 ) )
847 *
848 * where D11 = d22/d21,
849 * D22 = d11/conj(d21),
850 * D21 = T/d21,
851 * T = 1/(D22*D11-1).
852 *
853 * (NOTE: No need to check for division by ZERO,
854 * since that was ensured earlier in pivot search:
855 * (a) d21 != 0, since in 2x2 pivot case(4)
856 * |d21| should be larger than |d11| and |d22|;
857 * (b) (D22*D11 - 1) != 0, since from (a),
858 * both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
859 *
860  d21 = w( k+1, k )
861  d11 = w( k+1, k+1 ) / d21
862  d22 = w( k, k ) / conjg( d21 )
863  t = one / ( real( d11*d22 )-one )
864  d21 = t / d21
865 *
866 * Update elements in columns A(k) and A(k+1) as
867 * dot products of rows of ( W(k) W(k+1) ) and columns
868 * of D**(-1)
869 *
870  DO 80 j = k + 2, n
871  a( j, k ) = conjg( d21 )*
872  $ ( d11*w( j, k )-w( j, k+1 ) )
873  a( j, k+1 ) = d21*( d22*w( j, k+1 )-w( j, k ) )
874  80 CONTINUE
875  END IF
876 *
877 * Copy D(k) to A
878 *
879  a( k, k ) = w( k, k )
880  a( k+1, k ) = w( k+1, k )
881  a( k+1, k+1 ) = w( k+1, k+1 )
882 *
883 * (2) Conjugate columns W(k) and W(k+1)
884 *
885  CALL clacgv( n-k, w( k+1, k ), 1 )
886  CALL clacgv( n-k-1, w( k+2, k+1 ), 1 )
887 *
888  END IF
889 *
890  END IF
891 *
892 * Store details of the interchanges in IPIV
893 *
894  IF( kstep.EQ.1 ) THEN
895  ipiv( k ) = kp
896  ELSE
897  ipiv( k ) = -kp
898  ipiv( k+1 ) = -kp
899  END IF
900 *
901 * Increase K and return to the start of the main loop
902 *
903  k = k + kstep
904  GO TO 70
905 *
906  90 CONTINUE
907 *
908 * Update the lower triangle of A22 (= A(k:n,k:n)) as
909 *
910 * A22 := A22 - L21*D*L21**H = A22 - L21*W**H
911 *
912 * computing blocks of NB columns at a time (note that conjg(W) is
913 * actually stored)
914 *
915  DO 110 j = k, n, nb
916  jb = min( nb, n-j+1 )
917 *
918 * Update the lower triangle of the diagonal block
919 *
920  DO 100 jj = j, j + jb - 1
921  a( jj, jj ) = real( a( jj, jj ) )
922  CALL cgemv( 'No transpose', j+jb-jj, k-1, -cone,
923  $ a( jj, 1 ), lda, w( jj, 1 ), ldw, cone,
924  $ a( jj, jj ), 1 )
925  a( jj, jj ) = real( a( jj, jj ) )
926  100 CONTINUE
927 *
928 * Update the rectangular subdiagonal block
929 *
930  IF( j+jb.LE.n )
931  $ CALL cgemm( 'No transpose', 'Transpose', n-j-jb+1, jb,
932  $ k-1, -cone, a( j+jb, 1 ), lda, w( j, 1 ),
933  $ ldw, cone, a( j+jb, j ), lda )
934  110 CONTINUE
935 *
936 * Put L21 in standard form by partially undoing the interchanges
937 * of rows in columns 1:k-1 looping backwards from k-1 to 1
938 *
939  j = k - 1
940  120 CONTINUE
941 *
942 * Undo the interchanges (if any) of rows J and JP
943 * at each step J
944 *
945 * (Here, J is a diagonal index)
946  jj = j
947  jp = ipiv( j )
948  IF( jp.LT.0 ) THEN
949  jp = -jp
950 * (Here, J is a diagonal index)
951  j = j - 1
952  END IF
953 * (NOTE: Here, J is used to determine row length. Length J
954 * of the rows to swap back doesn't include diagonal element)
955  j = j - 1
956  IF( jp.NE.jj .AND. j.GE.1 )
957  $ CALL cswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda )
958  IF( j.GE.1 )
959  $ GO TO 120
960 *
961 * Set KB to the number of columns factorized
962 *
963  kb = k - 1
964 *
965  END IF
966  RETURN
967 *
968 * End of CLAHEF
969 *
integer function icamax(N, CX, INCX)
ICAMAX
Definition: icamax.f:71
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:81
subroutine csscal(N, SA, CX, INCX)
CSSCAL
Definition: csscal.f:78
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:81
subroutine cgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
CGEMV
Definition: cgemv.f:158
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:187
subroutine clacgv(N, X, INCX)
CLACGV conjugates a complex vector.
Definition: clacgv.f:74
Here is the call graph for this function:
Here is the caller graph for this function: