LAPACK 3.12.0 LAPACK: Linear Algebra PACKage
Searching...
No Matches
clahef.f
Go to the documentation of this file.
1*> \brief \b CLAHEF computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kaufman diagonal pivoting method (blocked algorithm, calling Level 3 BLAS).
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clahef.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clahef.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clahef.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE CLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
22*
23* .. Scalar Arguments ..
24* CHARACTER UPLO
25* INTEGER INFO, KB, LDA, LDW, N, NB
26* ..
27* .. Array Arguments ..
28* INTEGER IPIV( * )
29* COMPLEX A( LDA, * ), W( LDW, * )
30* ..
31*
32*
33*> \par Purpose:
34* =============
35*>
36*> \verbatim
37*>
38*> CLAHEF computes a partial factorization of a complex Hermitian
39*> matrix A using the Bunch-Kaufman diagonal pivoting method. The
40*> partial factorization has the form:
41*>
42*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or:
43*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H )
44*>
45*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L'
46*> ( L21 I ) ( 0 A22 ) ( 0 I )
47*>
48*> where the order of D is at most NB. The actual order is returned in
49*> the argument KB, and is either NB or NB-1, or N if N <= NB.
50*> Note that U**H denotes the conjugate transpose of U.
51*>
52*> CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code
53*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
54*> A22 (if UPLO = 'L').
55*> \endverbatim
56*
57* Arguments:
58* ==========
59*
60*> \param[in] UPLO
61*> \verbatim
62*> UPLO is CHARACTER*1
63*> Specifies whether the upper or lower triangular part of the
64*> Hermitian matrix A is stored:
65*> = 'U': Upper triangular
66*> = 'L': Lower triangular
67*> \endverbatim
68*>
69*> \param[in] N
70*> \verbatim
71*> N is INTEGER
72*> The order of the matrix A. N >= 0.
73*> \endverbatim
74*>
75*> \param[in] NB
76*> \verbatim
77*> NB is INTEGER
78*> The maximum number of columns of the matrix A that should be
79*> factored. NB should be at least 2 to allow for 2-by-2 pivot
80*> blocks.
81*> \endverbatim
82*>
83*> \param[out] KB
84*> \verbatim
85*> KB is INTEGER
86*> The number of columns of A that were actually factored.
87*> KB is either NB-1 or NB, or N if N <= NB.
88*> \endverbatim
89*>
90*> \param[in,out] A
91*> \verbatim
92*> A is COMPLEX array, dimension (LDA,N)
93*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
94*> n-by-n upper triangular part of A contains the upper
95*> triangular part of the matrix A, and the strictly lower
96*> triangular part of A is not referenced. If UPLO = 'L', the
97*> leading n-by-n lower triangular part of A contains the lower
98*> triangular part of the matrix A, and the strictly upper
99*> triangular part of A is not referenced.
100*> On exit, A contains details of the partial factorization.
101*> \endverbatim
102*>
103*> \param[in] LDA
104*> \verbatim
105*> LDA is INTEGER
106*> The leading dimension of the array A. LDA >= max(1,N).
107*> \endverbatim
108*>
109*> \param[out] IPIV
110*> \verbatim
111*> IPIV is INTEGER array, dimension (N)
112*> Details of the interchanges and the block structure of D.
113*>
114*> If UPLO = 'U':
115*> Only the last KB elements of IPIV are set.
116*>
117*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
118*> interchanged and D(k,k) is a 1-by-1 diagonal block.
119*>
120*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns
121*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
122*> is a 2-by-2 diagonal block.
123*>
124*> If UPLO = 'L':
125*> Only the first KB elements of IPIV are set.
126*>
127*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
128*> interchanged and D(k,k) is a 1-by-1 diagonal block.
129*>
130*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns
131*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1)
132*> is a 2-by-2 diagonal block.
133*> \endverbatim
134*>
135*> \param[out] W
136*> \verbatim
137*> W is COMPLEX array, dimension (LDW,NB)
138*> \endverbatim
139*>
140*> \param[in] LDW
141*> \verbatim
142*> LDW is INTEGER
143*> The leading dimension of the array W. LDW >= max(1,N).
144*> \endverbatim
145*>
146*> \param[out] INFO
147*> \verbatim
148*> INFO is INTEGER
149*> = 0: successful exit
150*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization
151*> has been completed, but the block diagonal matrix D is
152*> exactly singular.
153*> \endverbatim
154*
155* Authors:
156* ========
157*
158*> \author Univ. of Tennessee
159*> \author Univ. of California Berkeley
160*> \author Univ. of Colorado Denver
161*> \author NAG Ltd.
162*
163*> \ingroup lahef
164*
165*> \par Contributors:
166* ==================
167*>
168*> \verbatim
169*>
170*> November 2013, Igor Kozachenko,
171*> Computer Science Division,
172*> University of California, Berkeley
173*> \endverbatim
174*
175* =====================================================================
176 SUBROUTINE clahef( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
177*
178* -- LAPACK computational routine --
179* -- LAPACK is a software package provided by Univ. of Tennessee, --
180* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
181*
182* .. Scalar Arguments ..
183 CHARACTER UPLO
184 INTEGER INFO, KB, LDA, LDW, N, NB
185* ..
186* .. Array Arguments ..
187 INTEGER IPIV( * )
188 COMPLEX A( LDA, * ), W( LDW, * )
189* ..
190*
191* =====================================================================
192*
193* .. Parameters ..
194 REAL ZERO, ONE
195 parameter( zero = 0.0e+0, one = 1.0e+0 )
196 COMPLEX CONE
197 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
198 REAL EIGHT, SEVTEN
199 parameter( eight = 8.0e+0, sevten = 17.0e+0 )
200* ..
201* .. Local Scalars ..
202 INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
203 \$ KSTEP, KW
204 REAL ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T
205 COMPLEX D11, D21, D22, Z
206* ..
207* .. External Functions ..
208 LOGICAL LSAME
209 INTEGER ICAMAX
210 EXTERNAL lsame, icamax
211* ..
212* .. External Subroutines ..
213 EXTERNAL ccopy, cgemm, cgemv, clacgv, csscal, cswap
214* ..
215* .. Intrinsic Functions ..
216 INTRINSIC abs, aimag, conjg, max, min, real, sqrt
217* ..
218* .. Statement Functions ..
219 REAL CABS1
220* ..
221* .. Statement Function definitions ..
222 cabs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
223* ..
224* .. Executable Statements ..
225*
226 info = 0
227*
228* Initialize ALPHA for use in choosing pivot block size.
229*
230 alpha = ( one+sqrt( sevten ) ) / eight
231*
232 IF( lsame( uplo, 'U' ) ) THEN
233*
234* Factorize the trailing columns of A using the upper triangle
235* of A and working backwards, and compute the matrix W = U12*D
236* for use in updating A11 (note that conjg(W) is actually stored)
237*
238* K is the main loop index, decreasing from N in steps of 1 or 2
239*
240 k = n
241 10 CONTINUE
242*
243* KW is the column of W which corresponds to column K of A
244*
245 kw = nb + k - n
246*
247* Exit from loop
248*
249 IF( ( k.LE.n-nb+1 .AND. nb.LT.n ) .OR. k.LT.1 )
250 \$ GO TO 30
251*
252 kstep = 1
253*
254* Copy column K of A to column KW of W and update it
255*
256 CALL ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 )
257 w( k, kw ) = real( a( k, k ) )
258 IF( k.LT.n ) THEN
259 CALL cgemv( 'No transpose', k, n-k, -cone, a( 1, k+1 ), lda,
260 \$ w( k, kw+1 ), ldw, cone, w( 1, kw ), 1 )
261 w( k, kw ) = real( w( k, kw ) )
262 END IF
263*
264* Determine rows and columns to be interchanged and whether
265* a 1-by-1 or 2-by-2 pivot block will be used
266*
267 absakk = abs( real( w( k, kw ) ) )
268*
269* IMAX is the row-index of the largest off-diagonal element in
270* column K, and COLMAX is its absolute value.
271* Determine both COLMAX and IMAX.
272*
273 IF( k.GT.1 ) THEN
274 imax = icamax( k-1, w( 1, kw ), 1 )
275 colmax = cabs1( w( imax, kw ) )
276 ELSE
277 colmax = zero
278 END IF
279*
280 IF( max( absakk, colmax ).EQ.zero ) THEN
281*
282* Column K is zero or underflow: set INFO and continue
283*
284 IF( info.EQ.0 )
285 \$ info = k
286 kp = k
287 a( k, k ) = real( a( k, k ) )
288 ELSE
289*
290* ============================================================
291*
292* BEGIN pivot search
293*
294* Case(1)
295 IF( absakk.GE.alpha*colmax ) THEN
296*
297* no interchange, use 1-by-1 pivot block
298*
299 kp = k
300 ELSE
301*
302* BEGIN pivot search along IMAX row
303*
304*
305* Copy column IMAX to column KW-1 of W and update it
306*
307 CALL ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ), 1 )
308 w( imax, kw-1 ) = real( a( imax, imax ) )
309 CALL ccopy( k-imax, a( imax, imax+1 ), lda,
310 \$ w( imax+1, kw-1 ), 1 )
311 CALL clacgv( k-imax, w( imax+1, kw-1 ), 1 )
312 IF( k.LT.n ) THEN
313 CALL cgemv( 'No transpose', k, n-k, -cone,
314 \$ a( 1, k+1 ), lda, w( imax, kw+1 ), ldw,
315 \$ cone, w( 1, kw-1 ), 1 )
316 w( imax, kw-1 ) = real( w( imax, kw-1 ) )
317 END IF
318*
319* JMAX is the column-index of the largest off-diagonal
320* element in row IMAX, and ROWMAX is its absolute value.
321* Determine only ROWMAX.
322*
323 jmax = imax + icamax( k-imax, w( imax+1, kw-1 ), 1 )
324 rowmax = cabs1( w( jmax, kw-1 ) )
325 IF( imax.GT.1 ) THEN
326 jmax = icamax( imax-1, w( 1, kw-1 ), 1 )
327 rowmax = max( rowmax, cabs1( w( jmax, kw-1 ) ) )
328 END IF
329*
330* Case(2)
331 IF( absakk.GE.alpha*colmax*( colmax / rowmax ) ) THEN
332*
333* no interchange, use 1-by-1 pivot block
334*
335 kp = k
336*
337* Case(3)
338 ELSE IF( abs( real( w( imax, kw-1 ) ) ).GE.alpha*rowmax )
339 \$ THEN
340*
341* interchange rows and columns K and IMAX, use 1-by-1
342* pivot block
343*
344 kp = imax
345*
346* copy column KW-1 of W to column KW of W
347*
348 CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
349*
350* Case(4)
351 ELSE
352*
353* interchange rows and columns K-1 and IMAX, use 2-by-2
354* pivot block
355*
356 kp = imax
357 kstep = 2
358 END IF
359*
360*
361* END pivot search along IMAX row
362*
363 END IF
364*
365* END pivot search
366*
367* ============================================================
368*
369* KK is the column of A where pivoting step stopped
370*
371 kk = k - kstep + 1
372*
373* KKW is the column of W which corresponds to column KK of A
374*
375 kkw = nb + kk - n
376*
377* Interchange rows and columns KP and KK.
378* Updated column KP is already stored in column KKW of W.
379*
380 IF( kp.NE.kk ) THEN
381*
382* Copy non-updated column KK to column KP of submatrix A
383* at step K. No need to copy element into column K
384* (or K and K-1 for 2-by-2 pivot) of A, since these columns
385* will be later overwritten.
386*
387 a( kp, kp ) = real( a( kk, kk ) )
388 CALL ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),
389 \$ lda )
390 CALL clacgv( kk-1-kp, a( kp, kp+1 ), lda )
391 IF( kp.GT.1 )
392 \$ CALL ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 )
393*
394* Interchange rows KK and KP in last K+1 to N columns of A
395* (columns K (or K and K-1 for 2-by-2 pivot) of A will be
396* later overwritten). Interchange rows KK and KP
397* in last KKW to NB columns of W.
398*
399 IF( k.LT.n )
400 \$ CALL cswap( n-k, a( kk, k+1 ), lda, a( kp, k+1 ),
401 \$ lda )
402 CALL cswap( n-kk+1, w( kk, kkw ), ldw, w( kp, kkw ),
403 \$ ldw )
404 END IF
405*
406 IF( kstep.EQ.1 ) THEN
407*
408* 1-by-1 pivot block D(k): column kw of W now holds
409*
410* W(kw) = U(k)*D(k),
411*
412* where U(k) is the k-th column of U
413*
414* (1) Store subdiag. elements of column U(k)
415* and 1-by-1 block D(k) in column k of A.
416* (NOTE: Diagonal element U(k,k) is a UNIT element
417* and not stored)
418* A(k,k) := D(k,k) = W(k,kw)
419* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
420*
421* (NOTE: No need to use for Hermitian matrix
422* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
423* element D(k,k) from W (potentially saves only one load))
424 CALL ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 )
425 IF( k.GT.1 ) THEN
426*
427* (NOTE: No need to check if A(k,k) is NOT ZERO,
428* since that was ensured earlier in pivot search:
429* case A(k,k) = 0 falls into 2x2 pivot case(4))
430*
431 r1 = one / real( a( k, k ) )
432 CALL csscal( k-1, r1, a( 1, k ), 1 )
433*
434* (2) Conjugate column W(kw)
435*
436 CALL clacgv( k-1, w( 1, kw ), 1 )
437 END IF
438*
439 ELSE
440*
441* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
442*
443* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
444*
445* where U(k) and U(k-1) are the k-th and (k-1)-th columns
446* of U
447*
448* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
449* block D(k-1:k,k-1:k) in columns k-1 and k of A.
450* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
451* block and not stored)
452* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
453* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
454* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
455*
456 IF( k.GT.2 ) THEN
457*
458* Factor out the columns of the inverse of 2-by-2 pivot
459* block D, so that each column contains 1, to reduce the
460* number of FLOPS when we multiply panel
461* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
462*
463* D**(-1) = ( d11 cj(d21) )**(-1) =
464* ( d21 d22 )
465*
466* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
467* ( (-d21) ( d11 ) )
468*
469* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
470*
471* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
472* ( ( -1 ) ( d11/conj(d21) ) )
473*
474* = 1/(|d21|**2) * 1/(D22*D11-1) *
475*
476* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
477* ( ( -1 ) ( D22 ) )
478*
479* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
480* ( ( -1 ) ( D22 ) )
481*
482* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
483* ( ( -1 ) ( D22 ) )
484*
485* = ( conj(D21)*( D11 ) D21*( -1 ) )
486* ( ( -1 ) ( D22 ) ),
487*
488* where D11 = d22/d21,
489* D22 = d11/conj(d21),
490* D21 = T/d21,
491* T = 1/(D22*D11-1).
492*
493* (NOTE: No need to check for division by ZERO,
494* since that was ensured earlier in pivot search:
495* (a) d21 != 0, since in 2x2 pivot case(4)
496* |d21| should be larger than |d11| and |d22|;
497* (b) (D22*D11 - 1) != 0, since from (a),
498* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
499*
500 d21 = w( k-1, kw )
501 d11 = w( k, kw ) / conjg( d21 )
502 d22 = w( k-1, kw-1 ) / d21
503 t = one / ( real( d11*d22 )-one )
504 d21 = t / d21
505*
506* Update elements in columns A(k-1) and A(k) as
507* dot products of rows of ( W(kw-1) W(kw) ) and columns
508* of D**(-1)
509*
510 DO 20 j = 1, k - 2
511 a( j, k-1 ) = d21*( d11*w( j, kw-1 )-w( j, kw ) )
512 a( j, k ) = conjg( d21 )*
513 \$ ( d22*w( j, kw )-w( j, kw-1 ) )
514 20 CONTINUE
515 END IF
516*
517* Copy D(k) to A
518*
519 a( k-1, k-1 ) = w( k-1, kw-1 )
520 a( k-1, k ) = w( k-1, kw )
521 a( k, k ) = w( k, kw )
522*
523* (2) Conjugate columns W(kw) and W(kw-1)
524*
525 CALL clacgv( k-1, w( 1, kw ), 1 )
526 CALL clacgv( k-2, w( 1, kw-1 ), 1 )
527*
528 END IF
529*
530 END IF
531*
532* Store details of the interchanges in IPIV
533*
534 IF( kstep.EQ.1 ) THEN
535 ipiv( k ) = kp
536 ELSE
537 ipiv( k ) = -kp
538 ipiv( k-1 ) = -kp
539 END IF
540*
541* Decrease K and return to the start of the main loop
542*
543 k = k - kstep
544 GO TO 10
545*
546 30 CONTINUE
547*
548* Update the upper triangle of A11 (= A(1:k,1:k)) as
549*
550* A11 := A11 - U12*D*U12**H = A11 - U12*W**H
551*
552* computing blocks of NB columns at a time (note that conjg(W) is
553* actually stored)
554*
555 DO 50 j = ( ( k-1 ) / nb )*nb + 1, 1, -nb
556 jb = min( nb, k-j+1 )
557*
558* Update the upper triangle of the diagonal block
559*
560 DO 40 jj = j, j + jb - 1
561 a( jj, jj ) = real( a( jj, jj ) )
562 CALL cgemv( 'No transpose', jj-j+1, n-k, -cone,
563 \$ a( j, k+1 ), lda, w( jj, kw+1 ), ldw, cone,
564 \$ a( j, jj ), 1 )
565 a( jj, jj ) = real( a( jj, jj ) )
566 40 CONTINUE
567*
568* Update the rectangular superdiagonal block
569*
570 CALL cgemm( 'No transpose', 'Transpose', j-1, jb, n-k,
571 \$ -cone, a( 1, k+1 ), lda, w( j, kw+1 ), ldw,
572 \$ cone, a( 1, j ), lda )
573 50 CONTINUE
574*
575* Put U12 in standard form by partially undoing the interchanges
576* in of rows in columns k+1:n looping backwards from k+1 to n
577*
578 j = k + 1
579 60 CONTINUE
580*
581* Undo the interchanges (if any) of rows J and JP
582* at each step J
583*
584* (Here, J is a diagonal index)
585 jj = j
586 jp = ipiv( j )
587 IF( jp.LT.0 ) THEN
588 jp = -jp
589* (Here, J is a diagonal index)
590 j = j + 1
591 END IF
592* (NOTE: Here, J is used to determine row length. Length N-J+1
593* of the rows to swap back doesn't include diagonal element)
594 j = j + 1
595 IF( jp.NE.jj .AND. j.LE.n )
596 \$ CALL cswap( n-j+1, a( jp, j ), lda, a( jj, j ), lda )
597 IF( j.LE.n )
598 \$ GO TO 60
599*
600* Set KB to the number of columns factorized
601*
602 kb = n - k
603*
604 ELSE
605*
606* Factorize the leading columns of A using the lower triangle
607* of A and working forwards, and compute the matrix W = L21*D
608* for use in updating A22 (note that conjg(W) is actually stored)
609*
610* K is the main loop index, increasing from 1 in steps of 1 or 2
611*
612 k = 1
613 70 CONTINUE
614*
615* Exit from loop
616*
617 IF( ( k.GE.nb .AND. nb.LT.n ) .OR. k.GT.n )
618 \$ GO TO 90
619*
620 kstep = 1
621*
622* Copy column K of A to column K of W and update it
623*
624 w( k, k ) = real( a( k, k ) )
625 IF( k.LT.n )
626 \$ CALL ccopy( n-k, a( k+1, k ), 1, w( k+1, k ), 1 )
627 CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ), lda,
628 \$ w( k, 1 ), ldw, cone, w( k, k ), 1 )
629 w( k, k ) = real( w( k, k ) )
630*
631* Determine rows and columns to be interchanged and whether
632* a 1-by-1 or 2-by-2 pivot block will be used
633*
634 absakk = abs( real( w( k, k ) ) )
635*
636* IMAX is the row-index of the largest off-diagonal element in
637* column K, and COLMAX is its absolute value.
638* Determine both COLMAX and IMAX.
639*
640 IF( k.LT.n ) THEN
641 imax = k + icamax( n-k, w( k+1, k ), 1 )
642 colmax = cabs1( w( imax, k ) )
643 ELSE
644 colmax = zero
645 END IF
646*
647 IF( max( absakk, colmax ).EQ.zero ) THEN
648*
649* Column K is zero or underflow: set INFO and continue
650*
651 IF( info.EQ.0 )
652 \$ info = k
653 kp = k
654 a( k, k ) = real( a( k, k ) )
655 ELSE
656*
657* ============================================================
658*
659* BEGIN pivot search
660*
661* Case(1)
662 IF( absakk.GE.alpha*colmax ) THEN
663*
664* no interchange, use 1-by-1 pivot block
665*
666 kp = k
667 ELSE
668*
669* BEGIN pivot search along IMAX row
670*
671*
672* Copy column IMAX to column K+1 of W and update it
673*
674 CALL ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1 )
675 CALL clacgv( imax-k, w( k, k+1 ), 1 )
676 w( imax, k+1 ) = real( a( imax, imax ) )
677 IF( imax.LT.n )
678 \$ CALL ccopy( n-imax, a( imax+1, imax ), 1,
679 \$ w( imax+1, k+1 ), 1 )
680 CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ),
681 \$ lda, w( imax, 1 ), ldw, cone, w( k, k+1 ),
682 \$ 1 )
683 w( imax, k+1 ) = real( w( imax, k+1 ) )
684*
685* JMAX is the column-index of the largest off-diagonal
686* element in row IMAX, and ROWMAX is its absolute value.
687* Determine only ROWMAX.
688*
689 jmax = k - 1 + icamax( imax-k, w( k, k+1 ), 1 )
690 rowmax = cabs1( w( jmax, k+1 ) )
691 IF( imax.LT.n ) THEN
692 jmax = imax + icamax( n-imax, w( imax+1, k+1 ), 1 )
693 rowmax = max( rowmax, cabs1( w( jmax, k+1 ) ) )
694 END IF
695*
696* Case(2)
697 IF( absakk.GE.alpha*colmax*( colmax / rowmax ) ) THEN
698*
699* no interchange, use 1-by-1 pivot block
700*
701 kp = k
702*
703* Case(3)
704 ELSE IF( abs( real( w( imax, k+1 ) ) ).GE.alpha*rowmax )
705 \$ THEN
706*
707* interchange rows and columns K and IMAX, use 1-by-1
708* pivot block
709*
710 kp = imax
711*
712* copy column K+1 of W to column K of W
713*
714 CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
715*
716* Case(4)
717 ELSE
718*
719* interchange rows and columns K+1 and IMAX, use 2-by-2
720* pivot block
721*
722 kp = imax
723 kstep = 2
724 END IF
725*
726*
727* END pivot search along IMAX row
728*
729 END IF
730*
731* END pivot search
732*
733* ============================================================
734*
735* KK is the column of A where pivoting step stopped
736*
737 kk = k + kstep - 1
738*
739* Interchange rows and columns KP and KK.
740* Updated column KP is already stored in column KK of W.
741*
742 IF( kp.NE.kk ) THEN
743*
744* Copy non-updated column KK to column KP of submatrix A
745* at step K. No need to copy element into column K
746* (or K and K+1 for 2-by-2 pivot) of A, since these columns
747* will be later overwritten.
748*
749 a( kp, kp ) = real( a( kk, kk ) )
750 CALL ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),
751 \$ lda )
752 CALL clacgv( kp-kk-1, a( kp, kk+1 ), lda )
753 IF( kp.LT.n )
754 \$ CALL ccopy( n-kp, a( kp+1, kk ), 1, a( kp+1, kp ), 1 )
755*
756* Interchange rows KK and KP in first K-1 columns of A
757* (columns K (or K and K+1 for 2-by-2 pivot) of A will be
758* later overwritten). Interchange rows KK and KP
759* in first KK columns of W.
760*
761 IF( k.GT.1 )
762 \$ CALL cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda )
763 CALL cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw )
764 END IF
765*
766 IF( kstep.EQ.1 ) THEN
767*
768* 1-by-1 pivot block D(k): column k of W now holds
769*
770* W(k) = L(k)*D(k),
771*
772* where L(k) is the k-th column of L
773*
774* (1) Store subdiag. elements of column L(k)
775* and 1-by-1 block D(k) in column k of A.
776* (NOTE: Diagonal element L(k,k) is a UNIT element
777* and not stored)
778* A(k,k) := D(k,k) = W(k,k)
779* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
780*
781* (NOTE: No need to use for Hermitian matrix
782* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal
783* element D(k,k) from W (potentially saves only one load))
784 CALL ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 )
785 IF( k.LT.n ) THEN
786*
787* (NOTE: No need to check if A(k,k) is NOT ZERO,
788* since that was ensured earlier in pivot search:
789* case A(k,k) = 0 falls into 2x2 pivot case(4))
790*
791 r1 = one / real( a( k, k ) )
792 CALL csscal( n-k, r1, a( k+1, k ), 1 )
793*
794* (2) Conjugate column W(k)
795*
796 CALL clacgv( n-k, w( k+1, k ), 1 )
797 END IF
798*
799 ELSE
800*
801* 2-by-2 pivot block D(k): columns k and k+1 of W now hold
802*
803* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
804*
805* where L(k) and L(k+1) are the k-th and (k+1)-th columns
806* of L
807*
808* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
809* block D(k:k+1,k:k+1) in columns k and k+1 of A.
810* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
811* block and not stored)
812* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
813* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
814* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
815*
816 IF( k.LT.n-1 ) THEN
817*
818* Factor out the columns of the inverse of 2-by-2 pivot
819* block D, so that each column contains 1, to reduce the
820* number of FLOPS when we multiply panel
821* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
822*
823* D**(-1) = ( d11 cj(d21) )**(-1) =
824* ( d21 d22 )
825*
826* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
827* ( (-d21) ( d11 ) )
828*
829* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
830*
831* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
832* ( ( -1 ) ( d11/conj(d21) ) )
833*
834* = 1/(|d21|**2) * 1/(D22*D11-1) *
835*
836* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
837* ( ( -1 ) ( D22 ) )
838*
839* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
840* ( ( -1 ) ( D22 ) )
841*
842* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
843* ( ( -1 ) ( D22 ) )
844*
845* = ( conj(D21)*( D11 ) D21*( -1 ) )
846* ( ( -1 ) ( D22 ) )
847*
848* where D11 = d22/d21,
849* D22 = d11/conj(d21),
850* D21 = T/d21,
851* T = 1/(D22*D11-1).
852*
853* (NOTE: No need to check for division by ZERO,
854* since that was ensured earlier in pivot search:
855* (a) d21 != 0, since in 2x2 pivot case(4)
856* |d21| should be larger than |d11| and |d22|;
857* (b) (D22*D11 - 1) != 0, since from (a),
858* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
859*
860 d21 = w( k+1, k )
861 d11 = w( k+1, k+1 ) / d21
862 d22 = w( k, k ) / conjg( d21 )
863 t = one / ( real( d11*d22 )-one )
864 d21 = t / d21
865*
866* Update elements in columns A(k) and A(k+1) as
867* dot products of rows of ( W(k) W(k+1) ) and columns
868* of D**(-1)
869*
870 DO 80 j = k + 2, n
871 a( j, k ) = conjg( d21 )*
872 \$ ( d11*w( j, k )-w( j, k+1 ) )
873 a( j, k+1 ) = d21*( d22*w( j, k+1 )-w( j, k ) )
874 80 CONTINUE
875 END IF
876*
877* Copy D(k) to A
878*
879 a( k, k ) = w( k, k )
880 a( k+1, k ) = w( k+1, k )
881 a( k+1, k+1 ) = w( k+1, k+1 )
882*
883* (2) Conjugate columns W(k) and W(k+1)
884*
885 CALL clacgv( n-k, w( k+1, k ), 1 )
886 CALL clacgv( n-k-1, w( k+2, k+1 ), 1 )
887*
888 END IF
889*
890 END IF
891*
892* Store details of the interchanges in IPIV
893*
894 IF( kstep.EQ.1 ) THEN
895 ipiv( k ) = kp
896 ELSE
897 ipiv( k ) = -kp
898 ipiv( k+1 ) = -kp
899 END IF
900*
901* Increase K and return to the start of the main loop
902*
903 k = k + kstep
904 GO TO 70
905*
906 90 CONTINUE
907*
908* Update the lower triangle of A22 (= A(k:n,k:n)) as
909*
910* A22 := A22 - L21*D*L21**H = A22 - L21*W**H
911*
912* computing blocks of NB columns at a time (note that conjg(W) is
913* actually stored)
914*
915 DO 110 j = k, n, nb
916 jb = min( nb, n-j+1 )
917*
918* Update the lower triangle of the diagonal block
919*
920 DO 100 jj = j, j + jb - 1
921 a( jj, jj ) = real( a( jj, jj ) )
922 CALL cgemv( 'No transpose', j+jb-jj, k-1, -cone,
923 \$ a( jj, 1 ), lda, w( jj, 1 ), ldw, cone,
924 \$ a( jj, jj ), 1 )
925 a( jj, jj ) = real( a( jj, jj ) )
926 100 CONTINUE
927*
928* Update the rectangular subdiagonal block
929*
930 IF( j+jb.LE.n )
931 \$ CALL cgemm( 'No transpose', 'Transpose', n-j-jb+1, jb,
932 \$ k-1, -cone, a( j+jb, 1 ), lda, w( j, 1 ),
933 \$ ldw, cone, a( j+jb, j ), lda )
934 110 CONTINUE
935*
936* Put L21 in standard form by partially undoing the interchanges
937* of rows in columns 1:k-1 looping backwards from k-1 to 1
938*
939 j = k - 1
940 120 CONTINUE
941*
942* Undo the interchanges (if any) of rows J and JP
943* at each step J
944*
945* (Here, J is a diagonal index)
946 jj = j
947 jp = ipiv( j )
948 IF( jp.LT.0 ) THEN
949 jp = -jp
950* (Here, J is a diagonal index)
951 j = j - 1
952 END IF
953* (NOTE: Here, J is used to determine row length. Length J
954* of the rows to swap back doesn't include diagonal element)
955 j = j - 1
956 IF( jp.NE.jj .AND. j.GE.1 )
957 \$ CALL cswap( j, a( jp, 1 ), lda, a( jj, 1 ), lda )
958 IF( j.GE.1 )
959 \$ GO TO 120
960*
961* Set KB to the number of columns factorized
962*
963 kb = k - 1
964*
965 END IF
966 RETURN
967*
968* End of CLAHEF
969*
970 END
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine cgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
CGEMV
Definition cgemv.f:160
subroutine clacgv(n, x, incx)
CLACGV conjugates a complex vector.
Definition clacgv.f:74
subroutine clahef(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)
CLAHEF computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kauf...
Definition clahef.f:177
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81