LAPACK 3.12.0 LAPACK: Linear Algebra PACKage
Searching...
No Matches
clahef_rook.f
Go to the documentation of this file.
1* \brief \b CLAHEF_ROOK computes a partial factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (blocked algorithm, calling Level 3 BLAS).
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clahef_rook.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clahef_rook.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clahef_rook.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE CLAHEF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
22*
23* .. Scalar Arguments ..
24* CHARACTER UPLO
25* INTEGER INFO, KB, LDA, LDW, N, NB
26* ..
27* .. Array Arguments ..
28* INTEGER IPIV( * )
29* COMPLEX A( LDA, * ), W( LDW, * )
30* ..
31*
32*
33*> \par Purpose:
34* =============
35*>
36*> \verbatim
37*>
38*> CLAHEF_ROOK computes a partial factorization of a complex Hermitian
39*> matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting
40*> method. The partial factorization has the form:
41*>
42*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or:
43*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H )
44*>
45*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L'
46*> ( L21 I ) ( 0 A22 ) ( 0 I )
47*>
48*> where the order of D is at most NB. The actual order is returned in
49*> the argument KB, and is either NB or NB-1, or N if N <= NB.
50*> Note that U**H denotes the conjugate transpose of U.
51*>
52*> CLAHEF_ROOK is an auxiliary routine called by CHETRF_ROOK. It uses
53*> blocked code (calling Level 3 BLAS) to update the submatrix
54*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L').
55*> \endverbatim
56*
57* Arguments:
58* ==========
59*
60*> \param[in] UPLO
61*> \verbatim
62*> UPLO is CHARACTER*1
63*> Specifies whether the upper or lower triangular part of the
64*> Hermitian matrix A is stored:
65*> = 'U': Upper triangular
66*> = 'L': Lower triangular
67*> \endverbatim
68*>
69*> \param[in] N
70*> \verbatim
71*> N is INTEGER
72*> The order of the matrix A. N >= 0.
73*> \endverbatim
74*>
75*> \param[in] NB
76*> \verbatim
77*> NB is INTEGER
78*> The maximum number of columns of the matrix A that should be
79*> factored. NB should be at least 2 to allow for 2-by-2 pivot
80*> blocks.
81*> \endverbatim
82*>
83*> \param[out] KB
84*> \verbatim
85*> KB is INTEGER
86*> The number of columns of A that were actually factored.
87*> KB is either NB-1 or NB, or N if N <= NB.
88*> \endverbatim
89*>
90*> \param[in,out] A
91*> \verbatim
92*> A is COMPLEX array, dimension (LDA,N)
93*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
94*> n-by-n upper triangular part of A contains the upper
95*> triangular part of the matrix A, and the strictly lower
96*> triangular part of A is not referenced. If UPLO = 'L', the
97*> leading n-by-n lower triangular part of A contains the lower
98*> triangular part of the matrix A, and the strictly upper
99*> triangular part of A is not referenced.
100*> On exit, A contains details of the partial factorization.
101*> \endverbatim
102*>
103*> \param[in] LDA
104*> \verbatim
105*> LDA is INTEGER
106*> The leading dimension of the array A. LDA >= max(1,N).
107*> \endverbatim
108*>
109*> \param[out] IPIV
110*> \verbatim
111*> IPIV is INTEGER array, dimension (N)
112*> Details of the interchanges and the block structure of D.
113*>
114*> If UPLO = 'U':
115*> Only the last KB elements of IPIV are set.
116*>
117*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were
118*> interchanged and D(k,k) is a 1-by-1 diagonal block.
119*>
120*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and
121*> columns k and -IPIV(k) were interchanged and rows and
122*> columns k-1 and -IPIV(k-1) were inerchaged,
123*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block.
124*>
125*> If UPLO = 'L':
126*> Only the first KB elements of IPIV are set.
127*>
128*> If IPIV(k) > 0, then rows and columns k and IPIV(k)
129*> were interchanged and D(k,k) is a 1-by-1 diagonal block.
130*>
131*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and
132*> columns k and -IPIV(k) were interchanged and rows and
133*> columns k+1 and -IPIV(k+1) were inerchaged,
134*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
135*> \endverbatim
136*>
137*> \param[out] W
138*> \verbatim
139*> W is COMPLEX array, dimension (LDW,NB)
140*> \endverbatim
141*>
142*> \param[in] LDW
143*> \verbatim
144*> LDW is INTEGER
145*> The leading dimension of the array W. LDW >= max(1,N).
146*> \endverbatim
147*>
148*> \param[out] INFO
149*> \verbatim
150*> INFO is INTEGER
151*> = 0: successful exit
152*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization
153*> has been completed, but the block diagonal matrix D is
154*> exactly singular.
155*> \endverbatim
156*
157* Authors:
158* ========
159*
160*> \author Univ. of Tennessee
161*> \author Univ. of California Berkeley
162*> \author Univ. of Colorado Denver
163*> \author NAG Ltd.
164*
165*> \ingroup lahef_rook
166*
167*> \par Contributors:
168* ==================
169*>
170*> \verbatim
171*>
172*> November 2013, Igor Kozachenko,
173*> Computer Science Division,
174*> University of California, Berkeley
175*>
176*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas,
177*> School of Mathematics,
178*> University of Manchester
179*> \endverbatim
180*
181* =====================================================================
182 SUBROUTINE clahef_rook( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW,
183 \$ INFO )
184*
185* -- LAPACK computational routine --
186* -- LAPACK is a software package provided by Univ. of Tennessee, --
187* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
188*
189* .. Scalar Arguments ..
190 CHARACTER UPLO
191 INTEGER INFO, KB, LDA, LDW, N, NB
192* ..
193* .. Array Arguments ..
194 INTEGER IPIV( * )
195 COMPLEX A( LDA, * ), W( LDW, * )
196* ..
197*
198* =====================================================================
199*
200* .. Parameters ..
201 REAL ZERO, ONE
202 parameter( zero = 0.0e+0, one = 1.0e+0 )
203 COMPLEX CONE
204 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
205 REAL EIGHT, SEVTEN
206 parameter( eight = 8.0e+0, sevten = 17.0e+0 )
207* ..
208* .. Local Scalars ..
209 LOGICAL DONE
210 INTEGER IMAX, ITEMP, II, J, JB, JJ, JMAX, JP1, JP2, K,
211 \$ kk, kkw, kp, kstep, kw, p
212 REAL ABSAKK, ALPHA, COLMAX, STEMP, R1, ROWMAX, T,
213 \$ sfmin
214 COMPLEX D11, D21, D22, Z
215* ..
216* .. External Functions ..
217 LOGICAL LSAME
218 INTEGER ICAMAX
219 REAL SLAMCH
220 EXTERNAL lsame, icamax, slamch
221* ..
222* .. External Subroutines ..
223 EXTERNAL ccopy, csscal, cgemm, cgemv, clacgv, cswap
224* ..
225* .. Intrinsic Functions ..
226 INTRINSIC abs, conjg, aimag, max, min, real, sqrt
227* ..
228* .. Statement Functions ..
229 REAL CABS1
230* ..
231* .. Statement Function definitions ..
232 cabs1( z ) = abs( real( z ) ) + abs( aimag( z ) )
233* ..
234* .. Executable Statements ..
235*
236 info = 0
237*
238* Initialize ALPHA for use in choosing pivot block size.
239*
240 alpha = ( one+sqrt( sevten ) ) / eight
241*
242* Compute machine safe minimum
243*
244 sfmin = slamch( 'S' )
245*
246 IF( lsame( uplo, 'U' ) ) THEN
247*
248* Factorize the trailing columns of A using the upper triangle
249* of A and working backwards, and compute the matrix W = U12*D
250* for use in updating A11 (note that conjg(W) is actually stored)
251*
252* K is the main loop index, decreasing from N in steps of 1 or 2
253*
254 k = n
255 10 CONTINUE
256*
257* KW is the column of W which corresponds to column K of A
258*
259 kw = nb + k - n
260*
261* Exit from loop
262*
263 IF( ( k.LE.n-nb+1 .AND. nb.LT.n ) .OR. k.LT.1 )
264 \$ GO TO 30
265*
266 kstep = 1
267 p = k
268*
269* Copy column K of A to column KW of W and update it
270*
271 IF( k.GT.1 )
272 \$ CALL ccopy( k-1, a( 1, k ), 1, w( 1, kw ), 1 )
273 w( k, kw ) = real( a( k, k ) )
274 IF( k.LT.n ) THEN
275 CALL cgemv( 'No transpose', k, n-k, -cone, a( 1, k+1 ), lda,
276 \$ w( k, kw+1 ), ldw, cone, w( 1, kw ), 1 )
277 w( k, kw ) = real( w( k, kw ) )
278 END IF
279*
280* Determine rows and columns to be interchanged and whether
281* a 1-by-1 or 2-by-2 pivot block will be used
282*
283 absakk = abs( real( w( k, kw ) ) )
284*
285* IMAX is the row-index of the largest off-diagonal element in
286* column K, and COLMAX is its absolute value.
287* Determine both COLMAX and IMAX.
288*
289 IF( k.GT.1 ) THEN
290 imax = icamax( k-1, w( 1, kw ), 1 )
291 colmax = cabs1( w( imax, kw ) )
292 ELSE
293 colmax = zero
294 END IF
295*
296 IF( max( absakk, colmax ).EQ.zero ) THEN
297*
298* Column K is zero or underflow: set INFO and continue
299*
300 IF( info.EQ.0 )
301 \$ info = k
302 kp = k
303 a( k, k ) = real( w( k, kw ) )
304 IF( k.GT.1 )
305 \$ CALL ccopy( k-1, w( 1, kw ), 1, a( 1, k ), 1 )
306 ELSE
307*
308* ============================================================
309*
310* BEGIN pivot search
311*
312* Case(1)
313* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
314* (used to handle NaN and Inf)
315 IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
316*
317* no interchange, use 1-by-1 pivot block
318*
319 kp = k
320*
321 ELSE
322*
323* Lop until pivot found
324*
325 done = .false.
326*
327 12 CONTINUE
328*
329* BEGIN pivot search loop body
330*
331*
332* Copy column IMAX to column KW-1 of W and update it
333*
334 IF( imax.GT.1 )
335 \$ CALL ccopy( imax-1, a( 1, imax ), 1, w( 1, kw-1 ),
336 \$ 1 )
337 w( imax, kw-1 ) = real( a( imax, imax ) )
338*
339 CALL ccopy( k-imax, a( imax, imax+1 ), lda,
340 \$ w( imax+1, kw-1 ), 1 )
341 CALL clacgv( k-imax, w( imax+1, kw-1 ), 1 )
342*
343 IF( k.LT.n ) THEN
344 CALL cgemv( 'No transpose', k, n-k, -cone,
345 \$ a( 1, k+1 ), lda, w( imax, kw+1 ), ldw,
346 \$ cone, w( 1, kw-1 ), 1 )
347 w( imax, kw-1 ) = real( w( imax, kw-1 ) )
348 END IF
349*
350* JMAX is the column-index of the largest off-diagonal
351* element in row IMAX, and ROWMAX is its absolute value.
352* Determine both ROWMAX and JMAX.
353*
354 IF( imax.NE.k ) THEN
355 jmax = imax + icamax( k-imax, w( imax+1, kw-1 ),
356 \$ 1 )
357 rowmax = cabs1( w( jmax, kw-1 ) )
358 ELSE
359 rowmax = zero
360 END IF
361*
362 IF( imax.GT.1 ) THEN
363 itemp = icamax( imax-1, w( 1, kw-1 ), 1 )
364 stemp = cabs1( w( itemp, kw-1 ) )
365 IF( stemp.GT.rowmax ) THEN
366 rowmax = stemp
367 jmax = itemp
368 END IF
369 END IF
370*
371* Case(2)
372* Equivalent to testing for
373* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX
374* (used to handle NaN and Inf)
375*
376 IF( .NOT.( abs( real( w( imax,kw-1 ) ) )
377 \$ .LT.alpha*rowmax ) ) THEN
378*
379* interchange rows and columns K and IMAX,
380* use 1-by-1 pivot block
381*
382 kp = imax
383*
384* copy column KW-1 of W to column KW of W
385*
386 CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
387*
388 done = .true.
389*
390* Case(3)
391* Equivalent to testing for ROWMAX.EQ.COLMAX,
392* (used to handle NaN and Inf)
393*
394 ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
395 \$ THEN
396*
397* interchange rows and columns K-1 and IMAX,
398* use 2-by-2 pivot block
399*
400 kp = imax
401 kstep = 2
402 done = .true.
403*
404* Case(4)
405 ELSE
406*
408*
409 p = imax
410 colmax = rowmax
411 imax = jmax
412*
413* Copy updated JMAXth (next IMAXth) column to Kth of W
414*
415 CALL ccopy( k, w( 1, kw-1 ), 1, w( 1, kw ), 1 )
416*
417 END IF
418*
419*
420* END pivot search loop body
421*
422 IF( .NOT.done ) GOTO 12
423*
424 END IF
425*
426* END pivot search
427*
428* ============================================================
429*
430* KK is the column of A where pivoting step stopped
431*
432 kk = k - kstep + 1
433*
434* KKW is the column of W which corresponds to column KK of A
435*
436 kkw = nb + kk - n
437*
438* Interchange rows and columns P and K.
439* Updated column P is already stored in column KW of W.
440*
441 IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
442*
443* Copy non-updated column K to column P of submatrix A
444* at step K. No need to copy element into columns
445* K and K-1 of A for 2-by-2 pivot, since these columns
446* will be later overwritten.
447*
448 a( p, p ) = real( a( k, k ) )
449 CALL ccopy( k-1-p, a( p+1, k ), 1, a( p, p+1 ),
450 \$ lda )
451 CALL clacgv( k-1-p, a( p, p+1 ), lda )
452 IF( p.GT.1 )
453 \$ CALL ccopy( p-1, a( 1, k ), 1, a( 1, p ), 1 )
454*
455* Interchange rows K and P in the last K+1 to N columns of A
456* (columns K and K-1 of A for 2-by-2 pivot will be
457* later overwritten). Interchange rows K and P
458* in last KKW to NB columns of W.
459*
460 IF( k.LT.n )
461 \$ CALL cswap( n-k, a( k, k+1 ), lda, a( p, k+1 ),
462 \$ lda )
463 CALL cswap( n-kk+1, w( k, kkw ), ldw, w( p, kkw ),
464 \$ ldw )
465 END IF
466*
467* Interchange rows and columns KP and KK.
468* Updated column KP is already stored in column KKW of W.
469*
470 IF( kp.NE.kk ) THEN
471*
472* Copy non-updated column KK to column KP of submatrix A
473* at step K. No need to copy element into column K
474* (or K and K-1 for 2-by-2 pivot) of A, since these columns
475* will be later overwritten.
476*
477 a( kp, kp ) = real( a( kk, kk ) )
478 CALL ccopy( kk-1-kp, a( kp+1, kk ), 1, a( kp, kp+1 ),
479 \$ lda )
480 CALL clacgv( kk-1-kp, a( kp, kp+1 ), lda )
481 IF( kp.GT.1 )
482 \$ CALL ccopy( kp-1, a( 1, kk ), 1, a( 1, kp ), 1 )
483*
484* Interchange rows KK and KP in last K+1 to N columns of A
485* (columns K (or K and K-1 for 2-by-2 pivot) of A will be
486* later overwritten). Interchange rows KK and KP
487* in last KKW to NB columns of W.
488*
489 IF( k.LT.n )
490 \$ CALL cswap( n-k, a( kk, k+1 ), lda, a( kp, k+1 ),
491 \$ lda )
492 CALL cswap( n-kk+1, w( kk, kkw ), ldw, w( kp, kkw ),
493 \$ ldw )
494 END IF
495*
496 IF( kstep.EQ.1 ) THEN
497*
498* 1-by-1 pivot block D(k): column kw of W now holds
499*
500* W(kw) = U(k)*D(k),
501*
502* where U(k) is the k-th column of U
503*
504* (1) Store subdiag. elements of column U(k)
505* and 1-by-1 block D(k) in column k of A.
506* (NOTE: Diagonal element U(k,k) is a UNIT element
507* and not stored)
508* A(k,k) := D(k,k) = W(k,kw)
509* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k)
510*
511* (NOTE: No need to use for Hermitian matrix
512* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
513* element D(k,k) from W (potentially saves only one load))
514 CALL ccopy( k, w( 1, kw ), 1, a( 1, k ), 1 )
515 IF( k.GT.1 ) THEN
516*
517* (NOTE: No need to check if A(k,k) is NOT ZERO,
518* since that was ensured earlier in pivot search:
519* case A(k,k) = 0 falls into 2x2 pivot case(3))
520*
521* Handle division by a small number
522*
523 t = real( a( k, k ) )
524 IF( abs( t ).GE.sfmin ) THEN
525 r1 = one / t
526 CALL csscal( k-1, r1, a( 1, k ), 1 )
527 ELSE
528 DO 14 ii = 1, k-1
529 a( ii, k ) = a( ii, k ) / t
530 14 CONTINUE
531 END IF
532*
533* (2) Conjugate column W(kw)
534*
535 CALL clacgv( k-1, w( 1, kw ), 1 )
536 END IF
537*
538 ELSE
539*
540* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold
541*
542* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k)
543*
544* where U(k) and U(k-1) are the k-th and (k-1)-th columns
545* of U
546*
547* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2
548* block D(k-1:k,k-1:k) in columns k-1 and k of A.
549* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT
550* block and not stored)
551* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw)
552* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) =
553* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) )
554*
555 IF( k.GT.2 ) THEN
556*
557* Factor out the columns of the inverse of 2-by-2 pivot
558* block D, so that each column contains 1, to reduce the
559* number of FLOPS when we multiply panel
560* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
561*
562* D**(-1) = ( d11 cj(d21) )**(-1) =
563* ( d21 d22 )
564*
565* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
566* ( (-d21) ( d11 ) )
567*
568* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
569*
570* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
571* ( ( -1 ) ( d11/conj(d21) ) )
572*
573* = 1/(|d21|**2) * 1/(D22*D11-1) *
574*
575* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
576* ( ( -1 ) ( D22 ) )
577*
578* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
579* ( ( -1 ) ( D22 ) )
580*
581* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
582* ( ( -1 ) ( D22 ) )
583*
584* Handle division by a small number. (NOTE: order of
585* operations is important)
586*
587* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
588* ( (( -1 ) ) (( D22 ) ) ),
589*
590* where D11 = d22/d21,
591* D22 = d11/conj(d21),
592* D21 = d21,
593* T = 1/(D22*D11-1).
594*
595* (NOTE: No need to check for division by ZERO,
596* since that was ensured earlier in pivot search:
597* (a) d21 != 0 in 2x2 pivot case(4),
598* since |d21| should be larger than |d11| and |d22|;
599* (b) (D22*D11 - 1) != 0, since from (a),
600* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
601*
602 d21 = w( k-1, kw )
603 d11 = w( k, kw ) / conjg( d21 )
604 d22 = w( k-1, kw-1 ) / d21
605 t = one / ( real( d11*d22 )-one )
606*
607* Update elements in columns A(k-1) and A(k) as
608* dot products of rows of ( W(kw-1) W(kw) ) and columns
609* of D**(-1)
610*
611 DO 20 j = 1, k - 2
612 a( j, k-1 ) = t*( ( d11*w( j, kw-1 )-w( j, kw ) ) /
613 \$ d21 )
614 a( j, k ) = t*( ( d22*w( j, kw )-w( j, kw-1 ) ) /
615 \$ conjg( d21 ) )
616 20 CONTINUE
617 END IF
618*
619* Copy D(k) to A
620*
621 a( k-1, k-1 ) = w( k-1, kw-1 )
622 a( k-1, k ) = w( k-1, kw )
623 a( k, k ) = w( k, kw )
624*
625* (2) Conjugate columns W(kw) and W(kw-1)
626*
627 CALL clacgv( k-1, w( 1, kw ), 1 )
628 CALL clacgv( k-2, w( 1, kw-1 ), 1 )
629*
630 END IF
631*
632 END IF
633*
634* Store details of the interchanges in IPIV
635*
636 IF( kstep.EQ.1 ) THEN
637 ipiv( k ) = kp
638 ELSE
639 ipiv( k ) = -p
640 ipiv( k-1 ) = -kp
641 END IF
642*
643* Decrease K and return to the start of the main loop
644*
645 k = k - kstep
646 GO TO 10
647*
648 30 CONTINUE
649*
650* Update the upper triangle of A11 (= A(1:k,1:k)) as
651*
652* A11 := A11 - U12*D*U12**H = A11 - U12*W**H
653*
654* computing blocks of NB columns at a time (note that conjg(W) is
655* actually stored)
656*
657 DO 50 j = ( ( k-1 ) / nb )*nb + 1, 1, -nb
658 jb = min( nb, k-j+1 )
659*
660* Update the upper triangle of the diagonal block
661*
662 DO 40 jj = j, j + jb - 1
663 a( jj, jj ) = real( a( jj, jj ) )
664 CALL cgemv( 'No transpose', jj-j+1, n-k, -cone,
665 \$ a( j, k+1 ), lda, w( jj, kw+1 ), ldw, cone,
666 \$ a( j, jj ), 1 )
667 a( jj, jj ) = real( a( jj, jj ) )
668 40 CONTINUE
669*
670* Update the rectangular superdiagonal block
671*
672 IF( j.GE.2 )
673 \$ CALL cgemm( 'No transpose', 'Transpose', j-1, jb, n-k,
674 \$ -cone, a( 1, k+1 ), lda, w( j, kw+1 ), ldw,
675 \$ cone, a( 1, j ), lda )
676 50 CONTINUE
677*
678* Put U12 in standard form by partially undoing the interchanges
679* in of rows in columns k+1:n looping backwards from k+1 to n
680*
681 j = k + 1
682 60 CONTINUE
683*
684* Undo the interchanges (if any) of rows J and JP2
685* (or J and JP2, and J+1 and JP1) at each step J
686*
687 kstep = 1
688 jp1 = 1
689* (Here, J is a diagonal index)
690 jj = j
691 jp2 = ipiv( j )
692 IF( jp2.LT.0 ) THEN
693 jp2 = -jp2
694* (Here, J is a diagonal index)
695 j = j + 1
696 jp1 = -ipiv( j )
697 kstep = 2
698 END IF
699* (NOTE: Here, J is used to determine row length. Length N-J+1
700* of the rows to swap back doesn't include diagonal element)
701 j = j + 1
702 IF( jp2.NE.jj .AND. j.LE.n )
703 \$ CALL cswap( n-j+1, a( jp2, j ), lda, a( jj, j ), lda )
704 jj = jj + 1
705 IF( kstep.EQ.2 .AND. jp1.NE.jj .AND. j.LE.n )
706 \$ CALL cswap( n-j+1, a( jp1, j ), lda, a( jj, j ), lda )
707 IF( j.LT.n )
708 \$ GO TO 60
709*
710* Set KB to the number of columns factorized
711*
712 kb = n - k
713*
714 ELSE
715*
716* Factorize the leading columns of A using the lower triangle
717* of A and working forwards, and compute the matrix W = L21*D
718* for use in updating A22 (note that conjg(W) is actually stored)
719*
720* K is the main loop index, increasing from 1 in steps of 1 or 2
721*
722 k = 1
723 70 CONTINUE
724*
725* Exit from loop
726*
727 IF( ( k.GE.nb .AND. nb.LT.n ) .OR. k.GT.n )
728 \$ GO TO 90
729*
730 kstep = 1
731 p = k
732*
733* Copy column K of A to column K of W and update column K of W
734*
735 w( k, k ) = real( a( k, k ) )
736 IF( k.LT.n )
737 \$ CALL ccopy( n-k, a( k+1, k ), 1, w( k+1, k ), 1 )
738 IF( k.GT.1 ) THEN
739 CALL cgemv( 'No transpose', n-k+1, k-1, -cone, a( k, 1 ),
740 \$ lda, w( k, 1 ), ldw, cone, w( k, k ), 1 )
741 w( k, k ) = real( w( k, k ) )
742 END IF
743*
744* Determine rows and columns to be interchanged and whether
745* a 1-by-1 or 2-by-2 pivot block will be used
746*
747 absakk = abs( real( w( k, k ) ) )
748*
749* IMAX is the row-index of the largest off-diagonal element in
750* column K, and COLMAX is its absolute value.
751* Determine both COLMAX and IMAX.
752*
753 IF( k.LT.n ) THEN
754 imax = k + icamax( n-k, w( k+1, k ), 1 )
755 colmax = cabs1( w( imax, k ) )
756 ELSE
757 colmax = zero
758 END IF
759*
760 IF( max( absakk, colmax ).EQ.zero ) THEN
761*
762* Column K is zero or underflow: set INFO and continue
763*
764 IF( info.EQ.0 )
765 \$ info = k
766 kp = k
767 a( k, k ) = real( w( k, k ) )
768 IF( k.LT.n )
769 \$ CALL ccopy( n-k, w( k+1, k ), 1, a( k+1, k ), 1 )
770 ELSE
771*
772* ============================================================
773*
774* BEGIN pivot search
775*
776* Case(1)
777* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX
778* (used to handle NaN and Inf)
779*
780 IF( .NOT.( absakk.LT.alpha*colmax ) ) THEN
781*
782* no interchange, use 1-by-1 pivot block
783*
784 kp = k
785*
786 ELSE
787*
788 done = .false.
789*
790* Loop until pivot found
791*
792 72 CONTINUE
793*
794* BEGIN pivot search loop body
795*
796*
797* Copy column IMAX to column k+1 of W and update it
798*
799 CALL ccopy( imax-k, a( imax, k ), lda, w( k, k+1 ), 1)
800 CALL clacgv( imax-k, w( k, k+1 ), 1 )
801 w( imax, k+1 ) = real( a( imax, imax ) )
802*
803 IF( imax.LT.n )
804 \$ CALL ccopy( n-imax, a( imax+1, imax ), 1,
805 \$ w( imax+1, k+1 ), 1 )
806*
807 IF( k.GT.1 ) THEN
808 CALL cgemv( 'No transpose', n-k+1, k-1, -cone,
809 \$ a( k, 1 ), lda, w( imax, 1 ), ldw,
810 \$ cone, w( k, k+1 ), 1 )
811 w( imax, k+1 ) = real( w( imax, k+1 ) )
812 END IF
813*
814* JMAX is the column-index of the largest off-diagonal
815* element in row IMAX, and ROWMAX is its absolute value.
816* Determine both ROWMAX and JMAX.
817*
818 IF( imax.NE.k ) THEN
819 jmax = k - 1 + icamax( imax-k, w( k, k+1 ), 1 )
820 rowmax = cabs1( w( jmax, k+1 ) )
821 ELSE
822 rowmax = zero
823 END IF
824*
825 IF( imax.LT.n ) THEN
826 itemp = imax + icamax( n-imax, w( imax+1, k+1 ), 1)
827 stemp = cabs1( w( itemp, k+1 ) )
828 IF( stemp.GT.rowmax ) THEN
829 rowmax = stemp
830 jmax = itemp
831 END IF
832 END IF
833*
834* Case(2)
835* Equivalent to testing for
836* ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX
837* (used to handle NaN and Inf)
838*
839 IF( .NOT.( abs( real( w( imax,k+1 ) ) )
840 \$ .LT.alpha*rowmax ) ) THEN
841*
842* interchange rows and columns K and IMAX,
843* use 1-by-1 pivot block
844*
845 kp = imax
846*
847* copy column K+1 of W to column K of W
848*
849 CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
850*
851 done = .true.
852*
853* Case(3)
854* Equivalent to testing for ROWMAX.EQ.COLMAX,
855* (used to handle NaN and Inf)
856*
857 ELSE IF( ( p.EQ.jmax ) .OR. ( rowmax.LE.colmax ) )
858 \$ THEN
859*
860* interchange rows and columns K+1 and IMAX,
861* use 2-by-2 pivot block
862*
863 kp = imax
864 kstep = 2
865 done = .true.
866*
867* Case(4)
868 ELSE
869*
871*
872 p = imax
873 colmax = rowmax
874 imax = jmax
875*
876* Copy updated JMAXth (next IMAXth) column to Kth of W
877*
878 CALL ccopy( n-k+1, w( k, k+1 ), 1, w( k, k ), 1 )
879*
880 END IF
881*
882*
883* End pivot search loop body
884*
885 IF( .NOT.done ) GOTO 72
886*
887 END IF
888*
889* END pivot search
890*
891* ============================================================
892*
893* KK is the column of A where pivoting step stopped
894*
895 kk = k + kstep - 1
896*
897* Interchange rows and columns P and K (only for 2-by-2 pivot).
898* Updated column P is already stored in column K of W.
899*
900 IF( ( kstep.EQ.2 ) .AND. ( p.NE.k ) ) THEN
901*
902* Copy non-updated column KK-1 to column P of submatrix A
903* at step K. No need to copy element into columns
904* K and K+1 of A for 2-by-2 pivot, since these columns
905* will be later overwritten.
906*
907 a( p, p ) = real( a( k, k ) )
908 CALL ccopy( p-k-1, a( k+1, k ), 1, a( p, k+1 ), lda )
909 CALL clacgv( p-k-1, a( p, k+1 ), lda )
910 IF( p.LT.n )
911 \$ CALL ccopy( n-p, a( p+1, k ), 1, a( p+1, p ), 1 )
912*
913* Interchange rows K and P in first K-1 columns of A
914* (columns K and K+1 of A for 2-by-2 pivot will be
915* later overwritten). Interchange rows K and P
916* in first KK columns of W.
917*
918 IF( k.GT.1 )
919 \$ CALL cswap( k-1, a( k, 1 ), lda, a( p, 1 ), lda )
920 CALL cswap( kk, w( k, 1 ), ldw, w( p, 1 ), ldw )
921 END IF
922*
923* Interchange rows and columns KP and KK.
924* Updated column KP is already stored in column KK of W.
925*
926 IF( kp.NE.kk ) THEN
927*
928* Copy non-updated column KK to column KP of submatrix A
929* at step K. No need to copy element into column K
930* (or K and K+1 for 2-by-2 pivot) of A, since these columns
931* will be later overwritten.
932*
933 a( kp, kp ) = real( a( kk, kk ) )
934 CALL ccopy( kp-kk-1, a( kk+1, kk ), 1, a( kp, kk+1 ),
935 \$ lda )
936 CALL clacgv( kp-kk-1, a( kp, kk+1 ), lda )
937 IF( kp.LT.n )
938 \$ CALL ccopy( n-kp, a( kp+1, kk ), 1, a( kp+1, kp ), 1 )
939*
940* Interchange rows KK and KP in first K-1 columns of A
941* (column K (or K and K+1 for 2-by-2 pivot) of A will be
942* later overwritten). Interchange rows KK and KP
943* in first KK columns of W.
944*
945 IF( k.GT.1 )
946 \$ CALL cswap( k-1, a( kk, 1 ), lda, a( kp, 1 ), lda )
947 CALL cswap( kk, w( kk, 1 ), ldw, w( kp, 1 ), ldw )
948 END IF
949*
950 IF( kstep.EQ.1 ) THEN
951*
952* 1-by-1 pivot block D(k): column k of W now holds
953*
954* W(k) = L(k)*D(k),
955*
956* where L(k) is the k-th column of L
957*
958* (1) Store subdiag. elements of column L(k)
959* and 1-by-1 block D(k) in column k of A.
960* (NOTE: Diagonal element L(k,k) is a UNIT element
961* and not stored)
962* A(k,k) := D(k,k) = W(k,k)
963* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k)
964*
965* (NOTE: No need to use for Hermitian matrix
966* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal
967* element D(k,k) from W (potentially saves only one load))
968 CALL ccopy( n-k+1, w( k, k ), 1, a( k, k ), 1 )
969 IF( k.LT.n ) THEN
970*
971* (NOTE: No need to check if A(k,k) is NOT ZERO,
972* since that was ensured earlier in pivot search:
973* case A(k,k) = 0 falls into 2x2 pivot case(3))
974*
975* Handle division by a small number
976*
977 t = real( a( k, k ) )
978 IF( abs( t ).GE.sfmin ) THEN
979 r1 = one / t
980 CALL csscal( n-k, r1, a( k+1, k ), 1 )
981 ELSE
982 DO 74 ii = k + 1, n
983 a( ii, k ) = a( ii, k ) / t
984 74 CONTINUE
985 END IF
986*
987* (2) Conjugate column W(k)
988*
989 CALL clacgv( n-k, w( k+1, k ), 1 )
990 END IF
991*
992 ELSE
993*
994* 2-by-2 pivot block D(k): columns k and k+1 of W now hold
995*
996* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
997*
998* where L(k) and L(k+1) are the k-th and (k+1)-th columns
999* of L
1000*
1001* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2
1002* block D(k:k+1,k:k+1) in columns k and k+1 of A.
1003* NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT
1004* block and not stored.
1005* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1)
1006* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) =
1007* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) )
1008*
1009 IF( k.LT.n-1 ) THEN
1010*
1011* Factor out the columns of the inverse of 2-by-2 pivot
1012* block D, so that each column contains 1, to reduce the
1013* number of FLOPS when we multiply panel
1014* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1).
1015*
1016* D**(-1) = ( d11 cj(d21) )**(-1) =
1017* ( d21 d22 )
1018*
1019* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) =
1020* ( (-d21) ( d11 ) )
1021*
1022* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) *
1023*
1024* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) =
1025* ( ( -1 ) ( d11/conj(d21) ) )
1026*
1027* = 1/(|d21|**2) * 1/(D22*D11-1) *
1028*
1029* * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1030* ( ( -1 ) ( D22 ) )
1031*
1032* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) =
1033* ( ( -1 ) ( D22 ) )
1034*
1035* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) =
1036* ( ( -1 ) ( D22 ) )
1037*
1038* Handle division by a small number. (NOTE: order of
1039* operations is important)
1040*
1041* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) )
1042* ( (( -1 ) ) (( D22 ) ) ),
1043*
1044* where D11 = d22/d21,
1045* D22 = d11/conj(d21),
1046* D21 = d21,
1047* T = 1/(D22*D11-1).
1048*
1049* (NOTE: No need to check for division by ZERO,
1050* since that was ensured earlier in pivot search:
1051* (a) d21 != 0 in 2x2 pivot case(4),
1052* since |d21| should be larger than |d11| and |d22|;
1053* (b) (D22*D11 - 1) != 0, since from (a),
1054* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.)
1055*
1056 d21 = w( k+1, k )
1057 d11 = w( k+1, k+1 ) / d21
1058 d22 = w( k, k ) / conjg( d21 )
1059 t = one / ( real( d11*d22 )-one )
1060*
1061* Update elements in columns A(k) and A(k+1) as
1062* dot products of rows of ( W(k) W(k+1) ) and columns
1063* of D**(-1)
1064*
1065 DO 80 j = k + 2, n
1066 a( j, k ) = t*( ( d11*w( j, k )-w( j, k+1 ) ) /
1067 \$ conjg( d21 ) )
1068 a( j, k+1 ) = t*( ( d22*w( j, k+1 )-w( j, k ) ) /
1069 \$ d21 )
1070 80 CONTINUE
1071 END IF
1072*
1073* Copy D(k) to A
1074*
1075 a( k, k ) = w( k, k )
1076 a( k+1, k ) = w( k+1, k )
1077 a( k+1, k+1 ) = w( k+1, k+1 )
1078*
1079* (2) Conjugate columns W(k) and W(k+1)
1080*
1081 CALL clacgv( n-k, w( k+1, k ), 1 )
1082 CALL clacgv( n-k-1, w( k+2, k+1 ), 1 )
1083*
1084 END IF
1085*
1086 END IF
1087*
1088* Store details of the interchanges in IPIV
1089*
1090 IF( kstep.EQ.1 ) THEN
1091 ipiv( k ) = kp
1092 ELSE
1093 ipiv( k ) = -p
1094 ipiv( k+1 ) = -kp
1095 END IF
1096*
1097* Increase K and return to the start of the main loop
1098*
1099 k = k + kstep
1100 GO TO 70
1101*
1102 90 CONTINUE
1103*
1104* Update the lower triangle of A22 (= A(k:n,k:n)) as
1105*
1106* A22 := A22 - L21*D*L21**H = A22 - L21*W**H
1107*
1108* computing blocks of NB columns at a time (note that conjg(W) is
1109* actually stored)
1110*
1111 DO 110 j = k, n, nb
1112 jb = min( nb, n-j+1 )
1113*
1114* Update the lower triangle of the diagonal block
1115*
1116 DO 100 jj = j, j + jb - 1
1117 a( jj, jj ) = real( a( jj, jj ) )
1118 CALL cgemv( 'No transpose', j+jb-jj, k-1, -cone,
1119 \$ a( jj, 1 ), lda, w( jj, 1 ), ldw, cone,
1120 \$ a( jj, jj ), 1 )
1121 a( jj, jj ) = real( a( jj, jj ) )
1122 100 CONTINUE
1123*
1124* Update the rectangular subdiagonal block
1125*
1126 IF( j+jb.LE.n )
1127 \$ CALL cgemm( 'No transpose', 'Transpose', n-j-jb+1, jb,
1128 \$ k-1, -cone, a( j+jb, 1 ), lda, w( j, 1 ),
1129 \$ ldw, cone, a( j+jb, j ), lda )
1130 110 CONTINUE
1131*
1132* Put L21 in standard form by partially undoing the interchanges
1133* of rows in columns 1:k-1 looping backwards from k-1 to 1
1134*
1135 j = k - 1
1136 120 CONTINUE
1137*
1138* Undo the interchanges (if any) of rows J and JP2
1139* (or J and JP2, and J-1 and JP1) at each step J
1140*
1141 kstep = 1
1142 jp1 = 1
1143* (Here, J is a diagonal index)
1144 jj = j
1145 jp2 = ipiv( j )
1146 IF( jp2.LT.0 ) THEN
1147 jp2 = -jp2
1148* (Here, J is a diagonal index)
1149 j = j - 1
1150 jp1 = -ipiv( j )
1151 kstep = 2
1152 END IF
1153* (NOTE: Here, J is used to determine row length. Length J
1154* of the rows to swap back doesn't include diagonal element)
1155 j = j - 1
1156 IF( jp2.NE.jj .AND. j.GE.1 )
1157 \$ CALL cswap( j, a( jp2, 1 ), lda, a( jj, 1 ), lda )
1158 jj = jj -1
1159 IF( kstep.EQ.2 .AND. jp1.NE.jj .AND. j.GE.1 )
1160 \$ CALL cswap( j, a( jp1, 1 ), lda, a( jj, 1 ), lda )
1161 IF( j.GT.1 )
1162 \$ GO TO 120
1163*
1164* Set KB to the number of columns factorized
1165*
1166 kb = k - 1
1167*
1168 END IF
1169 RETURN
1170*
1171* End of CLAHEF_ROOK
1172*
1173 END
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine cgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
CGEMV
Definition cgemv.f:160
subroutine clacgv(n, x, incx)
CLACGV conjugates a complex vector.
Definition clacgv.f:74
subroutine clahef_rook(uplo, n, nb, kb, a, lda, ipiv, w, ldw, info)