LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
chetri_3x.f
Go to the documentation of this file.
1*> \brief \b CHETRI_3X
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download CHETRI_3X + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetri_3x.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetri_3x.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetri_3x.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE CHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22*
23* .. Scalar Arguments ..
24* CHARACTER UPLO
25* INTEGER INFO, LDA, N, NB
26* ..
27* .. Array Arguments ..
28* INTEGER IPIV( * )
29* COMPLEX A( LDA, * ), E( * ), WORK( N+NB+1, * )
30* ..
31*
32*
33*> \par Purpose:
34* =============
35*>
36*> \verbatim
37*> CHETRI_3X computes the inverse of a complex Hermitian indefinite
38*> matrix A using the factorization computed by CHETRF_RK or CHETRF_BK:
39*>
40*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T),
41*>
42*> where U (or L) is unit upper (or lower) triangular matrix,
43*> U**H (or L**H) is the conjugate of U (or L), P is a permutation
44*> matrix, P**T is the transpose of P, and D is Hermitian and block
45*> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46*>
47*> This is the blocked version of the algorithm, calling Level 3 BLAS.
48*> \endverbatim
49*
50* Arguments:
51* ==========
52*
53*> \param[in] UPLO
54*> \verbatim
55*> UPLO is CHARACTER*1
56*> Specifies whether the details of the factorization are
57*> stored as an upper or lower triangular matrix.
58*> = 'U': Upper triangle of A is stored;
59*> = 'L': Lower triangle of A is stored.
60*> \endverbatim
61*>
62*> \param[in] N
63*> \verbatim
64*> N is INTEGER
65*> The order of the matrix A. N >= 0.
66*> \endverbatim
67*>
68*> \param[in,out] A
69*> \verbatim
70*> A is COMPLEX array, dimension (LDA,N)
71*> On entry, diagonal of the block diagonal matrix D and
72*> factors U or L as computed by CHETRF_RK and CHETRF_BK:
73*> a) ONLY diagonal elements of the Hermitian block diagonal
74*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75*> (superdiagonal (or subdiagonal) elements of D
76*> should be provided on entry in array E), and
77*> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78*> If UPLO = 'L': factor L in the subdiagonal part of A.
79*>
80*> On exit, if INFO = 0, the Hermitian inverse of the original
81*> matrix.
82*> If UPLO = 'U': the upper triangular part of the inverse
83*> is formed and the part of A below the diagonal is not
84*> referenced;
85*> If UPLO = 'L': the lower triangular part of the inverse
86*> is formed and the part of A above the diagonal is not
87*> referenced.
88*> \endverbatim
89*>
90*> \param[in] LDA
91*> \verbatim
92*> LDA is INTEGER
93*> The leading dimension of the array A. LDA >= max(1,N).
94*> \endverbatim
95*>
96*> \param[in] E
97*> \verbatim
98*> E is COMPLEX array, dimension (N)
99*> On entry, contains the superdiagonal (or subdiagonal)
100*> elements of the Hermitian block diagonal matrix D
101*> with 1-by-1 or 2-by-2 diagonal blocks, where
102*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104*>
105*> NOTE: For 1-by-1 diagonal block D(k), where
106*> 1 <= k <= N, the element E(k) is not referenced in both
107*> UPLO = 'U' or UPLO = 'L' cases.
108*> \endverbatim
109*>
110*> \param[in] IPIV
111*> \verbatim
112*> IPIV is INTEGER array, dimension (N)
113*> Details of the interchanges and the block structure of D
114*> as determined by CHETRF_RK or CHETRF_BK.
115*> \endverbatim
116*>
117*> \param[out] WORK
118*> \verbatim
119*> WORK is COMPLEX array, dimension (N+NB+1,NB+3).
120*> \endverbatim
121*>
122*> \param[in] NB
123*> \verbatim
124*> NB is INTEGER
125*> Block size.
126*> \endverbatim
127*>
128*> \param[out] INFO
129*> \verbatim
130*> INFO is INTEGER
131*> = 0: successful exit
132*> < 0: if INFO = -i, the i-th argument had an illegal value
133*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134*> inverse could not be computed.
135*> \endverbatim
136*
137* Authors:
138* ========
139*
140*> \author Univ. of Tennessee
141*> \author Univ. of California Berkeley
142*> \author Univ. of Colorado Denver
143*> \author NAG Ltd.
144*
145*> \ingroup complexHEcomputational
146*
147*> \par Contributors:
148* ==================
149*> \verbatim
150*>
151*> June 2017, Igor Kozachenko,
152*> Computer Science Division,
153*> University of California, Berkeley
154*>
155*> \endverbatim
156*
157* =====================================================================
158 SUBROUTINE chetri_3x( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
159*
160* -- LAPACK computational routine --
161* -- LAPACK is a software package provided by Univ. of Tennessee, --
162* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163*
164* .. Scalar Arguments ..
165 CHARACTER UPLO
166 INTEGER INFO, LDA, N, NB
167* ..
168* .. Array Arguments ..
169 INTEGER IPIV( * )
170 COMPLEX A( LDA, * ), E( * ), WORK( N+NB+1, * )
171* ..
172*
173* =====================================================================
174*
175* .. Parameters ..
176 REAL ONE
177 parameter( one = 1.0e+0 )
178 COMPLEX CONE, CZERO
179 parameter( cone = ( 1.0e+0, 0.0e+0 ),
180 $ czero = ( 0.0e+0, 0.0e+0 ) )
181* ..
182* .. Local Scalars ..
183 LOGICAL UPPER
184 INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
185 REAL AK, AKP1, T
186 COMPLEX AKKP1, D, U01_I_J, U01_IP1_J, U11_I_J,
187 $ U11_IP1_J
188* ..
189* .. External Functions ..
190 LOGICAL LSAME
191 EXTERNAL lsame
192* ..
193* .. External Subroutines ..
194 EXTERNAL cgemm, cheswapr, ctrtri, ctrmm, xerbla
195* ..
196* .. Intrinsic Functions ..
197 INTRINSIC abs, conjg, max, real
198* ..
199* .. Executable Statements ..
200*
201* Test the input parameters.
202*
203 info = 0
204 upper = lsame( uplo, 'U' )
205 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
206 info = -1
207 ELSE IF( n.LT.0 ) THEN
208 info = -2
209 ELSE IF( lda.LT.max( 1, n ) ) THEN
210 info = -4
211 END IF
212*
213* Quick return if possible
214*
215 IF( info.NE.0 ) THEN
216 CALL xerbla( 'CHETRI_3X', -info )
217 RETURN
218 END IF
219 IF( n.EQ.0 )
220 $ RETURN
221*
222* Workspace got Non-diag elements of D
223*
224 DO k = 1, n
225 work( k, 1 ) = e( k )
226 END DO
227*
228* Check that the diagonal matrix D is nonsingular.
229*
230 IF( upper ) THEN
231*
232* Upper triangular storage: examine D from bottom to top
233*
234 DO info = n, 1, -1
235 IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
236 $ RETURN
237 END DO
238 ELSE
239*
240* Lower triangular storage: examine D from top to bottom.
241*
242 DO info = 1, n
243 IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.czero )
244 $ RETURN
245 END DO
246 END IF
247*
248 info = 0
249*
250* Splitting Workspace
251* U01 is a block ( N, NB+1 )
252* The first element of U01 is in WORK( 1, 1 )
253* U11 is a block ( NB+1, NB+1 )
254* The first element of U11 is in WORK( N+1, 1 )
255*
256 u11 = n
257*
258* INVD is a block ( N, 2 )
259* The first element of INVD is in WORK( 1, INVD )
260*
261 invd = nb + 2
262
263 IF( upper ) THEN
264*
265* Begin Upper
266*
267* invA = P * inv(U**H) * inv(D) * inv(U) * P**T.
268*
269 CALL ctrtri( uplo, 'U', n, a, lda, info )
270*
271* inv(D) and inv(D) * inv(U)
272*
273 k = 1
274 DO WHILE( k.LE.n )
275 IF( ipiv( k ).GT.0 ) THEN
276* 1 x 1 diagonal NNB
277 work( k, invd ) = one / real( a( k, k ) )
278 work( k, invd+1 ) = czero
279 ELSE
280* 2 x 2 diagonal NNB
281 t = abs( work( k+1, 1 ) )
282 ak = real( a( k, k ) ) / t
283 akp1 = real( a( k+1, k+1 ) ) / t
284 akkp1 = work( k+1, 1 ) / t
285 d = t*( ak*akp1-cone )
286 work( k, invd ) = akp1 / d
287 work( k+1, invd+1 ) = ak / d
288 work( k, invd+1 ) = -akkp1 / d
289 work( k+1, invd ) = conjg( work( k, invd+1 ) )
290 k = k + 1
291 END IF
292 k = k + 1
293 END DO
294*
295* inv(U**H) = (inv(U))**H
296*
297* inv(U**H) * inv(D) * inv(U)
298*
299 cut = n
300 DO WHILE( cut.GT.0 )
301 nnb = nb
302 IF( cut.LE.nnb ) THEN
303 nnb = cut
304 ELSE
305 icount = 0
306* count negative elements,
307 DO i = cut+1-nnb, cut
308 IF( ipiv( i ).LT.0 ) icount = icount + 1
309 END DO
310* need a even number for a clear cut
311 IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
312 END IF
313
314 cut = cut - nnb
315*
316* U01 Block
317*
318 DO i = 1, cut
319 DO j = 1, nnb
320 work( i, j ) = a( i, cut+j )
321 END DO
322 END DO
323*
324* U11 Block
325*
326 DO i = 1, nnb
327 work( u11+i, i ) = cone
328 DO j = 1, i-1
329 work( u11+i, j ) = czero
330 END DO
331 DO j = i+1, nnb
332 work( u11+i, j ) = a( cut+i, cut+j )
333 END DO
334 END DO
335*
336* invD * U01
337*
338 i = 1
339 DO WHILE( i.LE.cut )
340 IF( ipiv( i ).GT.0 ) THEN
341 DO j = 1, nnb
342 work( i, j ) = work( i, invd ) * work( i, j )
343 END DO
344 ELSE
345 DO j = 1, nnb
346 u01_i_j = work( i, j )
347 u01_ip1_j = work( i+1, j )
348 work( i, j ) = work( i, invd ) * u01_i_j
349 $ + work( i, invd+1 ) * u01_ip1_j
350 work( i+1, j ) = work( i+1, invd ) * u01_i_j
351 $ + work( i+1, invd+1 ) * u01_ip1_j
352 END DO
353 i = i + 1
354 END IF
355 i = i + 1
356 END DO
357*
358* invD1 * U11
359*
360 i = 1
361 DO WHILE ( i.LE.nnb )
362 IF( ipiv( cut+i ).GT.0 ) THEN
363 DO j = i, nnb
364 work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
365 END DO
366 ELSE
367 DO j = i, nnb
368 u11_i_j = work(u11+i,j)
369 u11_ip1_j = work(u11+i+1,j)
370 work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
371 $ + work(cut+i,invd+1) * work(u11+i+1,j)
372 work( u11+i+1, j ) = work(cut+i+1,invd) * u11_i_j
373 $ + work(cut+i+1,invd+1) * u11_ip1_j
374 END DO
375 i = i + 1
376 END IF
377 i = i + 1
378 END DO
379*
380* U11**H * invD1 * U11 -> U11
381*
382 CALL ctrmm( 'L', 'U', 'C', 'U', nnb, nnb,
383 $ cone, a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
384 $ n+nb+1 )
385*
386 DO i = 1, nnb
387 DO j = i, nnb
388 a( cut+i, cut+j ) = work( u11+i, j )
389 END DO
390 END DO
391*
392* U01**H * invD * U01 -> A( CUT+I, CUT+J )
393*
394 CALL cgemm( 'C', 'N', nnb, nnb, cut, cone, a( 1, cut+1 ),
395 $ lda, work, n+nb+1, czero, work(u11+1,1),
396 $ n+nb+1 )
397
398*
399* U11 = U11**H * invD1 * U11 + U01**H * invD * U01
400*
401 DO i = 1, nnb
402 DO j = i, nnb
403 a( cut+i, cut+j ) = a( cut+i, cut+j ) + work(u11+i,j)
404 END DO
405 END DO
406*
407* U01 = U00**H * invD0 * U01
408*
409 CALL ctrmm( 'L', uplo, 'C', 'U', cut, nnb,
410 $ cone, a, lda, work, n+nb+1 )
411
412*
413* Update U01
414*
415 DO i = 1, cut
416 DO j = 1, nnb
417 a( i, cut+j ) = work( i, j )
418 END DO
419 END DO
420*
421* Next Block
422*
423 END DO
424*
425* Apply PERMUTATIONS P and P**T:
426* P * inv(U**H) * inv(D) * inv(U) * P**T.
427* Interchange rows and columns I and IPIV(I) in reverse order
428* from the formation order of IPIV vector for Upper case.
429*
430* ( We can use a loop over IPIV with increment 1,
431* since the ABS value of IPIV(I) represents the row (column)
432* index of the interchange with row (column) i in both 1x1
433* and 2x2 pivot cases, i.e. we don't need separate code branches
434* for 1x1 and 2x2 pivot cases )
435*
436 DO i = 1, n
437 ip = abs( ipiv( i ) )
438 IF( ip.NE.i ) THEN
439 IF (i .LT. ip) CALL cheswapr( uplo, n, a, lda, i ,ip )
440 IF (i .GT. ip) CALL cheswapr( uplo, n, a, lda, ip ,i )
441 END IF
442 END DO
443*
444 ELSE
445*
446* Begin Lower
447*
448* inv A = P * inv(L**H) * inv(D) * inv(L) * P**T.
449*
450 CALL ctrtri( uplo, 'U', n, a, lda, info )
451*
452* inv(D) and inv(D) * inv(L)
453*
454 k = n
455 DO WHILE ( k .GE. 1 )
456 IF( ipiv( k ).GT.0 ) THEN
457* 1 x 1 diagonal NNB
458 work( k, invd ) = one / real( a( k, k ) )
459 work( k, invd+1 ) = czero
460 ELSE
461* 2 x 2 diagonal NNB
462 t = abs( work( k-1, 1 ) )
463 ak = real( a( k-1, k-1 ) ) / t
464 akp1 = real( a( k, k ) ) / t
465 akkp1 = work( k-1, 1 ) / t
466 d = t*( ak*akp1-cone )
467 work( k-1, invd ) = akp1 / d
468 work( k, invd ) = ak / d
469 work( k, invd+1 ) = -akkp1 / d
470 work( k-1, invd+1 ) = conjg( work( k, invd+1 ) )
471 k = k - 1
472 END IF
473 k = k - 1
474 END DO
475*
476* inv(L**H) = (inv(L))**H
477*
478* inv(L**H) * inv(D) * inv(L)
479*
480 cut = 0
481 DO WHILE( cut.LT.n )
482 nnb = nb
483 IF( (cut + nnb).GT.n ) THEN
484 nnb = n - cut
485 ELSE
486 icount = 0
487* count negative elements,
488 DO i = cut + 1, cut+nnb
489 IF ( ipiv( i ).LT.0 ) icount = icount + 1
490 END DO
491* need a even number for a clear cut
492 IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
493 END IF
494*
495* L21 Block
496*
497 DO i = 1, n-cut-nnb
498 DO j = 1, nnb
499 work( i, j ) = a( cut+nnb+i, cut+j )
500 END DO
501 END DO
502*
503* L11 Block
504*
505 DO i = 1, nnb
506 work( u11+i, i) = cone
507 DO j = i+1, nnb
508 work( u11+i, j ) = czero
509 END DO
510 DO j = 1, i-1
511 work( u11+i, j ) = a( cut+i, cut+j )
512 END DO
513 END DO
514*
515* invD*L21
516*
517 i = n-cut-nnb
518 DO WHILE( i.GE.1 )
519 IF( ipiv( cut+nnb+i ).GT.0 ) THEN
520 DO j = 1, nnb
521 work( i, j ) = work( cut+nnb+i, invd) * work( i, j)
522 END DO
523 ELSE
524 DO j = 1, nnb
525 u01_i_j = work(i,j)
526 u01_ip1_j = work(i-1,j)
527 work(i,j)=work(cut+nnb+i,invd)*u01_i_j+
528 $ work(cut+nnb+i,invd+1)*u01_ip1_j
529 work(i-1,j)=work(cut+nnb+i-1,invd+1)*u01_i_j+
530 $ work(cut+nnb+i-1,invd)*u01_ip1_j
531 END DO
532 i = i - 1
533 END IF
534 i = i - 1
535 END DO
536*
537* invD1*L11
538*
539 i = nnb
540 DO WHILE( i.GE.1 )
541 IF( ipiv( cut+i ).GT.0 ) THEN
542 DO j = 1, nnb
543 work( u11+i, j ) = work( cut+i, invd)*work(u11+i,j)
544 END DO
545
546 ELSE
547 DO j = 1, nnb
548 u11_i_j = work( u11+i, j )
549 u11_ip1_j = work( u11+i-1, j )
550 work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
551 $ + work(cut+i,invd+1) * u11_ip1_j
552 work( u11+i-1, j ) = work(cut+i-1,invd+1) * u11_i_j
553 $ + work(cut+i-1,invd) * u11_ip1_j
554 END DO
555 i = i - 1
556 END IF
557 i = i - 1
558 END DO
559*
560* L11**H * invD1 * L11 -> L11
561*
562 CALL ctrmm( 'L', uplo, 'C', 'U', nnb, nnb, cone,
563 $ a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
564 $ n+nb+1 )
565
566*
567 DO i = 1, nnb
568 DO j = 1, i
569 a( cut+i, cut+j ) = work( u11+i, j )
570 END DO
571 END DO
572*
573 IF( (cut+nnb).LT.n ) THEN
574*
575* L21**H * invD2*L21 -> A( CUT+I, CUT+J )
576*
577 CALL cgemm( 'C', 'N', nnb, nnb, n-nnb-cut, cone,
578 $ a( cut+nnb+1, cut+1 ), lda, work, n+nb+1,
579 $ czero, work( u11+1, 1 ), n+nb+1 )
580
581*
582* L11 = L11**H * invD1 * L11 + U01**H * invD * U01
583*
584 DO i = 1, nnb
585 DO j = 1, i
586 a( cut+i, cut+j ) = a( cut+i, cut+j )+work(u11+i,j)
587 END DO
588 END DO
589*
590* L01 = L22**H * invD2 * L21
591*
592 CALL ctrmm( 'L', uplo, 'C', 'U', n-nnb-cut, nnb, cone,
593 $ a( cut+nnb+1, cut+nnb+1 ), lda, work,
594 $ n+nb+1 )
595*
596* Update L21
597*
598 DO i = 1, n-cut-nnb
599 DO j = 1, nnb
600 a( cut+nnb+i, cut+j ) = work( i, j )
601 END DO
602 END DO
603*
604 ELSE
605*
606* L11 = L11**H * invD1 * L11
607*
608 DO i = 1, nnb
609 DO j = 1, i
610 a( cut+i, cut+j ) = work( u11+i, j )
611 END DO
612 END DO
613 END IF
614*
615* Next Block
616*
617 cut = cut + nnb
618*
619 END DO
620*
621* Apply PERMUTATIONS P and P**T:
622* P * inv(L**H) * inv(D) * inv(L) * P**T.
623* Interchange rows and columns I and IPIV(I) in reverse order
624* from the formation order of IPIV vector for Lower case.
625*
626* ( We can use a loop over IPIV with increment -1,
627* since the ABS value of IPIV(I) represents the row (column)
628* index of the interchange with row (column) i in both 1x1
629* and 2x2 pivot cases, i.e. we don't need separate code branches
630* for 1x1 and 2x2 pivot cases )
631*
632 DO i = n, 1, -1
633 ip = abs( ipiv( i ) )
634 IF( ip.NE.i ) THEN
635 IF (i .LT. ip) CALL cheswapr( uplo, n, a, lda, i ,ip )
636 IF (i .GT. ip) CALL cheswapr( uplo, n, a, lda, ip ,i )
637 END IF
638 END DO
639*
640 END IF
641*
642 RETURN
643*
644* End of CHETRI_3X
645*
646 END
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:187
subroutine ctrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
CTRMM
Definition: ctrmm.f:177
subroutine cheswapr(UPLO, N, A, LDA, I1, I2)
CHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix.
Definition: cheswapr.f:102
subroutine chetri_3x(UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO)
CHETRI_3X
Definition: chetri_3x.f:159
subroutine ctrtri(UPLO, DIAG, N, A, LDA, INFO)
CTRTRI
Definition: ctrtri.f:109