LAPACK  3.10.0
LAPACK: Linear Algebra PACKage
ssytri_3x.f
Go to the documentation of this file.
1 *> \brief \b SSYTRI_3X
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SSYTRI_3X + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytri_3x.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytri_3x.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytri_3x.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22 *
23 * .. Scalar Arguments ..
24 * CHARACTER UPLO
25 * INTEGER INFO, LDA, N, NB
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * REAL A( LDA, * ), E( * ), WORK( N+NB+1, * )
30 * ..
31 *
32 *
33 *> \par Purpose:
34 * =============
35 *>
36 *> \verbatim
37 *> SSYTRI_3X computes the inverse of a real symmetric indefinite
38 *> matrix A using the factorization computed by SSYTRF_RK or SSYTRF_BK:
39 *>
40 *> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
41 *>
42 *> where U (or L) is unit upper (or lower) triangular matrix,
43 *> U**T (or L**T) is the transpose of U (or L), P is a permutation
44 *> matrix, P**T is the transpose of P, and D is symmetric and block
45 *> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46 *>
47 *> This is the blocked version of the algorithm, calling Level 3 BLAS.
48 *> \endverbatim
49 *
50 * Arguments:
51 * ==========
52 *
53 *> \param[in] UPLO
54 *> \verbatim
55 *> UPLO is CHARACTER*1
56 *> Specifies whether the details of the factorization are
57 *> stored as an upper or lower triangular matrix.
58 *> = 'U': Upper triangle of A is stored;
59 *> = 'L': Lower triangle of A is stored.
60 *> \endverbatim
61 *>
62 *> \param[in] N
63 *> \verbatim
64 *> N is INTEGER
65 *> The order of the matrix A. N >= 0.
66 *> \endverbatim
67 *>
68 *> \param[in,out] A
69 *> \verbatim
70 *> A is REAL array, dimension (LDA,N)
71 *> On entry, diagonal of the block diagonal matrix D and
72 *> factors U or L as computed by SYTRF_RK and SSYTRF_BK:
73 *> a) ONLY diagonal elements of the symmetric block diagonal
74 *> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75 *> (superdiagonal (or subdiagonal) elements of D
76 *> should be provided on entry in array E), and
77 *> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78 *> If UPLO = 'L': factor L in the subdiagonal part of A.
79 *>
80 *> On exit, if INFO = 0, the symmetric inverse of the original
81 *> matrix.
82 *> If UPLO = 'U': the upper triangular part of the inverse
83 *> is formed and the part of A below the diagonal is not
84 *> referenced;
85 *> If UPLO = 'L': the lower triangular part of the inverse
86 *> is formed and the part of A above the diagonal is not
87 *> referenced.
88 *> \endverbatim
89 *>
90 *> \param[in] LDA
91 *> \verbatim
92 *> LDA is INTEGER
93 *> The leading dimension of the array A. LDA >= max(1,N).
94 *> \endverbatim
95 *>
96 *> \param[in] E
97 *> \verbatim
98 *> E is REAL array, dimension (N)
99 *> On entry, contains the superdiagonal (or subdiagonal)
100 *> elements of the symmetric block diagonal matrix D
101 *> with 1-by-1 or 2-by-2 diagonal blocks, where
102 *> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103 *> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104 *>
105 *> NOTE: For 1-by-1 diagonal block D(k), where
106 *> 1 <= k <= N, the element E(k) is not referenced in both
107 *> UPLO = 'U' or UPLO = 'L' cases.
108 *> \endverbatim
109 *>
110 *> \param[in] IPIV
111 *> \verbatim
112 *> IPIV is INTEGER array, dimension (N)
113 *> Details of the interchanges and the block structure of D
114 *> as determined by SSYTRF_RK or SSYTRF_BK.
115 *> \endverbatim
116 *>
117 *> \param[out] WORK
118 *> \verbatim
119 *> WORK is REAL array, dimension (N+NB+1,NB+3).
120 *> \endverbatim
121 *>
122 *> \param[in] NB
123 *> \verbatim
124 *> NB is INTEGER
125 *> Block size.
126 *> \endverbatim
127 *>
128 *> \param[out] INFO
129 *> \verbatim
130 *> INFO is INTEGER
131 *> = 0: successful exit
132 *> < 0: if INFO = -i, the i-th argument had an illegal value
133 *> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134 *> inverse could not be computed.
135 *> \endverbatim
136 *
137 * Authors:
138 * ========
139 *
140 *> \author Univ. of Tennessee
141 *> \author Univ. of California Berkeley
142 *> \author Univ. of Colorado Denver
143 *> \author NAG Ltd.
144 *
145 *> \ingroup singleSYcomputational
146 *
147 *> \par Contributors:
148 * ==================
149 *> \verbatim
150 *>
151 *> June 2017, Igor Kozachenko,
152 *> Computer Science Division,
153 *> University of California, Berkeley
154 *>
155 *> \endverbatim
156 *
157 * =====================================================================
158  SUBROUTINE ssytri_3x( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
159 *
160 * -- LAPACK computational routine --
161 * -- LAPACK is a software package provided by Univ. of Tennessee, --
162 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163 *
164 * .. Scalar Arguments ..
165  CHARACTER UPLO
166  INTEGER INFO, LDA, N, NB
167 * ..
168 * .. Array Arguments ..
169  INTEGER IPIV( * )
170  REAL A( LDA, * ), E( * ), WORK( N+NB+1, * )
171 * ..
172 *
173 * =====================================================================
174 *
175 * .. Parameters ..
176  REAL ONE, ZERO
177  parameter( one = 1.0e+0, zero = 0.0e+0 )
178 * ..
179 * .. Local Scalars ..
180  LOGICAL UPPER
181  INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
182  REAL AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
183  $ U11_I_J, U11_IP1_J
184 * ..
185 * .. External Functions ..
186  LOGICAL LSAME
187  EXTERNAL lsame
188 * ..
189 * .. External Subroutines ..
190  EXTERNAL sgemm, ssyswapr, strtri, strmm, xerbla
191 * ..
192 * .. Intrinsic Functions ..
193  INTRINSIC abs, max, mod
194 * ..
195 * .. Executable Statements ..
196 *
197 * Test the input parameters.
198 *
199  info = 0
200  upper = lsame( uplo, 'U' )
201  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
202  info = -1
203  ELSE IF( n.LT.0 ) THEN
204  info = -2
205  ELSE IF( lda.LT.max( 1, n ) ) THEN
206  info = -4
207  END IF
208 *
209 * Quick return if possible
210 *
211  IF( info.NE.0 ) THEN
212  CALL xerbla( 'SSYTRI_3X', -info )
213  RETURN
214  END IF
215  IF( n.EQ.0 )
216  $ RETURN
217 *
218 * Workspace got Non-diag elements of D
219 *
220  DO k = 1, n
221  work( k, 1 ) = e( k )
222  END DO
223 *
224 * Check that the diagonal matrix D is nonsingular.
225 *
226  IF( upper ) THEN
227 *
228 * Upper triangular storage: examine D from bottom to top
229 *
230  DO info = n, 1, -1
231  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.zero )
232  $ RETURN
233  END DO
234  ELSE
235 *
236 * Lower triangular storage: examine D from top to bottom.
237 *
238  DO info = 1, n
239  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.zero )
240  $ RETURN
241  END DO
242  END IF
243 *
244  info = 0
245 *
246 * Splitting Workspace
247 * U01 is a block ( N, NB+1 )
248 * The first element of U01 is in WORK( 1, 1 )
249 * U11 is a block ( NB+1, NB+1 )
250 * The first element of U11 is in WORK( N+1, 1 )
251 *
252  u11 = n
253 *
254 * INVD is a block ( N, 2 )
255 * The first element of INVD is in WORK( 1, INVD )
256 *
257  invd = nb + 2
258 
259  IF( upper ) THEN
260 *
261 * Begin Upper
262 *
263 * invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
264 *
265  CALL strtri( uplo, 'U', n, a, lda, info )
266 *
267 * inv(D) and inv(D) * inv(U)
268 *
269  k = 1
270  DO WHILE( k.LE.n )
271  IF( ipiv( k ).GT.0 ) THEN
272 * 1 x 1 diagonal NNB
273  work( k, invd ) = one / a( k, k )
274  work( k, invd+1 ) = zero
275  ELSE
276 * 2 x 2 diagonal NNB
277  t = work( k+1, 1 )
278  ak = a( k, k ) / t
279  akp1 = a( k+1, k+1 ) / t
280  akkp1 = work( k+1, 1 ) / t
281  d = t*( ak*akp1-one )
282  work( k, invd ) = akp1 / d
283  work( k+1, invd+1 ) = ak / d
284  work( k, invd+1 ) = -akkp1 / d
285  work( k+1, invd ) = work( k, invd+1 )
286  k = k + 1
287  END IF
288  k = k + 1
289  END DO
290 *
291 * inv(U**T) = (inv(U))**T
292 *
293 * inv(U**T) * inv(D) * inv(U)
294 *
295  cut = n
296  DO WHILE( cut.GT.0 )
297  nnb = nb
298  IF( cut.LE.nnb ) THEN
299  nnb = cut
300  ELSE
301  icount = 0
302 * count negative elements,
303  DO i = cut+1-nnb, cut
304  IF( ipiv( i ).LT.0 ) icount = icount + 1
305  END DO
306 * need a even number for a clear cut
307  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
308  END IF
309 
310  cut = cut - nnb
311 *
312 * U01 Block
313 *
314  DO i = 1, cut
315  DO j = 1, nnb
316  work( i, j ) = a( i, cut+j )
317  END DO
318  END DO
319 *
320 * U11 Block
321 *
322  DO i = 1, nnb
323  work( u11+i, i ) = one
324  DO j = 1, i-1
325  work( u11+i, j ) = zero
326  END DO
327  DO j = i+1, nnb
328  work( u11+i, j ) = a( cut+i, cut+j )
329  END DO
330  END DO
331 *
332 * invD * U01
333 *
334  i = 1
335  DO WHILE( i.LE.cut )
336  IF( ipiv( i ).GT.0 ) THEN
337  DO j = 1, nnb
338  work( i, j ) = work( i, invd ) * work( i, j )
339  END DO
340  ELSE
341  DO j = 1, nnb
342  u01_i_j = work( i, j )
343  u01_ip1_j = work( i+1, j )
344  work( i, j ) = work( i, invd ) * u01_i_j
345  $ + work( i, invd+1 ) * u01_ip1_j
346  work( i+1, j ) = work( i+1, invd ) * u01_i_j
347  $ + work( i+1, invd+1 ) * u01_ip1_j
348  END DO
349  i = i + 1
350  END IF
351  i = i + 1
352  END DO
353 *
354 * invD1 * U11
355 *
356  i = 1
357  DO WHILE ( i.LE.nnb )
358  IF( ipiv( cut+i ).GT.0 ) THEN
359  DO j = i, nnb
360  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
361  END DO
362  ELSE
363  DO j = i, nnb
364  u11_i_j = work(u11+i,j)
365  u11_ip1_j = work(u11+i+1,j)
366  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
367  $ + work(cut+i,invd+1) * work(u11+i+1,j)
368  work( u11+i+1, j ) = work(cut+i+1,invd) * u11_i_j
369  $ + work(cut+i+1,invd+1) * u11_ip1_j
370  END DO
371  i = i + 1
372  END IF
373  i = i + 1
374  END DO
375 *
376 * U11**T * invD1 * U11 -> U11
377 *
378  CALL strmm( 'L', 'U', 'T', 'U', nnb, nnb,
379  $ one, a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
380  $ n+nb+1 )
381 *
382  DO i = 1, nnb
383  DO j = i, nnb
384  a( cut+i, cut+j ) = work( u11+i, j )
385  END DO
386  END DO
387 *
388 * U01**T * invD * U01 -> A( CUT+I, CUT+J )
389 *
390  CALL sgemm( 'T', 'N', nnb, nnb, cut, one, a( 1, cut+1 ),
391  $ lda, work, n+nb+1, zero, work(u11+1,1), n+nb+1 )
392 
393 *
394 * U11 = U11**T * invD1 * U11 + U01**T * invD * U01
395 *
396  DO i = 1, nnb
397  DO j = i, nnb
398  a( cut+i, cut+j ) = a( cut+i, cut+j ) + work(u11+i,j)
399  END DO
400  END DO
401 *
402 * U01 = U00**T * invD0 * U01
403 *
404  CALL strmm( 'L', uplo, 'T', 'U', cut, nnb,
405  $ one, a, lda, work, n+nb+1 )
406 
407 *
408 * Update U01
409 *
410  DO i = 1, cut
411  DO j = 1, nnb
412  a( i, cut+j ) = work( i, j )
413  END DO
414  END DO
415 *
416 * Next Block
417 *
418  END DO
419 *
420 * Apply PERMUTATIONS P and P**T:
421 * P * inv(U**T) * inv(D) * inv(U) * P**T.
422 * Interchange rows and columns I and IPIV(I) in reverse order
423 * from the formation order of IPIV vector for Upper case.
424 *
425 * ( We can use a loop over IPIV with increment 1,
426 * since the ABS value of IPIV(I) represents the row (column)
427 * index of the interchange with row (column) i in both 1x1
428 * and 2x2 pivot cases, i.e. we don't need separate code branches
429 * for 1x1 and 2x2 pivot cases )
430 *
431  DO i = 1, n
432  ip = abs( ipiv( i ) )
433  IF( ip.NE.i ) THEN
434  IF (i .LT. ip) CALL ssyswapr( uplo, n, a, lda, i ,ip )
435  IF (i .GT. ip) CALL ssyswapr( uplo, n, a, lda, ip ,i )
436  END IF
437  END DO
438 *
439  ELSE
440 *
441 * Begin Lower
442 *
443 * inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
444 *
445  CALL strtri( uplo, 'U', n, a, lda, info )
446 *
447 * inv(D) and inv(D) * inv(L)
448 *
449  k = n
450  DO WHILE ( k .GE. 1 )
451  IF( ipiv( k ).GT.0 ) THEN
452 * 1 x 1 diagonal NNB
453  work( k, invd ) = one / a( k, k )
454  work( k, invd+1 ) = zero
455  ELSE
456 * 2 x 2 diagonal NNB
457  t = work( k-1, 1 )
458  ak = a( k-1, k-1 ) / t
459  akp1 = a( k, k ) / t
460  akkp1 = work( k-1, 1 ) / t
461  d = t*( ak*akp1-one )
462  work( k-1, invd ) = akp1 / d
463  work( k, invd ) = ak / d
464  work( k, invd+1 ) = -akkp1 / d
465  work( k-1, invd+1 ) = work( k, invd+1 )
466  k = k - 1
467  END IF
468  k = k - 1
469  END DO
470 *
471 * inv(L**T) = (inv(L))**T
472 *
473 * inv(L**T) * inv(D) * inv(L)
474 *
475  cut = 0
476  DO WHILE( cut.LT.n )
477  nnb = nb
478  IF( (cut + nnb).GT.n ) THEN
479  nnb = n - cut
480  ELSE
481  icount = 0
482 * count negative elements,
483  DO i = cut + 1, cut+nnb
484  IF ( ipiv( i ).LT.0 ) icount = icount + 1
485  END DO
486 * need a even number for a clear cut
487  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
488  END IF
489 *
490 * L21 Block
491 *
492  DO i = 1, n-cut-nnb
493  DO j = 1, nnb
494  work( i, j ) = a( cut+nnb+i, cut+j )
495  END DO
496  END DO
497 *
498 * L11 Block
499 *
500  DO i = 1, nnb
501  work( u11+i, i) = one
502  DO j = i+1, nnb
503  work( u11+i, j ) = zero
504  END DO
505  DO j = 1, i-1
506  work( u11+i, j ) = a( cut+i, cut+j )
507  END DO
508  END DO
509 *
510 * invD*L21
511 *
512  i = n-cut-nnb
513  DO WHILE( i.GE.1 )
514  IF( ipiv( cut+nnb+i ).GT.0 ) THEN
515  DO j = 1, nnb
516  work( i, j ) = work( cut+nnb+i, invd) * work( i, j)
517  END DO
518  ELSE
519  DO j = 1, nnb
520  u01_i_j = work(i,j)
521  u01_ip1_j = work(i-1,j)
522  work(i,j)=work(cut+nnb+i,invd)*u01_i_j+
523  $ work(cut+nnb+i,invd+1)*u01_ip1_j
524  work(i-1,j)=work(cut+nnb+i-1,invd+1)*u01_i_j+
525  $ work(cut+nnb+i-1,invd)*u01_ip1_j
526  END DO
527  i = i - 1
528  END IF
529  i = i - 1
530  END DO
531 *
532 * invD1*L11
533 *
534  i = nnb
535  DO WHILE( i.GE.1 )
536  IF( ipiv( cut+i ).GT.0 ) THEN
537  DO j = 1, nnb
538  work( u11+i, j ) = work( cut+i, invd)*work(u11+i,j)
539  END DO
540 
541  ELSE
542  DO j = 1, nnb
543  u11_i_j = work( u11+i, j )
544  u11_ip1_j = work( u11+i-1, j )
545  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
546  $ + work(cut+i,invd+1) * u11_ip1_j
547  work( u11+i-1, j ) = work(cut+i-1,invd+1) * u11_i_j
548  $ + work(cut+i-1,invd) * u11_ip1_j
549  END DO
550  i = i - 1
551  END IF
552  i = i - 1
553  END DO
554 *
555 * L11**T * invD1 * L11 -> L11
556 *
557  CALL strmm( 'L', uplo, 'T', 'U', nnb, nnb, one,
558  $ a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
559  $ n+nb+1 )
560 
561 *
562  DO i = 1, nnb
563  DO j = 1, i
564  a( cut+i, cut+j ) = work( u11+i, j )
565  END DO
566  END DO
567 *
568  IF( (cut+nnb).LT.n ) THEN
569 *
570 * L21**T * invD2*L21 -> A( CUT+I, CUT+J )
571 *
572  CALL sgemm( 'T', 'N', nnb, nnb, n-nnb-cut, one,
573  $ a( cut+nnb+1, cut+1 ), lda, work, n+nb+1,
574  $ zero, work( u11+1, 1 ), n+nb+1 )
575 
576 *
577 * L11 = L11**T * invD1 * L11 + U01**T * invD * U01
578 *
579  DO i = 1, nnb
580  DO j = 1, i
581  a( cut+i, cut+j ) = a( cut+i, cut+j )+work(u11+i,j)
582  END DO
583  END DO
584 *
585 * L01 = L22**T * invD2 * L21
586 *
587  CALL strmm( 'L', uplo, 'T', 'U', n-nnb-cut, nnb, one,
588  $ a( cut+nnb+1, cut+nnb+1 ), lda, work,
589  $ n+nb+1 )
590 *
591 * Update L21
592 *
593  DO i = 1, n-cut-nnb
594  DO j = 1, nnb
595  a( cut+nnb+i, cut+j ) = work( i, j )
596  END DO
597  END DO
598 *
599  ELSE
600 *
601 * L11 = L11**T * invD1 * L11
602 *
603  DO i = 1, nnb
604  DO j = 1, i
605  a( cut+i, cut+j ) = work( u11+i, j )
606  END DO
607  END DO
608  END IF
609 *
610 * Next Block
611 *
612  cut = cut + nnb
613 *
614  END DO
615 *
616 * Apply PERMUTATIONS P and P**T:
617 * P * inv(L**T) * inv(D) * inv(L) * P**T.
618 * Interchange rows and columns I and IPIV(I) in reverse order
619 * from the formation order of IPIV vector for Lower case.
620 *
621 * ( We can use a loop over IPIV with increment -1,
622 * since the ABS value of IPIV(I) represents the row (column)
623 * index of the interchange with row (column) i in both 1x1
624 * and 2x2 pivot cases, i.e. we don't need separate code branches
625 * for 1x1 and 2x2 pivot cases )
626 *
627  DO i = n, 1, -1
628  ip = abs( ipiv( i ) )
629  IF( ip.NE.i ) THEN
630  IF (i .LT. ip) CALL ssyswapr( uplo, n, a, lda, i ,ip )
631  IF (i .GT. ip) CALL ssyswapr( uplo, n, a, lda, ip ,i )
632  END IF
633  END DO
634 *
635  END IF
636 *
637  RETURN
638 *
639 * End of SSYTRI_3X
640 *
641  END
642 
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine strtri(UPLO, DIAG, N, A, LDA, INFO)
STRTRI
Definition: strtri.f:109
subroutine ssyswapr(UPLO, N, A, LDA, I1, I2)
SSYSWAPR applies an elementary permutation on the rows and columns of a symmetric matrix.
Definition: ssyswapr.f:102
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
Definition: strmm.f:177
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187
subroutine ssytri_3x(UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO)
SSYTRI_3X
Definition: ssytri_3x.f:159