LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ ssytri_3x()

subroutine ssytri_3x ( character  UPLO,
integer  N,
real, dimension( lda, * )  A,
integer  LDA,
real, dimension( * )  E,
integer, dimension( * )  IPIV,
real, dimension( n+nb+1, * )  WORK,
integer  NB,
integer  INFO 
)

SSYTRI_3X

Download SSYTRI_3X + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SSYTRI_3X computes the inverse of a real symmetric indefinite
 matrix A using the factorization computed by SSYTRF_RK or SSYTRF_BK:

     A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),

 where U (or L) is unit upper (or lower) triangular matrix,
 U**T (or L**T) is the transpose of U (or L), P is a permutation
 matrix, P**T is the transpose of P, and D is symmetric and block
 diagonal with 1-by-1 and 2-by-2 diagonal blocks.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          Specifies whether the details of the factorization are
          stored as an upper or lower triangular matrix.
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is REAL array, dimension (LDA,N)
          On entry, diagonal of the block diagonal matrix D and
          factors U or L as computed by SYTRF_RK and SSYTRF_BK:
            a) ONLY diagonal elements of the symmetric block diagonal
               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
               (superdiagonal (or subdiagonal) elements of D
                should be provided on entry in array E), and
            b) If UPLO = 'U': factor U in the superdiagonal part of A.
               If UPLO = 'L': factor L in the subdiagonal part of A.

          On exit, if INFO = 0, the symmetric inverse of the original
          matrix.
             If UPLO = 'U': the upper triangular part of the inverse
             is formed and the part of A below the diagonal is not
             referenced;
             If UPLO = 'L': the lower triangular part of the inverse
             is formed and the part of A above the diagonal is not
             referenced.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[in]E
          E is REAL array, dimension (N)
          On entry, contains the superdiagonal (or subdiagonal)
          elements of the symmetric block diagonal matrix D
          with 1-by-1 or 2-by-2 diagonal blocks, where
          If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
          If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.

          NOTE: For 1-by-1 diagonal block D(k), where
          1 <= k <= N, the element E(k) is not referenced in both
          UPLO = 'U' or UPLO = 'L' cases.
[in]IPIV
          IPIV is INTEGER array, dimension (N)
          Details of the interchanges and the block structure of D
          as determined by SSYTRF_RK or SSYTRF_BK.
[out]WORK
          WORK is REAL array, dimension (N+NB+1,NB+3).
[in]NB
          NB is INTEGER
          Block size.
[out]INFO
          INFO is INTEGER
          = 0: successful exit
          < 0: if INFO = -i, the i-th argument had an illegal value
          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
               inverse could not be computed.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
  June 2017,  Igor Kozachenko,
                  Computer Science Division,
                  University of California, Berkeley

Definition at line 158 of file ssytri_3x.f.

159 *
160 * -- LAPACK computational routine --
161 * -- LAPACK is a software package provided by Univ. of Tennessee, --
162 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163 *
164 * .. Scalar Arguments ..
165  CHARACTER UPLO
166  INTEGER INFO, LDA, N, NB
167 * ..
168 * .. Array Arguments ..
169  INTEGER IPIV( * )
170  REAL A( LDA, * ), E( * ), WORK( N+NB+1, * )
171 * ..
172 *
173 * =====================================================================
174 *
175 * .. Parameters ..
176  REAL ONE, ZERO
177  parameter( one = 1.0e+0, zero = 0.0e+0 )
178 * ..
179 * .. Local Scalars ..
180  LOGICAL UPPER
181  INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
182  REAL AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
183  $ U11_I_J, U11_IP1_J
184 * ..
185 * .. External Functions ..
186  LOGICAL LSAME
187  EXTERNAL lsame
188 * ..
189 * .. External Subroutines ..
190  EXTERNAL sgemm, ssyswapr, strtri, strmm, xerbla
191 * ..
192 * .. Intrinsic Functions ..
193  INTRINSIC abs, max, mod
194 * ..
195 * .. Executable Statements ..
196 *
197 * Test the input parameters.
198 *
199  info = 0
200  upper = lsame( uplo, 'U' )
201  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
202  info = -1
203  ELSE IF( n.LT.0 ) THEN
204  info = -2
205  ELSE IF( lda.LT.max( 1, n ) ) THEN
206  info = -4
207  END IF
208 *
209 * Quick return if possible
210 *
211  IF( info.NE.0 ) THEN
212  CALL xerbla( 'SSYTRI_3X', -info )
213  RETURN
214  END IF
215  IF( n.EQ.0 )
216  $ RETURN
217 *
218 * Workspace got Non-diag elements of D
219 *
220  DO k = 1, n
221  work( k, 1 ) = e( k )
222  END DO
223 *
224 * Check that the diagonal matrix D is nonsingular.
225 *
226  IF( upper ) THEN
227 *
228 * Upper triangular storage: examine D from bottom to top
229 *
230  DO info = n, 1, -1
231  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.zero )
232  $ RETURN
233  END DO
234  ELSE
235 *
236 * Lower triangular storage: examine D from top to bottom.
237 *
238  DO info = 1, n
239  IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.zero )
240  $ RETURN
241  END DO
242  END IF
243 *
244  info = 0
245 *
246 * Splitting Workspace
247 * U01 is a block ( N, NB+1 )
248 * The first element of U01 is in WORK( 1, 1 )
249 * U11 is a block ( NB+1, NB+1 )
250 * The first element of U11 is in WORK( N+1, 1 )
251 *
252  u11 = n
253 *
254 * INVD is a block ( N, 2 )
255 * The first element of INVD is in WORK( 1, INVD )
256 *
257  invd = nb + 2
258 
259  IF( upper ) THEN
260 *
261 * Begin Upper
262 *
263 * invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
264 *
265  CALL strtri( uplo, 'U', n, a, lda, info )
266 *
267 * inv(D) and inv(D) * inv(U)
268 *
269  k = 1
270  DO WHILE( k.LE.n )
271  IF( ipiv( k ).GT.0 ) THEN
272 * 1 x 1 diagonal NNB
273  work( k, invd ) = one / a( k, k )
274  work( k, invd+1 ) = zero
275  ELSE
276 * 2 x 2 diagonal NNB
277  t = work( k+1, 1 )
278  ak = a( k, k ) / t
279  akp1 = a( k+1, k+1 ) / t
280  akkp1 = work( k+1, 1 ) / t
281  d = t*( ak*akp1-one )
282  work( k, invd ) = akp1 / d
283  work( k+1, invd+1 ) = ak / d
284  work( k, invd+1 ) = -akkp1 / d
285  work( k+1, invd ) = work( k, invd+1 )
286  k = k + 1
287  END IF
288  k = k + 1
289  END DO
290 *
291 * inv(U**T) = (inv(U))**T
292 *
293 * inv(U**T) * inv(D) * inv(U)
294 *
295  cut = n
296  DO WHILE( cut.GT.0 )
297  nnb = nb
298  IF( cut.LE.nnb ) THEN
299  nnb = cut
300  ELSE
301  icount = 0
302 * count negative elements,
303  DO i = cut+1-nnb, cut
304  IF( ipiv( i ).LT.0 ) icount = icount + 1
305  END DO
306 * need a even number for a clear cut
307  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
308  END IF
309 
310  cut = cut - nnb
311 *
312 * U01 Block
313 *
314  DO i = 1, cut
315  DO j = 1, nnb
316  work( i, j ) = a( i, cut+j )
317  END DO
318  END DO
319 *
320 * U11 Block
321 *
322  DO i = 1, nnb
323  work( u11+i, i ) = one
324  DO j = 1, i-1
325  work( u11+i, j ) = zero
326  END DO
327  DO j = i+1, nnb
328  work( u11+i, j ) = a( cut+i, cut+j )
329  END DO
330  END DO
331 *
332 * invD * U01
333 *
334  i = 1
335  DO WHILE( i.LE.cut )
336  IF( ipiv( i ).GT.0 ) THEN
337  DO j = 1, nnb
338  work( i, j ) = work( i, invd ) * work( i, j )
339  END DO
340  ELSE
341  DO j = 1, nnb
342  u01_i_j = work( i, j )
343  u01_ip1_j = work( i+1, j )
344  work( i, j ) = work( i, invd ) * u01_i_j
345  $ + work( i, invd+1 ) * u01_ip1_j
346  work( i+1, j ) = work( i+1, invd ) * u01_i_j
347  $ + work( i+1, invd+1 ) * u01_ip1_j
348  END DO
349  i = i + 1
350  END IF
351  i = i + 1
352  END DO
353 *
354 * invD1 * U11
355 *
356  i = 1
357  DO WHILE ( i.LE.nnb )
358  IF( ipiv( cut+i ).GT.0 ) THEN
359  DO j = i, nnb
360  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
361  END DO
362  ELSE
363  DO j = i, nnb
364  u11_i_j = work(u11+i,j)
365  u11_ip1_j = work(u11+i+1,j)
366  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
367  $ + work(cut+i,invd+1) * work(u11+i+1,j)
368  work( u11+i+1, j ) = work(cut+i+1,invd) * u11_i_j
369  $ + work(cut+i+1,invd+1) * u11_ip1_j
370  END DO
371  i = i + 1
372  END IF
373  i = i + 1
374  END DO
375 *
376 * U11**T * invD1 * U11 -> U11
377 *
378  CALL strmm( 'L', 'U', 'T', 'U', nnb, nnb,
379  $ one, a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
380  $ n+nb+1 )
381 *
382  DO i = 1, nnb
383  DO j = i, nnb
384  a( cut+i, cut+j ) = work( u11+i, j )
385  END DO
386  END DO
387 *
388 * U01**T * invD * U01 -> A( CUT+I, CUT+J )
389 *
390  CALL sgemm( 'T', 'N', nnb, nnb, cut, one, a( 1, cut+1 ),
391  $ lda, work, n+nb+1, zero, work(u11+1,1), n+nb+1 )
392 
393 *
394 * U11 = U11**T * invD1 * U11 + U01**T * invD * U01
395 *
396  DO i = 1, nnb
397  DO j = i, nnb
398  a( cut+i, cut+j ) = a( cut+i, cut+j ) + work(u11+i,j)
399  END DO
400  END DO
401 *
402 * U01 = U00**T * invD0 * U01
403 *
404  CALL strmm( 'L', uplo, 'T', 'U', cut, nnb,
405  $ one, a, lda, work, n+nb+1 )
406 
407 *
408 * Update U01
409 *
410  DO i = 1, cut
411  DO j = 1, nnb
412  a( i, cut+j ) = work( i, j )
413  END DO
414  END DO
415 *
416 * Next Block
417 *
418  END DO
419 *
420 * Apply PERMUTATIONS P and P**T:
421 * P * inv(U**T) * inv(D) * inv(U) * P**T.
422 * Interchange rows and columns I and IPIV(I) in reverse order
423 * from the formation order of IPIV vector for Upper case.
424 *
425 * ( We can use a loop over IPIV with increment 1,
426 * since the ABS value of IPIV(I) represents the row (column)
427 * index of the interchange with row (column) i in both 1x1
428 * and 2x2 pivot cases, i.e. we don't need separate code branches
429 * for 1x1 and 2x2 pivot cases )
430 *
431  DO i = 1, n
432  ip = abs( ipiv( i ) )
433  IF( ip.NE.i ) THEN
434  IF (i .LT. ip) CALL ssyswapr( uplo, n, a, lda, i ,ip )
435  IF (i .GT. ip) CALL ssyswapr( uplo, n, a, lda, ip ,i )
436  END IF
437  END DO
438 *
439  ELSE
440 *
441 * Begin Lower
442 *
443 * inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
444 *
445  CALL strtri( uplo, 'U', n, a, lda, info )
446 *
447 * inv(D) and inv(D) * inv(L)
448 *
449  k = n
450  DO WHILE ( k .GE. 1 )
451  IF( ipiv( k ).GT.0 ) THEN
452 * 1 x 1 diagonal NNB
453  work( k, invd ) = one / a( k, k )
454  work( k, invd+1 ) = zero
455  ELSE
456 * 2 x 2 diagonal NNB
457  t = work( k-1, 1 )
458  ak = a( k-1, k-1 ) / t
459  akp1 = a( k, k ) / t
460  akkp1 = work( k-1, 1 ) / t
461  d = t*( ak*akp1-one )
462  work( k-1, invd ) = akp1 / d
463  work( k, invd ) = ak / d
464  work( k, invd+1 ) = -akkp1 / d
465  work( k-1, invd+1 ) = work( k, invd+1 )
466  k = k - 1
467  END IF
468  k = k - 1
469  END DO
470 *
471 * inv(L**T) = (inv(L))**T
472 *
473 * inv(L**T) * inv(D) * inv(L)
474 *
475  cut = 0
476  DO WHILE( cut.LT.n )
477  nnb = nb
478  IF( (cut + nnb).GT.n ) THEN
479  nnb = n - cut
480  ELSE
481  icount = 0
482 * count negative elements,
483  DO i = cut + 1, cut+nnb
484  IF ( ipiv( i ).LT.0 ) icount = icount + 1
485  END DO
486 * need a even number for a clear cut
487  IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
488  END IF
489 *
490 * L21 Block
491 *
492  DO i = 1, n-cut-nnb
493  DO j = 1, nnb
494  work( i, j ) = a( cut+nnb+i, cut+j )
495  END DO
496  END DO
497 *
498 * L11 Block
499 *
500  DO i = 1, nnb
501  work( u11+i, i) = one
502  DO j = i+1, nnb
503  work( u11+i, j ) = zero
504  END DO
505  DO j = 1, i-1
506  work( u11+i, j ) = a( cut+i, cut+j )
507  END DO
508  END DO
509 *
510 * invD*L21
511 *
512  i = n-cut-nnb
513  DO WHILE( i.GE.1 )
514  IF( ipiv( cut+nnb+i ).GT.0 ) THEN
515  DO j = 1, nnb
516  work( i, j ) = work( cut+nnb+i, invd) * work( i, j)
517  END DO
518  ELSE
519  DO j = 1, nnb
520  u01_i_j = work(i,j)
521  u01_ip1_j = work(i-1,j)
522  work(i,j)=work(cut+nnb+i,invd)*u01_i_j+
523  $ work(cut+nnb+i,invd+1)*u01_ip1_j
524  work(i-1,j)=work(cut+nnb+i-1,invd+1)*u01_i_j+
525  $ work(cut+nnb+i-1,invd)*u01_ip1_j
526  END DO
527  i = i - 1
528  END IF
529  i = i - 1
530  END DO
531 *
532 * invD1*L11
533 *
534  i = nnb
535  DO WHILE( i.GE.1 )
536  IF( ipiv( cut+i ).GT.0 ) THEN
537  DO j = 1, nnb
538  work( u11+i, j ) = work( cut+i, invd)*work(u11+i,j)
539  END DO
540 
541  ELSE
542  DO j = 1, nnb
543  u11_i_j = work( u11+i, j )
544  u11_ip1_j = work( u11+i-1, j )
545  work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
546  $ + work(cut+i,invd+1) * u11_ip1_j
547  work( u11+i-1, j ) = work(cut+i-1,invd+1) * u11_i_j
548  $ + work(cut+i-1,invd) * u11_ip1_j
549  END DO
550  i = i - 1
551  END IF
552  i = i - 1
553  END DO
554 *
555 * L11**T * invD1 * L11 -> L11
556 *
557  CALL strmm( 'L', uplo, 'T', 'U', nnb, nnb, one,
558  $ a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
559  $ n+nb+1 )
560 
561 *
562  DO i = 1, nnb
563  DO j = 1, i
564  a( cut+i, cut+j ) = work( u11+i, j )
565  END DO
566  END DO
567 *
568  IF( (cut+nnb).LT.n ) THEN
569 *
570 * L21**T * invD2*L21 -> A( CUT+I, CUT+J )
571 *
572  CALL sgemm( 'T', 'N', nnb, nnb, n-nnb-cut, one,
573  $ a( cut+nnb+1, cut+1 ), lda, work, n+nb+1,
574  $ zero, work( u11+1, 1 ), n+nb+1 )
575 
576 *
577 * L11 = L11**T * invD1 * L11 + U01**T * invD * U01
578 *
579  DO i = 1, nnb
580  DO j = 1, i
581  a( cut+i, cut+j ) = a( cut+i, cut+j )+work(u11+i,j)
582  END DO
583  END DO
584 *
585 * L01 = L22**T * invD2 * L21
586 *
587  CALL strmm( 'L', uplo, 'T', 'U', n-nnb-cut, nnb, one,
588  $ a( cut+nnb+1, cut+nnb+1 ), lda, work,
589  $ n+nb+1 )
590 *
591 * Update L21
592 *
593  DO i = 1, n-cut-nnb
594  DO j = 1, nnb
595  a( cut+nnb+i, cut+j ) = work( i, j )
596  END DO
597  END DO
598 *
599  ELSE
600 *
601 * L11 = L11**T * invD1 * L11
602 *
603  DO i = 1, nnb
604  DO j = 1, i
605  a( cut+i, cut+j ) = work( u11+i, j )
606  END DO
607  END DO
608  END IF
609 *
610 * Next Block
611 *
612  cut = cut + nnb
613 *
614  END DO
615 *
616 * Apply PERMUTATIONS P and P**T:
617 * P * inv(L**T) * inv(D) * inv(L) * P**T.
618 * Interchange rows and columns I and IPIV(I) in reverse order
619 * from the formation order of IPIV vector for Lower case.
620 *
621 * ( We can use a loop over IPIV with increment -1,
622 * since the ABS value of IPIV(I) represents the row (column)
623 * index of the interchange with row (column) i in both 1x1
624 * and 2x2 pivot cases, i.e. we don't need separate code branches
625 * for 1x1 and 2x2 pivot cases )
626 *
627  DO i = n, 1, -1
628  ip = abs( ipiv( i ) )
629  IF( ip.NE.i ) THEN
630  IF (i .LT. ip) CALL ssyswapr( uplo, n, a, lda, i ,ip )
631  IF (i .GT. ip) CALL ssyswapr( uplo, n, a, lda, ip ,i )
632  END IF
633  END DO
634 *
635  END IF
636 *
637  RETURN
638 *
639 * End of SSYTRI_3X
640 *
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine strtri(UPLO, DIAG, N, A, LDA, INFO)
STRTRI
Definition: strtri.f:109
subroutine ssyswapr(UPLO, N, A, LDA, I1, I2)
SSYSWAPR applies an elementary permutation on the rows and columns of a symmetric matrix.
Definition: ssyswapr.f:102
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
Definition: strmm.f:177
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:187
Here is the call graph for this function:
Here is the caller graph for this function: