de/d37/ssytri__3x_8f_source.html

*> \brief \b SSYTRI_3X

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download SSYTRI_3X + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytri_3x.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytri_3x.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytri_3x.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )

*

*       .. Scalar Arguments ..

*       CHARACTER          UPLO

*       INTEGER            INFO, LDA, N, NB

*       ..

*       .. Array Arguments ..

*       INTEGER            IPIV( * )

*       REAL               A( LDA, * ),  E( * ), WORK( N+NB+1, * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*> SSYTRI_3X computes the inverse of a real symmetric indefinite

*> matrix A using the factorization computed by SSYTRF_RK or SSYTRF_BK:

*>

*>     A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),

*>

*> where U (or L) is unit upper (or lower) triangular matrix,

*> U**T (or L**T) is the transpose of U (or L), P is a permutation

*> matrix, P**T is the transpose of P, and D is symmetric and block

*> diagonal with 1-by-1 and 2-by-2 diagonal blocks.

*>

*> This is the blocked version of the algorithm, calling Level 3 BLAS.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] UPLO

*> \verbatim

*>          UPLO is CHARACTER*1

*>          Specifies whether the details of the factorization are

*>          stored as an upper or lower triangular matrix.

*>          = 'U':  Upper triangle of A is stored;

*>          = 'L':  Lower triangle of A is stored.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The order of the matrix A.  N >= 0.

*> \endverbatim

*>

*> \param[in,out] A

*> \verbatim

*>          A is REAL array, dimension (LDA,N)

*>          On entry, diagonal of the block diagonal matrix D and

*>          factors U or L as computed by SSYTRF_RK or SSYTRF_BK:

*>            a) ONLY diagonal elements of the symmetric block diagonal

*>               matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);

*>               (superdiagonal (or subdiagonal) elements of D

*>                should be provided on entry in array E), and

*>            b) If UPLO = 'U': factor U in the superdiagonal part of A.

*>               If UPLO = 'L': factor L in the subdiagonal part of A.

*>

*>          On exit, if INFO = 0, the symmetric inverse of the original

*>          matrix.

*>             If UPLO = 'U': the upper triangular part of the inverse

*>             is formed and the part of A below the diagonal is not

*>             referenced;

*>             If UPLO = 'L': the lower triangular part of the inverse

*>             is formed and the part of A above the diagonal is not

*>             referenced.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max(1,N).

*> \endverbatim

*>

*> \param[in] E

*> \verbatim

*>          E is REAL array, dimension (N)

*>          On entry, contains the superdiagonal (or subdiagonal)

*>          elements of the symmetric block diagonal matrix D

*>          with 1-by-1 or 2-by-2 diagonal blocks, where

*>          If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;

*>          If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.

*>

*>          NOTE: For 1-by-1 diagonal block D(k), where

*>          1 <= k <= N, the element E(k) is not referenced in both

*>          UPLO = 'U' or UPLO = 'L' cases.

*> \endverbatim

*>

*> \param[in] IPIV

*> \verbatim

*>          IPIV is INTEGER array, dimension (N)

*>          Details of the interchanges and the block structure of D

*>          as determined by SSYTRF_RK or SSYTRF_BK.

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is REAL array, dimension (N+NB+1,NB+3).

*> \endverbatim

*>

*> \param[in] NB

*> \verbatim

*>          NB is INTEGER

*>          Block size.  NB >= 1 is required whenever N > 0.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0: successful exit

*>          < 0: if INFO = -i, the i-th argument had an illegal value

*>          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its

*>               inverse could not be computed.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \ingroup hetri_3x

*

*> \par Contributors:

*  ==================

*> \verbatim

*>

*>  June 2017,  Igor Kozachenko,

*>                  Computer Science Division,

*>                  University of California, Berkeley

*>

*> \endverbatim

*

*  =====================================================================

      SUBROUTINE ssytri_3x( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*  Summary
*  =======
*
*  Overwrites one triangle of A with the matching triangle of the
*  inverse, starting from a factorization P*U*D*(U**T)*(P**T) or
*  P*L*D*(L**T)*(P**T) held in A (unit triangular factor and the
*  diagonal of D), E (off-diagonal of D) and IPIV.  The work is done
*  block by block with STRTRI, STRMM and SGEMM, and the symmetric
*  interchanges are applied at the end with SSYSWAPR.
*
*  UPLO  (in)      'U' or 'L': triangle of A that is read and written.
*  N     (in)      Order of A.  N >= 0.
*  A     (in/out)  REAL array (LDA,*).  Factor on entry, inverse on
*                  exit (selected triangle only).
*  LDA   (in)      Leading dimension of A.  LDA >= max(1,N).
*  E     (in)      REAL array; E(1:N) is copied into WORK(1:N,1).
*  IPIV  (in)      INTEGER array.  IPIV(k) > 0 is handled as a 1-by-1
*                  block of D; any other value as part of a 2-by-2
*                  block.  ABS(IPIV(k)) is the interchange index.
*  WORK  (out)     REAL array with leading dimension N+NB+1; columns
*                  1 .. NB+3 are used.
*  NB    (in)      Block size.  NB >= 1 is required when N > 0.
*  INFO  (out)     = 0: success.
*                  < 0: INFO = -i, argument i had an illegal value.
*                  > 0: INFO = i, a 1-by-1 block D(i,i) is exactly
*                       zero; A is left untouched.
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            INFO, LDA, N, NB
*     ..
*     .. Array Arguments ..
      INTEGER            IPIV( * )
      REAL              A( LDA, * ), E( * ), WORK( N+NB+1, * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               ONE, ZERO
      parameter( one = 1.0e+0, zero = 0.0e+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            UPPER
      INTEGER            CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
      REAL               AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
     $                   U11_I_J, U11_IP1_J
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           lsame
*     ..
*     .. External Subroutines ..
      EXTERNAL           sgemm, ssyswapr, strtri, strmm, xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, max, mod
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      info = 0
      upper = lsame( uplo, 'U' )
      IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( lda.LT.max( 1, n ) ) THEN
         info = -4
      ELSE IF( n.GT.0 .AND. nb.LT.1 ) THEN
*
*        A block size below 1 would keep the blocking loops further
*        down from ever terminating (CUT would not advance, or would
*        move the wrong way).  The test is skipped for N = 0 so the
*        quick return below behaves exactly as before.
*
         info = -8
      END IF
*
*     Report an illegal argument, or return quickly when N = 0.
*
      IF( info.NE.0 ) THEN
         CALL xerbla( 'SSYTRI_3X', -info )
         RETURN
      END IF
      IF( n.EQ.0 )
     $   RETURN
*
*     Copy the off-diagonal elements of D (array E) into the first
*     column of the workspace.
*
      DO k = 1, n
         work( k, 1 ) = e( k )
      END DO
*
*     Check that the diagonal matrix D is nonsingular.
*     INFO doubles as the loop index so that, on an early RETURN, it
*     already holds the position of the zero 1-by-1 diagonal block.
*
      IF( upper ) THEN
*
*        Upper triangular storage: examine D from bottom to top
*
         DO info = n, 1, -1
            IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.zero )
     $         RETURN
         END DO
      ELSE
*
*        Lower triangular storage: examine D from top to bottom.
*
         DO info = 1, n
            IF( ipiv( info ).GT.0 .AND. a( info, info ).EQ.zero )
     $         RETURN
         END DO
      END IF
*
      info = 0
*
*     Splitting Workspace
*     U01 is a block ( N, NB+1 )
*     The first element of U01 is in WORK( 1, 1 )
*     U11 is a block ( NB+1, NB+1 )
*     The first element of U11 is in WORK( N+1, 1 )
*
      u11 = n
*
*     INVD is a block ( N, 2 )
*     The first element of INVD is in WORK( 1, INVD )
*
      invd = nb + 2
*
      IF( upper ) THEN
*
*        Begin Upper
*
*        invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
*
*        Invert the unit upper triangular factor in place.
*
         CALL strtri( uplo, 'U', n, a, lda, info )
*
*        inv(D) and inv(D) * inv(U)
*
*        Rows of WORK( :, INVD:INVD+1 ) receive inv(D):
*          1-by-1 block: row k   = ( 1/D(k,k), 0 )
*          2-by-2 block: row k   = ( inv11, inv12 )
*                        row k+1 = ( inv21, inv22 )
*
         k = 1
         DO WHILE( k.LE.n )
            IF( ipiv( k ).GT.0 ) THEN
*              1 x 1 diagonal block
               work( k, invd ) = one /  a( k, k )
               work( k, invd+1 ) = zero
            ELSE
*              2 x 2 diagonal block; the entries are divided by the
*              off-diagonal element T before the determinant-like
*              quantity D is formed.
               t = work( k+1, 1 )
               ak = a( k, k ) / t
               akp1 = a( k+1, k+1 ) / t
               akkp1 = work( k+1, 1 )  / t
               d = t*( ak*akp1-one )
               work( k, invd ) = akp1 / d
               work( k+1, invd+1 ) = ak / d
               work( k, invd+1 ) = -akkp1 / d
               work( k+1, invd ) = work( k, invd+1 )
               k = k + 1
            END IF
            k = k + 1
         END DO
*
*        inv(U**T) = (inv(U))**T
*
*        inv(U**T) * inv(D) * inv(U)
*
*        Process column blocks from the right-hand end of A; CUT is
*        the number of columns still to be processed.
*
         cut = n
         DO WHILE( cut.GT.0 )
            nnb = nb
            IF( cut.LE.nnb ) THEN
               nnb = cut
            ELSE
               icount = 0
*              count negative elements,
               DO i = cut+1-nnb, cut
                  IF( ipiv( i ).LT.0 ) icount = icount + 1
               END DO
*              need an even number for a clear cut (a 2-by-2 block
*              must not be split between two column blocks)
               IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
            END IF
*
            cut = cut - nnb
*
*           U01 Block
*
            DO i = 1, cut
               DO j = 1, nnb
                  work( i, j ) = a( i, cut+j )
               END DO
            END DO
*
*           U11 Block (unit diagonal, zeros below it)
*
            DO i = 1, nnb
               work( u11+i, i ) = one
               DO j = 1, i-1
                  work( u11+i, j ) = zero
                END DO
                DO j = i+1, nnb
                   work( u11+i, j ) = a( cut+i, cut+j )
                END DO
            END DO
*
*           invD * U01
*
            i = 1
            DO WHILE( i.LE.cut )
               IF( ipiv( i ).GT.0 ) THEN
                  DO j = 1, nnb
                     work( i, j ) = work( i, invd ) * work( i, j )
                  END DO
               ELSE
                  DO j = 1, nnb
                     u01_i_j = work( i, j )
                     u01_ip1_j = work( i+1, j )
                     work( i, j ) = work( i, invd ) * u01_i_j
     $                            + work( i, invd+1 ) * u01_ip1_j
                     work( i+1, j ) = work( i+1, invd ) * u01_i_j
     $                              + work( i+1, invd+1 ) * u01_ip1_j
                  END DO
                  i = i + 1
               END IF
               i = i + 1
            END DO
*
*           invD1 * U11
*
            i = 1
            DO WHILE ( i.LE.nnb )
               IF( ipiv( cut+i ).GT.0 ) THEN
                  DO j = i, nnb
                     work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
                  END DO
               ELSE
                  DO j = i, nnb
                     u11_i_j = work(u11+i,j)
                     u11_ip1_j = work(u11+i+1,j)
                     work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
     $                            + work(cut+i,invd+1) * work(u11+i+1,j)
                     work( u11+i+1, j ) = work(cut+i+1,invd) * u11_i_j
     $                               + work(cut+i+1,invd+1) * u11_ip1_j
                  END DO
                  i = i + 1
               END IF
               i = i + 1
            END DO
*
*           U11**T * invD1 * U11 -> U11
*
            CALL strmm( 'L', 'U', 'T', 'U', nnb, nnb,
     $                 one, a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
     $                 n+nb+1 )
*
            DO i = 1, nnb
               DO j = i, nnb
                  a( cut+i, cut+j ) = work( u11+i, j )
               END DO
            END DO
*
*           U01**T * invD * U01 -> WORK( U11+1, 1 )
*           (the U11 workspace is reused for the product)
*
            CALL sgemm( 'T', 'N', nnb, nnb, cut, one, a( 1, cut+1 ),
     $                  lda, work, n+nb+1, zero, work(u11+1,1), n+nb+1 )
*
*           U11 =  U11**T * invD1 * U11 + U01**T * invD * U01
*
            DO i = 1, nnb
               DO j = i, nnb
                  a( cut+i, cut+j ) = a( cut+i, cut+j ) + work(u11+i,j)
               END DO
            END DO
*
*           U01 =  U00**T * invD0 * U01
*
            CALL strmm( 'L', uplo, 'T', 'U', cut, nnb,
     $                  one, a, lda, work, n+nb+1 )
*
*           Update U01
*
            DO i = 1, cut
               DO j = 1, nnb
                  a( i, cut+j ) = work( i, j )
               END DO
            END DO
*
*           Next Block
*
         END DO
*
*        Apply PERMUTATIONS P and P**T:
*        P * inv(U**T) * inv(D) * inv(U) * P**T.
*        Interchange rows and columns I and IPIV(I) in reverse order
*        from the formation order of IPIV vector for Upper case.
*
*        ( We can use a loop over IPIV with increment 1,
*        since the ABS value of IPIV(I) represents the row (column)
*        index of the interchange with row (column) i in both 1x1
*        and 2x2 pivot cases, i.e. we don't need separate code branches
*        for 1x1 and 2x2 pivot cases )
*
         DO i = 1, n
             ip = abs( ipiv( i ) )
             IF( ip.NE.i ) THEN
*               SSYSWAPR is always called with the smaller index first.
                IF (i .LT. ip) CALL ssyswapr( uplo, n, a, lda, i ,ip )
                IF (i .GT. ip) CALL ssyswapr( uplo, n, a, lda, ip ,i )
             END IF
         END DO
*
      ELSE
*
*        Begin Lower
*
*        inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
*
*        Invert the unit lower triangular factor in place.
*
         CALL strtri( uplo, 'U', n, a, lda, info )
*
*        inv(D) and inv(D) * inv(L)
*
*        Layout of WORK( :, INVD:INVD+1 ) in the Lower case:
*          1-by-1 block: row k = ( 1/D(k,k), 0 )
*          2-by-2 block: column INVD holds the two diagonal entries
*                        of the inverse block (rows k-1 and k) and
*                        column INVD+1 holds its off-diagonal entry
*                        in both rows.
*
         k = n
         DO WHILE ( k .GE. 1 )
            IF( ipiv( k ).GT.0 ) THEN
*              1 x 1 diagonal block
               work( k, invd ) = one /  a( k, k )
               work( k, invd+1 ) = zero
            ELSE
*              2 x 2 diagonal block; the entries are divided by the
*              off-diagonal element T before the determinant-like
*              quantity D is formed.
               t = work( k-1, 1 )
               ak = a( k-1, k-1 ) / t
               akp1 = a( k, k ) / t
               akkp1 = work( k-1, 1 ) / t
               d = t*( ak*akp1-one )
               work( k-1, invd ) = akp1 / d
               work( k, invd ) = ak / d
               work( k, invd+1 ) = -akkp1 / d
               work( k-1, invd+1 ) = work( k, invd+1 )
               k = k - 1
            END IF
            k = k - 1
         END DO
*
*        inv(L**T) = (inv(L))**T
*
*        inv(L**T) * inv(D) * inv(L)
*
*        Process column blocks from the left-hand end of A; CUT is
*        the number of columns already processed.
*
         cut = 0
         DO WHILE( cut.LT.n )
            nnb = nb
            IF( (cut + nnb).GT.n ) THEN
               nnb = n - cut
            ELSE
               icount = 0
*              count negative elements,
               DO i = cut + 1, cut+nnb
                  IF ( ipiv( i ).LT.0 ) icount = icount + 1
               END DO
*              need an even number for a clear cut (a 2-by-2 block
*              must not be split between two column blocks)
               IF( mod( icount, 2 ).EQ.1 ) nnb = nnb + 1
            END IF
*
*           L21 Block
*
            DO i = 1, n-cut-nnb
               DO j = 1, nnb
                 work( i, j ) = a( cut+nnb+i, cut+j )
               END DO
            END DO
*
*           L11 Block (unit diagonal, zeros above it)
*
            DO i = 1, nnb
               work( u11+i, i) = one
               DO j = i+1, nnb
                  work( u11+i, j ) = zero
               END DO
               DO j = 1, i-1
                  work( u11+i, j ) = a( cut+i, cut+j )
               END DO
            END DO
*
*           invD*L21
*
            i = n-cut-nnb
            DO WHILE( i.GE.1 )
               IF( ipiv( cut+nnb+i ).GT.0 ) THEN
                  DO j = 1, nnb
                     work( i, j ) = work( cut+nnb+i, invd) * work( i, j)
                  END DO
               ELSE
                  DO j = 1, nnb
                     u01_i_j = work(i,j)
                     u01_ip1_j = work(i-1,j)
                     work(i,j)=work(cut+nnb+i,invd)*u01_i_j+
     $                        work(cut+nnb+i,invd+1)*u01_ip1_j
                     work(i-1,j)=work(cut+nnb+i-1,invd+1)*u01_i_j+
     $                        work(cut+nnb+i-1,invd)*u01_ip1_j
                  END DO
                  i = i - 1
               END IF
               i = i - 1
            END DO
*
*           invD1*L11
*
            i = nnb
            DO WHILE( i.GE.1 )
               IF( ipiv( cut+i ).GT.0 ) THEN
                  DO j = 1, nnb
                     work( u11+i, j ) = work( cut+i, invd)*work(u11+i,j)
                  END DO
*
               ELSE
                  DO j = 1, nnb
                     u11_i_j = work( u11+i, j )
                     u11_ip1_j = work( u11+i-1, j )
                     work( u11+i, j ) = work(cut+i,invd) * work(u11+i,j)
     $                                + work(cut+i,invd+1) * u11_ip1_j
                     work( u11+i-1, j ) = work(cut+i-1,invd+1) * u11_i_j
     $                                  + work(cut+i-1,invd) * u11_ip1_j
                  END DO
                  i = i - 1
               END IF
               i = i - 1
            END DO
*
*           L11**T * invD1 * L11 -> L11
*
            CALL strmm( 'L', uplo, 'T', 'U', nnb, nnb, one,
     $                   a( cut+1, cut+1 ), lda, work( u11+1, 1 ),
     $                   n+nb+1 )
*
*           Store the lower triangle of the product.  For the last
*           block (no rows below it) this is already the final L11.
*
            DO i = 1, nnb
               DO j = 1, i
                  a( cut+i, cut+j ) = work( u11+i, j )
               END DO
            END DO
*
            IF( (cut+nnb).LT.n ) THEN
*
*              L21**T * invD2*L21 -> WORK( U11+1, 1 )
*              (the L11 workspace is reused for the product)
*
               CALL sgemm( 'T', 'N', nnb, nnb, n-nnb-cut, one,
     $                     a( cut+nnb+1, cut+1 ), lda, work, n+nb+1,
     $                     zero, work( u11+1, 1 ), n+nb+1 )
*
*              L11 =  L11**T * invD1 * L11 + L21**T * invD2 * L21
*
               DO i = 1, nnb
                  DO j = 1, i
                     a( cut+i, cut+j ) = a( cut+i, cut+j )+work(u11+i,j)
                  END DO
               END DO
*
*              L21 =  L22**T * invD2 * L21
*
               CALL strmm( 'L', uplo, 'T', 'U', n-nnb-cut, nnb, one,
     $                     a( cut+nnb+1, cut+nnb+1 ), lda, work,
     $                     n+nb+1 )
*
*              Update L21
*
               DO i = 1, n-cut-nnb
                  DO j = 1, nnb
                     a( cut+nnb+i, cut+j ) = work( i, j )
                  END DO
               END DO
*
            END IF
*
*           Next Block
*
            cut = cut + nnb
*
         END DO
*
*        Apply PERMUTATIONS P and P**T:
*        P * inv(L**T) * inv(D) * inv(L) * P**T.
*        Interchange rows and columns I and IPIV(I) in reverse order
*        from the formation order of IPIV vector for Lower case.
*
*        ( We can use a loop over IPIV with increment -1,
*        since the ABS value of IPIV(I) represents the row (column)
*        index of the interchange with row (column) i in both 1x1
*        and 2x2 pivot cases, i.e. we don't need separate code branches
*        for 1x1 and 2x2 pivot cases )
*
         DO i = n, 1, -1
             ip = abs( ipiv( i ) )
             IF( ip.NE.i ) THEN
*               SSYSWAPR is always called with the smaller index first.
                IF (i .LT. ip) CALL ssyswapr( uplo, n, a, lda, i ,ip )
                IF (i .GT. ip) CALL ssyswapr( uplo, n, a, lda, ip ,i )
             END IF
         END DO
*
      END IF
*
      RETURN
*
*     End of SSYTRI_3X
*
      END


xerbla
subroutine xerbla(srname, info)
Definition cblat2.f:3285

sgemm
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188

ssyswapr
subroutine ssyswapr(uplo, n, a, lda, i1, i2)
SSYSWAPR applies an elementary permutation on the rows and columns of a symmetric matrix.
Definition ssyswapr.f:100

ssytri_3x
subroutine ssytri_3x(uplo, n, a, lda, e, ipiv, work, nb, info)
SSYTRI_3X
Definition ssytri_3x.f:159

strmm
subroutine strmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
STRMM
Definition strmm.f:177

strtri
subroutine strtri(uplo, diag, n, a, lda, info)
STRTRI
Definition strtri.f:109