◆ chetrf_aa()

subroutine chetrf_aa	(	character	uplo,
		integer	n,
		complex, dimension( lda, * )	a,
		integer	lda,
		integer, dimension( * )	ipiv,
		complex, dimension( * )	work,
		integer	lwork,
		integer	info
	)

CHETRF_AA

Download CHETRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:

 CHETRF_AA computes the factorization of a complex Hermitian matrix A
 using Aasen's algorithm.  The form of the factorization is

    A = U**H*T*U  or  A = L*T*L**H

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a Hermitian tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.

Parameters

[in]	UPLO	UPLO is CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored.
[in]	N	N is INTEGER The order of the matrix A. N >= 0.
[in,out]	A	A is COMPLEX array, dimension (LDA,N) On entry, the Hermitian matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the tridiagonal matrix T is stored in the diagonals and the subdiagonals of A just below (or above) the diagonals, and L (or U) is stored below (or above) the subdiagonals, when UPLO is 'L' (or 'U').
[in]	LDA	LDA is INTEGER The leading dimension of the array A. LDA >= max(1,N).
[out]	IPIV	IPIV is INTEGER array, dimension (N) On exit, it contains the details of the interchanges, i.e., the row and column k of A were interchanged with the row and column IPIV(k).
[out]	WORK	WORK is COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]	LWORK	LWORK is INTEGER The length of WORK. LWORK >= 2*N. For optimum performance LWORK >= N*(1+NB), where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]	INFO	INFO is INTEGER = 0: successful exit; < 0: if INFO = -i, the i-th argument had an illegal value.

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Definition at line 131 of file chetrf_aa.f.

*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*  This part of the routine declares the arguments and locals,
*  validates the arguments, answers workspace queries (LWORK = -1),
*  handles the N = 0 and N = 1 quick returns, and shrinks the block
*  size NB when the supplied workspace is smaller than (NB+1)*N.
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER    UPLO
      INTEGER      N, LDA, LWORK, INFO
*     ..
*     .. Array Arguments ..
      INTEGER      IPIV( * )
      COMPLEX      A( LDA, * ), WORK( * )
*     ..
*
*  =====================================================================
*     .. Parameters ..
      COMPLEX      ZERO, ONE
      parameter( zero = (0.0e+0, 0.0e+0), one = (1.0e+0, 0.0e+0) )
*
*     .. Local Scalars ..
*     LQUERY : .TRUE. when the call is a workspace query (LWORK = -1)
*     UPPER  : .TRUE. when UPLO = 'U'
*     J      : main loop index (last column of the previous panel)
*     LWKOPT : workspace size reported back in WORK(1)
*     NB     : block size; JB is the width of the current panel
*     J1     : first column of the current panel
*     K1, K2 : 0/1 flags that distinguish the first panel from the rest
*     J2, J3, MJ, NJ : indices and sizes used in the trailing update
*     ALPHA  : conjugate of the saved off-diagonal entry of T
      LOGICAL      LQUERY, UPPER
      INTEGER      J, LWKOPT
      INTEGER      NB, MJ, NJ, K1, K2, J1, J2, J3, JB
      COMPLEX      ALPHA
*     ..
*     .. External Functions ..
      LOGICAL      LSAME
      INTEGER      ILAENV
      REAL         SROUNDUP_LWORK
      EXTERNAL     lsame, ilaenv, sroundup_lwork
*     ..
*     .. External Subroutines ..
      EXTERNAL     clahef_aa, cgemm, ccopy, cswap, cscal, xerbla
*     ..
*     .. Intrinsic Functions ..
*     MIN is listed as well because the factorization loops use it.
      INTRINSIC    real, conjg, max, min
*     ..
*     .. Executable Statements ..
*
*     Determine the block size
*
      nb = ilaenv( 1, 'CHETRF_AA', uplo, n, -1, -1, -1 )
*
*     Test the input parameters.
*     INFO = -i flags the i-th argument: 1 = UPLO, 2 = N, 4 = LDA,
*     7 = LWORK.  A workspace query is never rejected because of
*     LWORK.
*
      info = 0
      upper = lsame( uplo, 'U' )
      lquery = ( lwork.EQ.-1 )
      IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( lda.LT.max( 1, n ) ) THEN
         info = -4
      ELSE IF( lwork.LT.( 2*n ) .AND. .NOT.lquery ) THEN
         info = -7
      END IF
*
*     Report the optimal workspace size.  It is clamped to at least 1
*     so that a query with N = 0 never advertises a zero-length WORK:
*     WORK(1) is written by this routine whenever INFO = 0, so WORK
*     must always hold at least one element.
*
      IF( info.EQ.0 ) THEN
         lwkopt = max( 1, (nb+1)*n )
         work( 1 ) = sroundup_lwork(lwkopt)
      END IF
*
*     Illegal argument: report it through XERBLA.  Workspace query:
*     WORK(1) has already been set, nothing else to do.
*
      IF( info.NE.0 ) THEN
         CALL xerbla( 'CHETRF_AA', -info )
         RETURN
      ELSE IF( lquery ) THEN
         RETURN
      END IF
*
*     Quick return
*
      IF ( n.EQ.0 ) THEN
          RETURN
      ENDIF
      ipiv( 1 ) = 1
      IF ( n.EQ.1 ) THEN
*
*        1-by-1 case: only the imaginary part of the diagonal entry
*        is dropped.
*
         a( 1, 1 ) = real( a( 1, 1 ) )
         RETURN
      END IF
*
*     Adjust block size based on the workspace size.
*     N entries of WORK hold the first column of H, the remaining
*     LWORK-N entries are split into NB columns of length N.
*
      IF( lwork.LT.((1+nb)*n) ) THEN
         nb = ( lwork-n ) / n
      END IF
*
*     Blocked Aasen factorization.  Both branches have the same
*     structure: factor a panel of JB columns with CLAHEF_AA, fix up
*     the pivot indices, update the trailing submatrix with CGEMM,
*     then reload the first column of H into WORK(1:N-J).
*
      IF( upper ) THEN
*
*        .....................................................
*        Factorize A as U**H*T*U using the upper triangle of A
*        .....................................................
*
*        copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
*
         CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
*
*        J is the main loop index, increasing from 1 to N in steps of
*        JB, where JB is the number of columns factorized by
*        CLAHEF_AA; JB is either NB, or N-J+1 for the last block
*
         j = 0
 10      CONTINUE
         IF( j.GE.n )
     $      GO TO 20
*
*        each step of the main loop
*         J is the last column of the previous panel
*         J1 is the first column of the current panel
*         K1 identifies if the previous column of the panel has been
*          explicitly stored, e.g., K1=1 for the first panel, and
*          K1=0 for the rest
*
         j1 = j + 1
         jb = min( n-j1+1, nb )
         k1 = max(1, j)-j
*
*        Panel factorization
*        (WORK is passed with leading dimension N; the last argument
*        is the part of WORK that starts at entry N*NB+1)
*
         CALL clahef_aa( uplo, 2-k1, n-j, jb,
     $                      a( max(1, j), j+1 ), lda,
     $                      ipiv( j+1 ), work, n, work( n*nb+1 ) )
*
*        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
*        IPIV entries of this panel are shifted by J, then the same
*        interchange is applied to the first J1-K1-2 rows of columns
*        J2 and IPIV(J2)
*
         DO j2 = j+2, min(n, j+jb+1)
            ipiv( j2 ) = ipiv( j2 ) + j
            IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
               CALL cswap( j1-k1-2, a( 1, j2 ), 1,
     $                              a( 1, ipiv(j2) ), 1 )
            END IF
         END DO
         j = j + jb
*
*        Trailing submatrix update, where
*         the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
*         WORK stores the current block of the auxiliary matrix H
*
         IF( j.LT.n ) THEN
*
*          if the first panel and JB=1 (NB=1), then nothing to do
*
            IF( j1.GT.1 .OR. jb.GT.1 ) THEN
*
*              Merge rank-1 update with BLAS-3 update
*              A(J, J+1) is saved (conjugated) in ALPHA and
*              temporarily replaced by ONE; row A(J-1, J+1:N) is
*              copied into WORK and scaled by ALPHA
*
               alpha = conjg( a( j, j+1 ) )
               a( j, j+1 ) = one
               CALL ccopy( n-j, a( j-1, j+1 ), lda,
     $                          work( (j+1-j1+1)+jb*n ), 1 )
               CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
*
*              K2 selects the first row of A used in the update
*               (row J1-K2): K2=0 for the first panel and K2=1 for
*               the rest, i.e. K1=1 and K2=0 for the first panel,
*               and K1=0 and K2=1 for the rest
*
               IF( j1.GT.1 ) THEN
*
*                 Not first panel
*
                  k2 = 1
               ELSE
*
*                 First panel
*
                  k2 = 0
*
*                 First update skips the first column
*
                  jb = jb - 1
               END IF
*
               DO j2 = j+1, n, nb
                  nj = min( nb, n-j2+1 )
*
*                 Update (J2, J2) diagonal block with CGEMM,
*                 one row at a time (row J3, MJ columns)
*
                  j3 = j2
                  DO mj = nj-1, 1, -1
                     CALL cgemm( 'Conjugate transpose', 'Transpose',
     $                            1, mj, jb+1,
     $                           -one, a( j1-k2, j3 ), lda,
     $                                 work( (j3-j1+1)+k1*n ), n,
     $                            one, a( j3, j3 ), lda )
                     j3 = j3 + 1
                  END DO
*
*                 Update off-diagonal block of J2-th block row with CGEMM
*
                  CALL cgemm( 'Conjugate transpose', 'Transpose',
     $                        nj, n-j3+1, jb+1,
     $                       -one, a( j1-k2, j2 ), lda,
     $                             work( (j3-j1+1)+k1*n ), n,
     $                        one, a( j2, j3 ), lda )
               END DO
*
*              Recover T( J, J+1 )
*
               a( j, j+1 ) = conjg( alpha )
            END IF
*
*           WORK(J+1, 1) stores H(J+1, 1)
*           (row A(J+1, J+1:N) is copied into WORK(1:N-J))
*
            CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
         END IF
         GO TO 10
      ELSE
*
*        .....................................................
*        Factorize A as L*T*L**H using the lower triangle of A
*        .....................................................
*
*        copy first column A(1:N, 1) into H(1:N, 1)
*         (stored in WORK(1:N))
*
         CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
*
*        J is the main loop index, increasing from 1 to N in steps of
*        JB, where JB is the number of columns factorized by
*        CLAHEF_AA; JB is either NB, or N-J+1 for the last block
*
         j = 0
 11      CONTINUE
         IF( j.GE.n )
     $      GO TO 20
*
*        each step of the main loop
*         J is the last column of the previous panel
*         J1 is the first column of the current panel
*         K1 identifies if the previous column of the panel has been
*          explicitly stored, e.g., K1=1 for the first panel, and
*          K1=0 for the rest
*
         j1 = j+1
         jb = min( n-j1+1, nb )
         k1 = max(1, j)-j
*
*        Panel factorization
*        (WORK is passed with leading dimension N; the last argument
*        is the part of WORK that starts at entry N*NB+1)
*
         CALL clahef_aa( uplo, 2-k1, n-j, jb,
     $                      a( j+1, max(1, j) ), lda,
     $                      ipiv( j+1 ), work, n, work( n*nb+1 ) )
*
*        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
*        IPIV entries of this panel are shifted by J, then the same
*        interchange is applied to the first J1-K1-2 columns of rows
*        J2 and IPIV(J2)
*
         DO j2 = j+2, min(n, j+jb+1)
            ipiv( j2 ) = ipiv( j2 ) + j
            IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
               CALL cswap( j1-k1-2, a( j2, 1 ), lda,
     $                              a( ipiv(j2), 1 ), lda )
            END IF
         END DO
         j = j + jb
*
*        Trailing submatrix update, where
*          A(J2+1, J1-1) stores L(J2+1, J1) and
*          WORK(J2+1, 1) stores H(J2+1, 1)
*
         IF( j.LT.n ) THEN
*
*          if the first panel and JB=1 (NB=1), then nothing to do
*
            IF( j1.GT.1 .OR. jb.GT.1 ) THEN
*
*              Merge rank-1 update with BLAS-3 update
*              A(J+1, J) is saved (conjugated) in ALPHA and
*              temporarily replaced by ONE; column A(J+1:N, J-1) is
*              copied into WORK and scaled by ALPHA
*
               alpha = conjg( a( j+1, j ) )
               a( j+1, j ) = one
               CALL ccopy( n-j, a( j+1, j-1 ), 1,
     $                          work( (j+1-j1+1)+jb*n ), 1 )
               CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
*
*              K2 selects the first column of A used in the update
*               (column J1-K2): K2=0 for the first panel and K2=1 for
*               the rest, i.e. K1=1 and K2=0 for the first panel,
*               and K1=0 and K2=1 for the rest
*
               IF( j1.GT.1 ) THEN
*
*                 Not first panel
*
                  k2 = 1
               ELSE
*
*                 First panel
*
                  k2 = 0
*
*                 First update skips the first column
*
                  jb = jb - 1
               END IF
*
               DO j2 = j+1, n, nb
                  nj = min( nb, n-j2+1 )
*
*                 Update (J2, J2) diagonal block with CGEMM,
*                 one column at a time (column J3, MJ rows)
*
                  j3 = j2
                  DO mj = nj-1, 1, -1
                     CALL cgemm( 'No transpose', 'Conjugate transpose',
     $                           mj, 1, jb+1,
     $                          -one, work( (j3-j1+1)+k1*n ), n,
     $                                a( j3, j1-k2 ), lda,
     $                           one, a( j3, j3 ), lda )
                     j3 = j3 + 1
                  END DO
*
*                 Update off-diagonal block of J2-th block column with CGEMM
*
                  CALL cgemm( 'No transpose', 'Conjugate transpose',
     $                        n-j3+1, nj, jb+1,
     $                       -one, work( (j3-j1+1)+k1*n ), n,
     $                             a( j2, j1-k2 ), lda,
     $                        one, a( j3, j2 ), lda )
               END DO
*
*              Recover T( J+1, J )
*
               a( j+1, j ) = conjg( alpha )
            END IF
*
*           WORK(J+1, 1) stores H(J+1, 1)
*           (column A(J+1:N, J+1) is copied into WORK(1:N-J))
*
            CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
         END IF
         GO TO 11
      END IF
*
*     Common exit of both branches: WORK(1) was used as scratch
*     during the factorization, so the optimal workspace size is
*     stored into it again before returning.
*
   20 CONTINUE
      work( 1 ) = sroundup_lwork(lwkopt)
      RETURN
*
*     End of CHETRF_AA
*

Here is the call graph for this function:

Here is the caller graph for this function: