ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pchetdrv.f
Go to the documentation of this file.
00001       SUBROUTINE PCHETDRV( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
00002      $                     INFO )
00003 *
00004 *  -- ScaLAPACK routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          UPLO
00011       INTEGER            IA, INFO, JA, N
00012 *     ..
00013 *     .. Array Arguments ..
00014       INTEGER            DESCA( * )
00015       REAL               D( * ), E( * )
00016       COMPLEX            A( * ), TAU( * ), WORK( * )
00017 *     ..
00018 *
00019 *  Purpose
00020 *  =======
00021 *
00022 *  PCHETDRV computes sub( A ) = A(IA:IA+N-1,JA:JA+N-1) from Q, the
00023 *  Hermitian tridiagonal matrix T (or D and E), and TAU, which were
00024 *  computed by PCHETRD:  sub( A ) := Q * T * Q'.
00025 *
00026 *  Notes
00027 *  =====
00028 *
00029 *  Each global data object is described by an associated description
00030 *  vector.  This vector stores the information required to establish
00031 *  the mapping between an object element and its corresponding process
00032 *  and memory location.
00033 *
00034 *  Let A be a generic term for any 2D block cyclicly distributed array.
00035 *  Such a global array has an associated description vector DESCA.
00036 *  In the following comments, the character _ should be read as
00037 *  "of the global array".
00038 *
00039 *  NOTATION        STORED IN      EXPLANATION
00040 *  --------------- -------------- --------------------------------------
00041 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00042 *                                 DTYPE_A = 1.
00043 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00044 *                                 the BLACS process grid A is distribu-
00045 *                                 ted over. The context itself is glo-
00046 *                                 bal, but the handle (the integer
00047 *                                 value) may vary.
00048 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00049 *                                 array A.
00050 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00051 *                                 array A.
00052 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00053 *                                 the rows of the array.
00054 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00055 *                                 the columns of the array.
00056 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00057 *                                 row of the array A is distributed.
00058 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00059 *                                 first column of the array A is
00060 *                                 distributed.
00061 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00062 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00063 *
00064 *  Let K be the number of rows or columns of a distributed matrix,
00065 *  and assume that its process grid has dimension p x q.
00066 *  LOCr( K ) denotes the number of elements of K that a process
00067 *  would receive if K were distributed over the p processes of its
00068 *  process column.
00069 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00070 *  process would receive if K were distributed over the q processes of
00071 *  its process row.
00072 *  The values of LOCr() and LOCc() may be determined via a call to the
00073 *  ScaLAPACK tool function, NUMROC:
00074 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00075 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00076 *  An upper bound for these quantities may be computed by:
00077 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00078 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00079 *
00080 *  Arguments
00081 *  =========
00082 *
00083 *  UPLO    (global input) CHARACTER
00084 *          Specifies whether the upper or lower triangular part of the
00085 *          Hermitian matrix sub( A ) is stored:
00086 *          = 'U':  Upper triangular
00087 *          = 'L':  Lower triangular
00088 *
00089 *  N       (global input) INTEGER
00090 *          The number of rows and columns to be operated on, i.e. the
00091 *          order of the distributed submatrix sub( A ). N >= 0.
00092 *
00093 *  A       (local input/local output) COMPLEX pointer into the
00094 *          local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
00095 *          This array contains the local pieces of sub( A ). On entry,
00096 *          if UPLO='U', the diagonal and first superdiagonal of sub( A )
00097 *          have the corresponding elements of the tridiagonal matrix T,
00098 *          and the elements above the first superdiagonal, with the
00099 *          array TAU, represent the unitary matrix Q as a product of
00100 *          elementary reflectors, and the strictly lower triangular part
00101 *          of sub( A ) is not referenced. If UPLO='L', the diagonal and
00102 *          first subdiagonal of sub( A ) have the corresponding elements
00103 *          of the tridiagonal matrix T, and the elements below the first
00104 *          subdiagonal, with the array TAU, represent the unitary
00105 *          matrix Q as a product of elementary reflectors, and the
00106 *          strictly upper triangular part of sub( A ) is not referenced.
00107 *          On exit, if UPLO = 'U', the upper triangular part of the
00108 *          distributed Hermitian matrix sub( A ) is recovered.
00109 *          If UPLO='L', the lower triangular part of the distributed
00110 *          Hermitian matrix sub( A ) is recovered.
00111 *
00112 *  IA      (global input) INTEGER
00113 *          The row index in the global array A indicating the first
00114 *          row of sub( A ).
00115 *
00116 *  JA      (global input) INTEGER
00117 *          The column index in the global array A indicating the
00118 *          first column of sub( A ).
00119 *
00120 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00121 *          The array descriptor for the distributed matrix A.
00122 *
00123 *  D       (local input) REAL array, dimension LOCc(JA+N-1)
00124 *          The diagonal elements of the tridiagonal matrix T:
00125 *          D(i) = A(i,i). D is tied to the distributed matrix A.
00126 *
00127 *  E       (local input) REAL array, dimension LOCc(JA+N-1)
00128 *          if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal
00129 *          elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
00130 *          UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
00131 *          distributed matrix A.
00132 *
00133 *  TAU     (local input) COMPLEX, array, dimension
00134 *          LOCc(JA+N-1). This array contains the scalar factors TAU of
00135 *          the elementary reflectors. TAU is tied to the distributed
00136 *          matrix A.
00137 *
00138 *  WORK    (local workspace) COMPLEX array, dimension (LWORK)
00139 *          LWORK >= 2 * NB *( NB + NP )
00140 *
00141 *          where NB = MB_A = NB_A,
00142 *          NP = NUMROC( N, NB, MYROW, IAROW, NPROW ),
00143 *          IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ).
00144 *
00145 *          INDXG2P and NUMROC are ScaLAPACK tool functions;
00146 *          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
00147 *          the subroutine BLACS_GRIDINFO.
00148 *
00149 *  INFO    (global output) INTEGER
00150 *          On exit, if INFO <> 0, a discrepancy has been found between
00151 *          the diagonal and off-diagonal elements of A and the copies
00152 *          contained in the arrays D and E.
00153 *
00154 *  =====================================================================
00155 *
00156 *     .. Parameters ..
00157       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00158      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00159       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00160      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00161      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00162       REAL               REIGHT, RONE, RZERO
00163       PARAMETER            ( REIGHT = 8.0E+0, RONE = 1.0E+0,
00164      $                     RZERO = 0.0E+0 )
00165       COMPLEX              HALF, ONE, ZERO
00166       PARAMETER            ( HALF = ( 0.5E+0, 0.0E+0 ),
00167      $                     ONE = ( 1.0E+0, 0.0E+0 ),
00168      $                     ZERO = ( 0.0E+0, 0.0E+0 ) )
00169 *     ..
00170 *     .. Local Scalars ..
00171       LOGICAL            UPPER
00172       INTEGER            I, IACOL, IAROW, ICTXT, II, IPT, IPV, IPX,
00173      $                   IPY, J, JB, JJ, JL, K, MYCOL, MYROW, NB, NP,
00174      $                   NPCOL, NPROW
00175       REAL               ADDBND, D2, E2
00176       COMPLEX            D1, E1
00177 *     ..
00178 *     .. Local Arrays ..
00179       INTEGER            DESCD( DLEN_ ), DESCE( DLEN_ ), DESCV( DLEN_ ),
00180      $                   DESCT( DLEN_ )
00181 *     ..
00182 *     .. External Functions ..
00183       LOGICAL            LSAME
00184       INTEGER            INDXG2P, NUMROC
00185       REAL               PSLAMCH
00186       EXTERNAL           INDXG2P, LSAME, NUMROC, PSLAMCH
00187 *     ..
00188 *     .. External Subroutines ..
00189       EXTERNAL           BLACS_GRIDINFO, DESCSET, INFOG2L, IGSUM2D,
00190      $                   PCELGET, PCGEMM, PCHEMM,
00191      $                   PCHER2K, PCLACPY, PCLARFT,
00192      $                   PCLASET, PCTRMM, PSELGET
00193 *     ..
00194 *     .. Intrinsic Functions ..
00195       INTRINSIC          ABS, CMPLX, MAX, MIN, MOD
00196 *     ..
00197 *     .. Executable statements ..
00198 *
00199       ICTXT = DESCA( CTXT_ )
00200       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00201 *
00202       INFO = 0
00203       NB = DESCA( MB_ )
00204       UPPER = LSAME( UPLO, 'U' )
00205       CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II, JJ,
00206      $              IAROW, IACOL )
00207       NP = NUMROC( N, NB, MYROW, IAROW, NPROW )
00208 *
00209       IPT = 1
00210       IPV = NB * NB + IPT
00211       IPX = NB * NP + IPV
00212       IPY = NB * NP + IPX
00213 *
00214       CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW,
00215      $              DESCA( CSRC_ ), DESCA( CTXT_ ), 1 )
00216 *
00217       ADDBND = REIGHT * PSLAMCH( ICTXT, 'eps' )
00218 *
00219       IF( UPPER ) THEN
00220 *
00221          CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW,
00222      $                 DESCA( CSRC_ ), DESCA( CTXT_ ), 1 )
00223 *
00224          DO 10 J = 0, N-1
00225             D1 = ZERO
00226             E1 = ZERO
00227             D2 = RZERO
00228             E2 = RZERO
00229             CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD )
00230             CALL PCELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA )
00231             IF( J.LT.(N-1) ) THEN
00232                CALL PSELGET( ' ', ' ', E2, E, 1, JA+J+1, DESCE )
00233                CALL PCELGET( 'Columnwise', ' ', E1, A, IA+J, JA+J+1,
00234      $                       DESCA )
00235             END IF
00236 *
00237             IF( ( ABS( D1 - CMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR.
00238      $          ( ABS( E1 - CMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) )
00239      $         INFO = INFO + 1
00240    10    CONTINUE
00241 *
00242 *        Compute the upper triangle of sub( A ).
00243 *
00244          CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT,
00245      $                 MAX( 1, NP ) )
00246          CALL DESCSET( DESCT, NB, NB, NB, NB, IAROW, IACOL, ICTXT, NB )
00247 *
00248          DO 20 K = 0, N-1, NB
00249             JB = MIN( NB, N-K )
00250             I = IA + K
00251             J = JA + K
00252 *
00253 *           Compute the lower triangular matrix T.
00254 *
00255             CALL PCLARFT( 'Backward', 'Columnwise', K+JB-1, JB, A, IA,
00256      $                    J, DESCA, TAU, WORK( IPT ), WORK( IPV ) )
00257 *
00258 *           Copy Householder vectors into WORK( IPV ).
00259 *
00260             CALL PCLACPY( 'All', K+JB-1, JB, A, IA, J, DESCA,
00261      $                    WORK( IPV ), 1, 1, DESCV )
00262 *
00263             IF( K.GT.0 ) THEN
00264                CALL PCLASET( 'Lower', JB+1, JB, ZERO, ONE, WORK( IPV ),
00265      $                       K, 1, DESCV )
00266             ELSE
00267                CALL PCLASET( 'Lower', JB, JB-1, ZERO, ONE, WORK( IPV ),
00268      $                       1, 2, DESCV )
00269                CALL PCLASET( 'Ge', JB, 1, ZERO, ZERO, WORK( IPV ), 1,
00270      $                       1, DESCV )
00271             END IF
00272 *
00273 *           Zero out the strict upper triangular part of A.
00274 *
00275             IF( K.GT.0 ) THEN
00276                CALL PCLASET( 'Ge', K-1, JB, ZERO, ZERO, A, IA, J,
00277      $                       DESCA )
00278                CALL PCLASET( 'Upper', JB-1, JB-1, ZERO, ZERO, A, I-1,
00279      $                       J+1, DESCA )
00280             ELSE IF( JB.GT.1 ) THEN
00281                CALL PCLASET( 'Upper', JB-2, JB-2, ZERO, ZERO, A, IA,
00282      $                       J+2, DESCA )
00283             END IF
00284 *
00285 *           (1) X := A * V * T'
00286 *
00287             CALL PCHEMM( 'Left', 'Upper', K+JB, JB, ONE, A, IA, JA,
00288      $                   DESCA, WORK( IPV ), 1, 1, DESCV, ZERO,
00289      $                   WORK( IPX ), 1, 1, DESCV )
00290             CALL PCTRMM( 'Right', 'Lower', 'Conjugate transpose',
00291      $                   'Non-Unit', K+JB, JB, ONE, WORK( IPT ), 1, 1,
00292      $                   DESCT, WORK( IPX ), 1, 1, DESCV )
00293 *
00294 *           (2) X := X - 1/2 * V * (T * V' * X)
00295 *
00296             CALL PCGEMM( 'Conjugate transpose', 'No transpose', JB, JB,
00297      $                   K+JB, ONE, WORK( IPV ), 1, 1, DESCV,
00298      $                   WORK( IPX ), 1, 1, DESCV, ZERO, WORK( IPY ),
00299      $                   1, 1, DESCT )
00300             CALL PCTRMM( 'Left', 'Lower', 'No transpose', 'Non-Unit',
00301      $                   JB, JB, ONE, WORK( IPT ), 1, 1, DESCT,
00302      $                   WORK( IPY ), 1, 1, DESCT )
00303             CALL PCGEMM( 'No tranpose', 'No transpose', K+JB, JB, JB,
00304      $                   -HALF, WORK( IPV ), 1, 1, DESCV, WORK( IPY ),
00305      $                   1, 1, DESCT, ONE, WORK( IPX ), 1, 1, DESCV )
00306 *
00307 *           (3) A := A - X * V' - V * X'
00308 *
00309             CALL PCHER2K( 'Upper', 'No transpose', K+JB, JB, -ONE,
00310      $                    WORK( IPV ), 1, 1, DESCV, WORK( IPX ), 1, 1,
00311      $                    DESCV, RONE, A, IA, JA, DESCA )
00312 *
00313             DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + 1, NPCOL )
00314             DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + 1, NPCOL )
00315 *
00316    20    CONTINUE
00317 *
00318       ELSE
00319 *
00320          CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW,
00321      $                 DESCA( CSRC_ ), DESCA( CTXT_ ), 1 )
00322 *
00323          DO 30 J = 0, N-1
00324             D1 = ZERO
00325             E1 = ZERO
00326             D2 = RZERO
00327             E2 = RZERO
00328             CALL PSELGET( ' ', ' ', D2, D, 1, JA+J, DESCD )
00329             CALL PCELGET( 'Columnwise', ' ', D1, A, IA+J, JA+J, DESCA )
00330             IF( J.LT.(N-1) ) THEN
00331                CALL PSELGET( ' ', ' ', E2, E, 1, JA+J, DESCE )
00332                CALL PCELGET( 'Columnwise', ' ', E1, A, IA+J+1, JA+J,
00333      $                       DESCA )
00334             END IF
00335 *
00336             IF( ( ABS( D1 - CMPLX( D2 ) ).GT.( ABS( D2 )*ADDBND ) ) .OR.
00337      $          ( ABS( E1 - CMPLX( E2 ) ).GT.( ABS( E2 )*ADDBND ) ) )
00338      $         INFO = INFO + 1
00339    30    CONTINUE
00340 *
00341 *        Compute the lower triangle of sub( A ).
00342 *
00343          JL = MAX( ( ( JA+N-2 ) / NB ) * NB + 1, JA )
00344          IACOL = INDXG2P( JL, NB, MYCOL, DESCA( CSRC_ ), NPCOL )
00345          CALL DESCSET( DESCV, N, NB, NB, NB, IAROW, IACOL, ICTXT,
00346      $                 MAX( 1, NP ) )
00347          CALL DESCSET( DESCT, NB, NB, NB, NB, INDXG2P( IA+JL-JA+1, NB,
00348      $                 MYROW, DESCA( RSRC_ ), NPROW ), IACOL, ICTXT,
00349      $                 NB )
00350 *
00351          DO 40 J = JL, JA, -NB
00352             K  = J - JA + 1
00353             I  = IA + K - 1
00354             JB = MIN( N-K+1, NB )
00355 *
00356 *           Compute upper triangular matrix T from TAU.
00357 *
00358             CALL PCLARFT( 'Forward', 'Columnwise', N-K, JB, A, I+1, J,
00359      $                    DESCA, TAU, WORK( IPT ), WORK( IPV ) )
00360 *
00361 *           Copy Householder vectors into WORK( IPV ).
00362 *
00363             CALL PCLACPY( 'Lower', N-K, JB, A, I+1, J, DESCA,
00364      $                    WORK( IPV ), K+1, 1, DESCV )
00365             CALL PCLASET( 'Upper', N-K, JB, ZERO, ONE, WORK( IPV ),
00366      $                    K+1, 1, DESCV )
00367             CALL PCLASET( 'Ge', 1, JB, ZERO, ZERO, WORK( IPV ), K, 1,
00368      $                    DESCV )
00369 *
00370 *           Zero out the strict lower triangular part of A.
00371 *
00372             CALL PCLASET( 'Lower', N-K-1, JB, ZERO, ZERO, A, I+2, J,
00373      $                    DESCA )
00374 *
00375 *           (1) X := A * V * T'
00376 *
00377             CALL PCHEMM( 'Left', 'Lower', N-K+1, JB, ONE, A, I, J,
00378      $                   DESCA, WORK( IPV ), K, 1, DESCV, ZERO,
00379      $                   WORK( IPX ), K, 1, DESCV )
00380             CALL PCTRMM( 'Right', 'Upper', 'Conjugate transpose',
00381      $                   'Non-Unit', N-K+1, JB, ONE, WORK( IPT ), 1, 1,
00382      $                   DESCT, WORK( IPX ), K, 1, DESCV )
00383 *
00384 *           (2) X := X - 1/2 * V * (T * V' * X)
00385 *
00386             CALL PCGEMM( 'Conjugate transpose', 'No transpose', JB, JB,
00387      $                   N-K+1, ONE, WORK( IPV ), K, 1, DESCV,
00388      $                   WORK( IPX ), K, 1, DESCV, ZERO, WORK( IPY ),
00389      $                   1, 1, DESCT )
00390             CALL PCTRMM( 'Left', 'Upper', 'No transpose', 'Non-Unit',
00391      $                   JB, JB, ONE, WORK( IPT ), 1, 1, DESCT,
00392      $                   WORK( IPY ), 1, 1, DESCT )
00393             CALL PCGEMM( 'No transpose', 'No transpose', N-K+1, JB, JB,
00394      $                   -HALF, WORK( IPV ), K, 1, DESCV, WORK( IPY ),
00395      $                   1, 1, DESCT, ONE, WORK( IPX ), K, 1, DESCV )
00396 *
00397 *           (3) A := A - X * V' - V * X'
00398 *
00399             CALL PCHER2K( 'Lower', 'No tranpose', N-K+1, JB, -ONE,
00400      $                    WORK( IPV ), K, 1, DESCV, WORK( IPX ), K, 1,
00401      $                    DESCV, RONE, A, I, J, DESCA )
00402 *
00403             DESCV( CSRC_ ) = MOD( DESCV( CSRC_ ) + NPCOL - 1, NPCOL )
00404             DESCT( RSRC_ ) = MOD( DESCT( RSRC_ ) + NPROW - 1, NPROW )
00405             DESCT( CSRC_ ) = MOD( DESCT( CSRC_ ) + NPCOL - 1, NPCOL )
00406 *
00407    40    CONTINUE
00408 *
00409       END IF
00410 *
00411       CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 )
00412 *
00413       RETURN
00414 *
00415 *     End of PCHETDRV
00416 *
00417       END