ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pslatrd.f
Go to the documentation of this file.
00001       SUBROUTINE PSLATRD( UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W,
00002      $                    IW, JW, DESCW, WORK )
00003 *
00004 *  -- ScaLAPACK auxiliary routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          UPLO
00011       INTEGER            IA, IW, JA, JW, N, NB
00012 *     ..
00013 *     .. Array Arguments ..
00014       INTEGER            DESCA( * ), DESCW( * )
00015       REAL               A( * ), D( * ), E( * ), TAU( * ), W( * ),
00016      $                   WORK( * )
00017 *     ..
00018 *
00019 *  Purpose
00020 *  =======
00021 *
00022 *  PSLATRD reduces NB rows and columns of a real symmetric distributed
00023 *  matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) to symmetric tridiagonal
00024 *  form by an orthogonal similarity transformation Q' * sub( A ) * Q,
00025 *  and returns the matrices V and W which are needed to apply the
00026 *  transformation to the unreduced part of sub( A ).
00027 *
00028 *  If UPLO = 'U', PSLATRD reduces the last NB rows and columns of a
00029 *  matrix, of which the upper triangle is supplied;
00030 *  if UPLO = 'L', PSLATRD reduces the first NB rows and columns of a
00031 *  matrix, of which the lower triangle is supplied.
00032 *
00033 *  This is an auxiliary routine called by PSSYTRD.
00034 *
00035 *  Notes
00036 *  =====
00037 *
00038 *  Each global data object is described by an associated description
00039 *  vector.  This vector stores the information required to establish
00040 *  the mapping between an object element and its corresponding process
00041 *  and memory location.
00042 *
00043 *  Let A be a generic term for any 2D block cyclicly distributed array.
00044 *  Such a global array has an associated description vector DESCA.
00045 *  In the following comments, the character _ should be read as
00046 *  "of the global array".
00047 *
00048 *  NOTATION        STORED IN      EXPLANATION
00049 *  --------------- -------------- --------------------------------------
00050 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00051 *                                 DTYPE_A = 1.
00052 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00053 *                                 the BLACS process grid A is distribu-
00054 *                                 ted over. The context itself is glo-
00055 *                                 bal, but the handle (the integer
00056 *                                 value) may vary.
00057 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00058 *                                 array A.
00059 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00060 *                                 array A.
00061 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00062 *                                 the rows of the array.
00063 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00064 *                                 the columns of the array.
00065 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00066 *                                 row of the array A is distributed.
00067 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00068 *                                 first column of the array A is
00069 *                                 distributed.
00070 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00071 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00072 *
00073 *  Let K be the number of rows or columns of a distributed matrix,
00074 *  and assume that its process grid has dimension p x q.
00075 *  LOCr( K ) denotes the number of elements of K that a process
00076 *  would receive if K were distributed over the p processes of its
00077 *  process column.
00078 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00079 *  process would receive if K were distributed over the q processes of
00080 *  its process row.
00081 *  The values of LOCr() and LOCc() may be determined via a call to the
00082 *  ScaLAPACK tool function, NUMROC:
00083 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00084 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00085 *  An upper bound for these quantities may be computed by:
00086 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00087 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00088 *
00089 *  Arguments
00090 *  =========
00091 *
00092 *  UPLO    (global input) CHARACTER
00093 *          Specifies whether the upper or lower triangular part of the
00094 *          symmetric matrix sub( A ) is stored:
00095 *          = 'U': Upper triangular
00096 *          = 'L': Lower triangular
00097 *
00098 *  N       (global input) INTEGER
00099 *          The number of rows and columns to be operated on, i.e. the
00100 *          order of the distributed submatrix sub( A ). N >= 0.
00101 *
00102 *  NB      (global input) INTEGER
00103 *          The number of rows and columns to be reduced.
00104 *
00105 *  A       (local input/local output) REAL pointer into the
00106 *          local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
00107 *          On entry, this array contains the local pieces of the
00108 *          symmetric distributed matrix sub( A ).  If UPLO = 'U', the
00109 *          leading N-by-N upper triangular part of sub( A ) contains
00110 *          the upper triangular part of the matrix, and its strictly
00111 *          lower triangular part is not referenced. If UPLO = 'L', the
00112 *          leading N-by-N lower triangular part of sub( A ) contains the
00113 *          lower triangular part of the matrix, and its strictly upper
00114 *          triangular part is not referenced.
00115 *          On exit, if UPLO = 'U', the last NB columns have been reduced
00116 *          to tridiagonal form, with the diagonal elements overwriting
00117 *          the diagonal elements of sub( A ); the elements above the
00118 *          diagonal with the array TAU, represent the orthogonal matrix
00119 *          Q as a product of elementary reflectors. If UPLO = 'L', the
00120 *          first NB columns have been reduced to tridiagonal form, with
00121 *          the diagonal elements overwriting the diagonal elements of
00122 *          sub( A ); the elements below the diagonal with the array TAU,
00123 *          represent the orthogonal matrix Q as a product of elementary
00124 *          reflectors; See Further Details.
00125 *
00126 *  IA      (global input) INTEGER
00127 *          The row index in the global array A indicating the first
00128 *          row of sub( A ).
00129 *
00130 *  JA      (global input) INTEGER
00131 *          The column index in the global array A indicating the
00132 *          first column of sub( A ).
00133 *
00134 *  DESCA   (global and local input) INTEGER array of dimension DLEN_.
00135 *          The array descriptor for the distributed matrix A.
00136 *
00137 *  D       (local output) REAL array, dimension LOCc(JA+N-1)
00138 *          The diagonal elements of the tridiagonal matrix T:
00139 *          D(i) = A(i,i). D is tied to the distributed matrix A.
00140 *
00141 *  E       (local output) REAL array, dimension LOCc(JA+N-1)
00142 *          if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal
00143 *          elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
00144 *          UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
00145 *          distributed matrix A.
00146 *
00147 *  TAU     (local output) REAL, array, dimension
00148 *          LOCc(JA+N-1). This array contains the scalar factors TAU of
00149 *          the elementary reflectors. TAU is tied to the distributed
00150 *          matrix A.
00151 *
00152 *  W       (local output) REAL pointer into the local memory
00153 *          to an array of dimension (LLD_W,NB_W), This array contains
00154 *          the local pieces of the N-by-NB_W matrix W required to
00155 *          update the unreduced part of sub( A ).
00156 *
00157 *  IW      (global input) INTEGER
00158 *          The row index in the global array W indicating the first
00159 *          row of sub( W ).
00160 *
00161 *  JW      (global input) INTEGER
00162 *          The column index in the global array W indicating the
00163 *          first column of sub( W ).
00164 *
00165 *  DESCW   (global and local input) INTEGER array of dimension DLEN_.
00166 *          The array descriptor for the distributed matrix W.
00167 *
00168 *  WORK    (local workspace) REAL array, dimension (NB_A)
00169 *
00170 *  Further Details
00171 *  ===============
00172 *
00173 *  If UPLO = 'U', the matrix Q is represented as a product of elementary
00174 *  reflectors
00175 *
00176 *     Q = H(n) H(n-1) . . . H(n-nb+1).
00177 *
00178 *  Each H(i) has the form
00179 *
00180 *     H(i) = I - tau * v * v'
00181 *
00182 *  where tau is a real scalar, and v is a real vector with
00183 *  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in
00184 *  A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
00185 *
00186 *  If UPLO = 'L', the matrix Q is represented as a product of elementary
00187 *  reflectors
00188 *
00189 *     Q = H(1) H(2) . . . H(nb).
00190 *
00191 *  Each H(i) has the form
00192 *
00193 *     H(i) = I - tau * v * v'
00194 *
00195 *  where tau is a real scalar, and v is a real vector with
00196 *  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
00197 *  A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
00198 *
00199 *  The elements of the vectors v together form the N-by-NB matrix V
00200 *  which is needed, with W, to apply the transformation to the unreduced
00201 *  part of the matrix, using a symmetric rank-2k update of the form:
00202 *  sub( A ) := sub( A ) - V*W' - W*V'.
00203 *
00204 *  The contents of A on exit are illustrated by the following examples
00205 *  with n = 5 and nb = 2:
00206 *
00207 *  if UPLO = 'U':                       if UPLO = 'L':
00208 *
00209 *    (  a   a   a   v4  v5 )              (  d                  )
00210 *    (      a   a   v4  v5 )              (  1   d              )
00211 *    (          a   1   v5 )              (  v1  1   a          )
00212 *    (              d   1  )              (  v1  v2  a   a      )
00213 *    (                  d  )              (  v1  v2  a   a   a  )
00214 *
00215 *  where d denotes a diagonal element of the reduced matrix, a denotes
00216 *  an element of the original matrix that is unchanged, and vi denotes
00217 *  an element of the vector defining H(i).
00218 *
00219 *  =====================================================================
00220 *
00221 *     .. Parameters ..
00222       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00223      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00224       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00225      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00226      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00227       REAL               HALF, ONE, ZERO
00228       PARAMETER          ( HALF = 0.5E+0, ONE = 1.0E+0, ZERO = 0.0E+0 )
00229 *     ..
00230 *     .. Local Scalars ..
00231       INTEGER            I, IACOL, IAROW, ICTXT, II, J, JJ, JP, JWK, K,
00232      $                   KW, MYCOL, MYROW, NPCOL, NPROW, NQ
00233       REAL               ALPHA
00234 *     ..
00235 *     .. Local Arrays ..
00236       INTEGER            DESCD( DLEN_ ), DESCE( DLEN_ ), DESCWK( DLEN_ )
00237 *     ..
00238 *     .. External Subroutines ..
00239       EXTERNAL           BLACS_GRIDINFO, DESCSET, INFOG2L, PSAXPY,
00240      $                   PSDOT, PSELGET, PSELSET, PSGEMV,
00241      $                   PSLARFG, PSSCAL, PSSYMV, SGEBR2D,
00242      $                   SGEBS2D
00243 *     ..
00244 *     .. External Functions ..
00245       LOGICAL            LSAME
00246       INTEGER            NUMROC
00247       EXTERNAL           LSAME, NUMROC
00248 *     ..
00249 *     .. Intrinsic Functions ..
00250       INTRINSIC          MIN
00251 *     ..
00252 *     .. Executable Statements ..
00253 *
00254 *     Quick return if possible
00255 *
00256       IF( N.LE.0 )
00257      $   RETURN
00258 *
00259       ICTXT = DESCA( CTXT_ )
00260       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00261       NQ = MAX( 1, NUMROC( JA+N-1, DESCA( NB_ ), MYCOL, DESCA( CSRC_ ),
00262      $          NPCOL ) )
00263       CALL DESCSET( DESCD, 1, JA+N-1, 1, DESCA( NB_ ), MYROW,
00264      $              DESCA( CSRC_ ), DESCA( CTXT_ ), 1 )
00265 *
00266       IF( LSAME( UPLO, 'U' ) ) THEN
00267 *
00268          CALL INFOG2L( N+IA-NB, N+JA-NB, DESCA, NPROW, NPCOL, MYROW,
00269      $                 MYCOL, II, JJ, IAROW, IACOL )
00270          CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW,
00271      $                 IACOL, ICTXT, 1 )
00272          CALL DESCSET( DESCE, 1, JA+N-1, 1, DESCA( NB_ ), MYROW,
00273      $                 DESCA( CSRC_ ), DESCA( CTXT_ ), 1 )
00274 *
00275 *        Reduce last NB columns of upper triangle
00276 *
00277          DO 10 J = JA+N-1, JA+N-NB, -1
00278             I = IA + J - JA
00279             K = J - JA + 1
00280             KW = MOD( K-1, DESCA( MB_ ) ) + 1
00281 *
00282 *           Update A(IA:I,I)
00283 *
00284             CALL PSGEMV( 'No transpose', K, N-K, -ONE, A, IA, J+1,
00285      $                   DESCA, W, IW+K-1, JW+KW, DESCW, DESCW( M_ ),
00286      $                   ONE, A, IA, J, DESCA, 1 )
00287             CALL PSGEMV( 'No transpose', K, N-K, -ONE, W, IW, JW+KW,
00288      $                   DESCW, A, I, J+1, DESCA, DESCA( M_ ), ONE, A,
00289      $                   IA, J, DESCA, 1 )
00290             IF( N-K.GT.0 )
00291      $         CALL PSELSET( A, I, J+1, DESCA, E( JP ) )
00292 *
00293 *           Generate elementary reflector H(i) to annihilate
00294 *           A(IA:I-2,I)
00295 *
00296             JP = MIN( JJ+KW-1, NQ )
00297             CALL PSLARFG( K-1, E( JP ), I-1, J, A, IA, J, DESCA, 1,
00298      $                    TAU )
00299             CALL PSELSET( A, I-1, J, DESCA, ONE )
00300 *
00301 *           Compute W(IW:IW+K-2,JW+KW-1)
00302 *
00303             CALL PSSYMV( 'Upper', K-1, ONE, A, IA, JA, DESCA, A, IA, J,
00304      $                   DESCA, 1, ZERO, W, IW, JW+KW-1, DESCW, 1 )
00305 *
00306             JWK = MOD( K-1, DESCWK( NB_ ) ) + 2
00307             CALL PSGEMV( 'Transpose', K-1, N-K, ONE, W, IW, JW+KW,
00308      $                   DESCW, A, IA, J, DESCA, 1, ZERO, WORK, 1, JWK,
00309      $                   DESCWK, DESCWK( M_ ) )
00310             CALL PSGEMV( 'No transpose', K-1, N-K, -ONE, A, IA, J+1,
00311      $                   DESCA, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE,
00312      $                   W, IW, JW+KW-1, DESCW, 1 )
00313             CALL PSGEMV( 'Transpose', K-1, N-K, ONE, A, IA, J+1, DESCA,
00314      $                   A, IA, J, DESCA, 1, ZERO, WORK, 1, JWK, DESCWK,
00315      $                   DESCWK( M_ ) )
00316             CALL PSGEMV( 'No transpose', K-1, N-K, -ONE, W, IW, JW+KW,
00317      $                   DESCW, WORK, 1, JWK, DESCWK, DESCWK( M_ ), ONE,
00318      $                   W, IW, JW+KW-1, DESCW, 1 )
00319             CALL PSSCAL( K-1, TAU( JP ), W, IW, JW+KW-1, DESCW, 1 )
00320 *
00321             CALL PSDOT( K-1, ALPHA, W, IW, JW+KW-1, DESCW, 1, A, IA, J,
00322      $                  DESCA, 1 )
00323             IF( MYCOL.EQ.IACOL )
00324      $         ALPHA = -HALF*TAU( JP )*ALPHA
00325             CALL PSAXPY( K-1, ALPHA, A, IA, J, DESCA, 1, W, IW, JW+KW-1,
00326      $                   DESCW, 1 )
00327             IF( MYCOL.EQ.IACOL ) THEN
00328                CALL PSELGET( 'E', ' ', D( JP ), A, I, J, DESCA )
00329             END IF
00330 *
00331    10    CONTINUE
00332 *
00333       ELSE
00334 *
00335          CALL INFOG2L( IA, JA, DESCA, NPROW, NPCOL, MYROW, MYCOL, II,
00336      $                 JJ, IAROW, IACOL )
00337          CALL DESCSET( DESCWK, 1, DESCW( NB_ ), 1, DESCW( NB_ ), IAROW,
00338      $                 IACOL, ICTXT, 1 )
00339          CALL DESCSET( DESCE, 1, JA+N-2, 1, DESCA( NB_ ), MYROW,
00340      $                 DESCA( CSRC_ ), DESCA( CTXT_ ), 1 )
00341 *
00342 *        Reduce first NB columns of lower triangle
00343 *
00344          DO 20 J = JA, JA+NB-1
00345             I = IA + J - JA
00346             K = J - JA + 1
00347 *
00348 *           Update A(J:JA+N-1,J)
00349 *
00350             CALL PSGEMV( 'No transpose', N-K+1, K-1, -ONE, A, I, JA,
00351      $                   DESCA, W, IW+K-1, JW, DESCW, DESCW( M_ ), ONE,
00352      $                   A, I, J, DESCA, 1 )
00353             CALL PSGEMV( 'No transpose', N-K+1, K-1, -ONE, W, IW+K-1,
00354      $                   JW, DESCW, A, I, JA, DESCA, DESCA( M_ ), ONE,
00355      $                   A, I, J, DESCA, 1 )
00356             IF( K.GT.1 )
00357      $         CALL PSELSET( A, I, J-1, DESCA, E( JP ) )
00358 *
00359 *
00360 *           Generate elementary reflector H(i) to annihilate
00361 *           A(I+2:IA+N-1,I)
00362 *
00363             JP = MIN( JJ+K-1, NQ )
00364             CALL PSLARFG( N-K, E( JP ), I+1, J, A, I+2, J, DESCA, 1,
00365      $                    TAU )
00366             CALL PSELSET( A, I+1, J, DESCA, ONE )
00367 *
00368 *           Compute W(IW+K:IW+N-1,JW+K-1)
00369 *
00370             CALL PSSYMV( 'Lower', N-K, ONE, A, I+1, J+1, DESCA, A, I+1,
00371      $                   J, DESCA, 1, ZERO, W, IW+K, JW+K-1, DESCW, 1 )
00372 *
00373             CALL PSGEMV( 'Transpose', N-K, K-1, ONE, W, IW+K, JW, DESCW,
00374      $                   A, I+1, J, DESCA, 1, ZERO, WORK, 1, 1, DESCWK,
00375      $                   DESCWK( M_ ) )
00376             CALL PSGEMV( 'No transpose', N-K, K-1, -ONE, A, I+1, JA,
00377      $                  DESCA, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W,
00378      $                  IW+K, JW+K-1, DESCW, 1 )
00379             CALL PSGEMV( 'Transpose', N-K, K-1, ONE, A, I+1, JA, DESCA,
00380      $                  A, I+1, J, DESCA, 1, ZERO, WORK, 1, 1, DESCWK,
00381      $                  DESCWK( M_ ) )
00382             CALL PSGEMV( 'No transpose', N-K, K-1, -ONE, W, IW+K, JW,
00383      $                  DESCW, WORK, 1, 1, DESCWK, DESCWK( M_ ), ONE, W,
00384      $                  IW+K, JW+K-1, DESCW, 1 )
00385             CALL PSSCAL( N-K, TAU( JP ), W, IW+K, JW+K-1, DESCW, 1 )
00386             CALL PSDOT( N-K, ALPHA, W, IW+K, JW+K-1, DESCW, 1, A, I+1,
00387      $                  J, DESCA, 1 )
00388             IF( MYCOL.EQ.IACOL )
00389      $         ALPHA = -HALF*TAU( JP )*ALPHA
00390             CALL PSAXPY( N-K, ALPHA, A, I+1, J, DESCA, 1, W, IW+K,
00391      $                   JW+K-1, DESCW, 1 )
00392             IF( MYCOL.EQ.IACOL ) THEN
00393                CALL PSELGET( 'E', ' ', D( JP ), A, I, J, DESCA )
00394             END IF
00395 *
00396    20    CONTINUE
00397 *
00398       END IF
00399 *
00400 *     Broadcast columnwise the diagonal elements into D.
00401 *
00402       IF( MYCOL.EQ.IACOL ) THEN
00403          IF( MYROW.EQ.IAROW ) THEN
00404             CALL SGEBS2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1 )
00405          ELSE
00406             CALL SGEBR2D( ICTXT, 'Columnwise', ' ', 1, NB, D( JJ ), 1,
00407      $                    IAROW, MYCOL )
00408          END IF
00409       END IF
00410 *
00411       RETURN
00412 *
00413 *     End of PSLATRD
00414 *
00415       END