ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pslarzt.f
Go to the documentation of this file.
00001       SUBROUTINE PSLARZT( DIRECT, STOREV, N, K, V, IV, JV, DESCV, TAU,
00002      $                    T, WORK )
00003 *
00004 *  -- ScaLAPACK auxiliary routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 1, 1997
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          DIRECT, STOREV
00011       INTEGER            IV, JV, K, N
00012 *     ..
00013 *     .. Array Arguments ..
00014       INTEGER            DESCV( * )
00015       REAL               TAU( * ), T( * ), V( * ), WORK( * )
00016 *     ..
00017 *
00018 *  Purpose
00019 *  =======
00020 *
00021 *  PSLARZT forms the triangular factor T of a real block reflector
00022 *  H of order > n, which is defined as a product of k elementary
00023 *  reflectors as returned by PSTZRZF.
00024 *
00025 *  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
00026 *
00027 *  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
00028 *
00029 *  If STOREV = 'C', the vector which defines the elementary reflector
00030 *  H(i) is stored in the i-th column of the array V, and
00031 *
00032 *     H  =  I - V * T * V'
00033 *
00034 *  If STOREV = 'R', the vector which defines the elementary reflector
00035 *  H(i) is stored in the i-th row of the array V, and
00036 *
00037 *     H  =  I - V' * T * V
00038 *
00039 *  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
00040 *
00041 *  Notes
00042 *  =====
00043 *
00044 *  Each global data object is described by an associated description
00045 *  vector.  This vector stores the information required to establish
00046 *  the mapping between an object element and its corresponding process
00047 *  and memory location.
00048 *
00049 *  Let A be a generic term for any 2D block cyclicly distributed array.
00050 *  Such a global array has an associated description vector DESCA.
00051 *  In the following comments, the character _ should be read as
00052 *  "of the global array".
00053 *
00054 *  NOTATION        STORED IN      EXPLANATION
00055 *  --------------- -------------- --------------------------------------
00056 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00057 *                                 DTYPE_A = 1.
00058 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00059 *                                 the BLACS process grid A is distribu-
00060 *                                 ted over. The context itself is glo-
00061 *                                 bal, but the handle (the integer
00062 *                                 value) may vary.
00063 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00064 *                                 array A.
00065 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00066 *                                 array A.
00067 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00068 *                                 the rows of the array.
00069 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00070 *                                 the columns of the array.
00071 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00072 *                                 row of the array A is distributed.
00073 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00074 *                                 first column of the array A is
00075 *                                 distributed.
00076 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00077 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00078 *
00079 *  Let K be the number of rows or columns of a distributed matrix,
00080 *  and assume that its process grid has dimension p x q.
00081 *  LOCr( K ) denotes the number of elements of K that a process
00082 *  would receive if K were distributed over the p processes of its
00083 *  process column.
00084 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00085 *  process would receive if K were distributed over the q processes of
00086 *  its process row.
00087 *  The values of LOCr() and LOCc() may be determined via a call to the
00088 *  ScaLAPACK tool function, NUMROC:
00089 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00090 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00091 *  An upper bound for these quantities may be computed by:
00092 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00093 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00094 *
00095 *  Arguments
00096 *  =========
00097 *
00098 *  DIRECT  (global input) CHARACTER
00099 *          Specifies the order in which the elementary reflectors are
00100 *          multiplied to form the block reflector:
00101 *          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
00102 *          = 'B': H = H(k) . . . H(2) H(1) (Backward)
00103 *
00104 *  STOREV  (global input) CHARACTER
00105 *          Specifies how the vectors which define the elementary
00106 *          reflectors are stored (see also Further Details):
00107 *          = 'C': columnwise                        (not supported yet)
00108 *          = 'R': rowwise
00109 *
00110 *  N       (global input) INTEGER
00111 *          The number of meaningful entries of the block reflector H.
00112 *          N >= 0.
00113 *
00114 *  K       (global input) INTEGER
00115 *          The order of the triangular factor T (= the number of
00116 *          elementary reflectors). 1 <= K <= MB_V (= NB_V).
00117 *
00118 *  V       (input/output) REAL pointer into the local memory
00119 *          to an array of local dimension (LOCr(IV+K-1),LOCc(JV+N-1)).
00120 *          The distributed matrix V contains the Householder vectors.
00121 *          See further details.
00122 *
00123 *  IV      (global input) INTEGER
00124 *          The row index in the global array V indicating the first
00125 *          row of sub( V ).
00126 *
00127 *  JV      (global input) INTEGER
00128 *          The column index in the global array V indicating the
00129 *          first column of sub( V ).
00130 *
00131 *  DESCV   (global and local input) INTEGER array of dimension DLEN_.
00132 *          The array descriptor for the distributed matrix V.
00133 *
00134 *  TAU     (local input) REAL, array, dimension LOCr(IV+K-1)
00135 *          if INCV = M_V, and LOCc(JV+K-1) otherwise. This array
00136 *          contains the Householder scalars related to the Householder
00137 *          vectors.  TAU is tied to the distributed matrix V.
00138 *
00139 *  T       (local output) REAL array, dimension (MB_V,MB_V)
00140 *          It contains the k-by-k triangular factor of the block
00141 *          reflector associated with V. T is lower triangular.
00142 *
00143 *  WORK    (local workspace) REAL array,
00144 *                                           dimension (K*(K-1)/2)
00145 *
00146 *  Further Details
00147 *  ===============
00148 *
00149 *  The shape of the matrix V and the storage of the vectors which define
00150 *  the H(i) is best illustrated by the following example with n = 5 and
00151 *  k = 3. The elements equal to 1 are not stored; the corresponding
00152 *  array elements are modified but restored on exit. The rest of the
00153 *  array is not used.
00154 *
00155 *  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
00156 *
00157 *                                              ______V_____
00158 *         ( v1 v2 v3 )                        /            \
00159 *         ( v1 v2 v3 )                      ( v1 v1 v1 v1 v1 . . . . 1 )
00160 *     V = ( v1 v2 v3 )                      ( v2 v2 v2 v2 v2 . . . 1   )
00161 *         ( v1 v2 v3 )                      ( v3 v3 v3 v3 v3 . . 1     )
00162 *         ( v1 v2 v3 )
00163 *            .  .  .
00164 *            .  .  .
00165 *            1  .  .
00166 *               1  .
00167 *                  1
00168 *
00169 *  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
00170 *
00171 *                                                        ______V_____
00172 *            1                                          /            \
00173 *            .  1                           ( 1 . . . . v1 v1 v1 v1 v1 )
00174 *            .  .  1                        ( . 1 . . . v2 v2 v2 v2 v2 )
00175 *            .  .  .                        ( . . 1 . . v3 v3 v3 v3 v3 )
00176 *            .  .  .
00177 *         ( v1 v2 v3 )
00178 *         ( v1 v2 v3 )
00179 *     V = ( v1 v2 v3 )
00180 *         ( v1 v2 v3 )
00181 *         ( v1 v2 v3 )
00182 *
00183 *  =====================================================================
00184 *
00185 *     .. Parameters ..
00186       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00187      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00188       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00189      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00190      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00191       REAL               ZERO
00192       PARAMETER          ( ZERO = 0.0E+0 )
00193 *     ..
00194 *     .. Local Scalars ..
00195       INTEGER            ICOFF, ICTXT, II, IIV, INFO, IVCOL, IVROW,
00196      $                   ITMP0, ITMP1, IW, JJV, LDV, MYCOL, MYROW,
00197      $                   NPCOL, NPROW, NQ
00198 *     ..
00199 *     .. External Subroutines ..
00200       EXTERNAL           BLACS_ABORT, BLACS_GRIDINFO, INFOG2L, PXERBLA,
00201      $                   SCOPY, SGEMV, SGSUM2D, SLASET,
00202      $                   STRMV
00203 *     ..
00204 *     .. External Functions ..
00205       LOGICAL            LSAME
00206       INTEGER            NUMROC
00207       EXTERNAL           LSAME, NUMROC
00208 *     ..
00209 *     .. Intrinsic Functions ..
00210       INTRINSIC          MOD
00211 *     ..
00212 *     .. Executable Statements ..
00213 *
00214 *     Get grid parameters
00215 *
00216       ICTXT = DESCV( CTXT_ )
00217       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00218 *
00219 *     Check for currently supported options
00220 *
00221       INFO = 0
00222       IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
00223          INFO = -1
00224       ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
00225          INFO = -2
00226       END IF
00227       IF( INFO.NE.0 ) THEN
00228          CALL PXERBLA( ICTXT, 'PSLARZT', -INFO )
00229          CALL BLACS_ABORT( ICTXT, 1 )
00230          RETURN
00231       END IF
00232 *
00233       CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL,
00234      $              IIV, JJV, IVROW, IVCOL )
00235 *
00236       IF( MYROW.EQ.IVROW ) THEN
00237          IW = 1
00238          ITMP0 = 0
00239          LDV = DESCV( LLD_ )
00240          ICOFF = MOD( JV-1, DESCV( NB_ ) )
00241          NQ = NUMROC( N+ICOFF, DESCV( NB_ ), MYCOL, IVCOL, NPCOL )
00242          IF( MYCOL.EQ.IVCOL )
00243      $      NQ = NQ - ICOFF
00244 *
00245          DO 10 II = IIV+K-2, IIV, -1
00246 *
00247 *           T(i+1:k,i) = -tau( iv+i-1 ) *
00248 *                     V(iv+i:iv+k-1,jv:jv+n-1) * V(iv+i-1,jv:jv+n-1)'
00249 *
00250             ITMP0 = ITMP0 + 1
00251             IF( NQ.GT.0 ) THEN
00252                CALL SGEMV( 'No transpose', ITMP0, NQ, -TAU( II ),
00253      $                     V( II+1+(JJV-1)*LDV ), LDV,
00254      $                     V( II+(JJV-1)*LDV ), LDV, ZERO, WORK( IW ),
00255      $                     1 )
00256             ELSE
00257                CALL SLASET( 'All', ITMP0, 1, ZERO, ZERO, WORK( IW ),
00258      $                      ITMP0 )
00259             END IF
00260             IW = IW + ITMP0
00261 *
00262    10    CONTINUE
00263 *
00264          CALL SGSUM2D( ICTXT, 'Rowwise', ' ', IW-1, 1, WORK, IW-1,
00265      $                 MYROW, IVCOL )
00266 *
00267          IF( MYCOL.EQ.IVCOL ) THEN
00268 *
00269             IW = 1
00270             ITMP0 = 0
00271             ITMP1 = K + 1 + (K-1) * DESCV( MB_ )
00272 *
00273             T( ITMP1-1 ) = TAU( IIV+K-1 )
00274 *
00275             DO 20 II = IIV+K-2, IIV, -1
00276 *
00277 *              T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
00278 *
00279                ITMP0 = ITMP0 + 1
00280                ITMP1 = ITMP1 - DESCV( MB_ ) - 1
00281                CALL SCOPY( ITMP0, WORK( IW ), 1, T( ITMP1 ), 1 )
00282                IW = IW + ITMP0
00283 *
00284                CALL STRMV( 'Lower', 'No transpose', 'Non-unit', ITMP0,
00285      $                     T( ITMP1+DESCV( MB_ ) ), DESCV( MB_ ),
00286      $                     T( ITMP1 ), 1 )
00287                T( ITMP1-1 ) = TAU( II )
00288 *
00289    20       CONTINUE
00290 *
00291          END IF
00292 *
00293       END IF
00294 *
00295       RETURN
00296 *
00297 *     End of PSLARZT
00298 *
00299       END