ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pclarz.f
Go to the documentation of this file.
00001       SUBROUTINE PCLARZ( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
00002      $                   IC, JC, DESCC, WORK )
00003 *
00004 *  -- ScaLAPACK auxiliary routine (version 1.7) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00006 *     and University of California, Berkeley.
00007 *     May 25, 2001
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          SIDE
00011       INTEGER            IC, INCV, IV, JC, JV, L, M, N
00012 *     ..
00013 *     .. Array Arguments ..
00014       INTEGER            DESCC( * ), DESCV( * )
00015       COMPLEX            C( * ), TAU( * ), V( * ), WORK( * )
00016 *     ..
00017 *
00018 *  Purpose
00019 *  =======
00020 *
00021 *  PCLARZ applies a complex elementary reflector Q to a complex M-by-N
00022 *  distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from either the
00023 *  left or the right. Q is represented in the form
00024 *
00025 *        Q = I - tau * v * v'
00026 *
00027 *  where tau is a complex scalar and v is a complex vector.
00028 *
00029 *  If tau = 0, then Q is taken to be the unit matrix.
00030 *
00031 *  Q is a product of k elementary reflectors as returned by PCTZRZF.
00032 *
00033 *  Notes
00034 *  =====
00035 *
00036 *  Each global data object is described by an associated description
00037 *  vector.  This vector stores the information required to establish
00038 *  the mapping between an object element and its corresponding process
00039 *  and memory location.
00040 *
00041 *  Let A be a generic term for any 2D block cyclicly distributed array.
00042 *  Such a global array has an associated description vector DESCA.
00043 *  In the following comments, the character _ should be read as
00044 *  "of the global array".
00045 *
00046 *  NOTATION        STORED IN      EXPLANATION
00047 *  --------------- -------------- --------------------------------------
00048 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00049 *                                 DTYPE_A = 1.
00050 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00051 *                                 the BLACS process grid A is distribu-
00052 *                                 ted over. The context itself is glo-
00053 *                                 bal, but the handle (the integer
00054 *                                 value) may vary.
00055 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00056 *                                 array A.
00057 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00058 *                                 array A.
00059 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00060 *                                 the rows of the array.
00061 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00062 *                                 the columns of the array.
00063 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00064 *                                 row of the array A is distributed.
00065 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00066 *                                 first column of the array A is
00067 *                                 distributed.
00068 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00069 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00070 *
00071 *  Let K be the number of rows or columns of a distributed matrix,
00072 *  and assume that its process grid has dimension p x q.
00073 *  LOCr( K ) denotes the number of elements of K that a process
00074 *  would receive if K were distributed over the p processes of its
00075 *  process column.
00076 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00077 *  process would receive if K were distributed over the q processes of
00078 *  its process row.
00079 *  The values of LOCr() and LOCc() may be determined via a call to the
00080 *  ScaLAPACK tool function, NUMROC:
00081 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00082 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00083 *  An upper bound for these quantities may be computed by:
00084 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00085 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00086 *
00087 *  Because vectors may be viewed as a subclass of matrices, a
00088 *  distributed vector is considered to be a distributed matrix.
00089 *
00090 *  Restrictions
00091 *  ============
00092 *
00093 *  If SIDE = 'Left' and INCV = 1, then the row process having the first
00094 *  entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
00095 *  MOD(IV-1,MB_V) must be equal to MOD(IC+N-L-1,MB_C), if INCV=M_V, only
00096 *  the last equality must be satisfied.
00097 *
00098 *  If SIDE = 'Right' and INCV = M_V then the column process having the
00099 *  first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
00100 *  MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
00101 *  the last equality must be satisfied.
00102 *
00103 *  Arguments
00104 *  =========
00105 *
00106 *  SIDE    (global input) CHARACTER
00107 *          = 'L': form  Q * sub( C ),
00108 *          = 'R': form  sub( C ) * Q.
00109 *
00110 *  M       (global input) INTEGER
00111 *          The number of rows to be operated on i.e the number of rows
00112 *          of the distributed submatrix sub( C ). M >= 0.
00113 *
00114 *  N       (global input) INTEGER
00115 *          The number of columns to be operated on i.e the number of
00116 *          columns of the distributed submatrix sub( C ). N >= 0.
00117 *
00118 *  L       (global input) INTEGER
00119 *          The columns of the distributed submatrix sub( A ) containing
00120 *          the meaningful part of the Householder reflectors.
00121 *          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
00122 *
00123 *  V       (local input) COMPLEX pointer into the local memory
00124 *          to an array of dimension (LLD_V,*) containing the local
00125 *          pieces of the distributed vectors V representing the
00126 *          Householder transformation Q,
00127 *             V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
00128 *             V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
00129 *             V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
00130 *             V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
00131 *
00132 *          The vector v in the representation of Q. V is not used if
00133 *          TAU = 0.
00134 *
00135 *  IV      (global input) INTEGER
00136 *          The row index in the global array V indicating the first
00137 *          row of sub( V ).
00138 *
00139 *  JV      (global input) INTEGER
00140 *          The column index in the global array V indicating the
00141 *          first column of sub( V ).
00142 *
00143 *  DESCV   (global and local input) INTEGER array of dimension DLEN_.
00144 *          The array descriptor for the distributed matrix V.
00145 *
00146 *  INCV    (global input) INTEGER
00147 *          The global increment for the elements of V. Only two values
00148 *          of INCV are supported in this version, namely 1 and M_V.
00149 *          INCV must not be zero.
00150 *
00151 *  TAU     (local input) COMPLEX, array, dimension  LOCc(JV) if
00152 *          INCV = 1, and LOCr(IV) otherwise. This array contains the
00153 *          Householder scalars related to the Householder vectors.
00154 *          TAU is tied to the distributed matrix V.
00155 *
00156 *  C       (local input/local output) COMPLEX pointer into the
00157 *          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
00158 *          containing the local pieces of sub( C ). On exit, sub( C )
00159 *          is overwritten by the Q * sub( C ) if SIDE = 'L', or
00160 *          sub( C ) * Q if SIDE = 'R'.
00161 *
00162 *  IC      (global input) INTEGER
00163 *          The row index in the global array C indicating the first
00164 *          row of sub( C ).
00165 *
00166 *  JC      (global input) INTEGER
00167 *          The column index in the global array C indicating the
00168 *          first column of sub( C ).
00169 *
00170 *  DESCC   (global and local input) INTEGER array of dimension DLEN_.
00171 *          The array descriptor for the distributed matrix C.
00172 *
00173 *  WORK    (local workspace) COMPLEX array, dimension (LWORK)
00174 *          If INCV = 1,
00175 *            if SIDE = 'L',
00176 *              if IVCOL = ICCOL,
00177 *                LWORK >= NqC0
00178 *              else
00179 *                LWORK >= MpC0 + MAX( 1, NqC0 )
00180 *              end if
00181 *            else if SIDE = 'R',
00182 *              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
00183 *                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
00184 *            end if
00185 *          else if INCV = M_V,
00186 *            if SIDE = 'L',
00187 *              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
00188 *                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
00189 *            else if SIDE = 'R',
00190 *              if IVROW = ICROW,
00191 *                LWORK >= MpC0
00192 *              else
00193 *                LWORK >= NqC0 + MAX( 1, MpC0 )
00194 *              end if
00195 *            end if
00196 *          end if
00197 *
00198 *          where LCM is the least common multiple of NPROW and NPCOL and
00199 *          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
00200 *          LCMQ = LCM / NPCOL,
00201 *
00202 *          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
00203 *          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
00204 *          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
00205 *          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
00206 *          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
00207 *
00208 *          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
00209 *          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
00210 *          the subroutine BLACS_GRIDINFO.
00211 *
00212 *  Alignment requirements
00213 *  ======================
00214 *
00215 *  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
00216 *  must verify some alignment properties, namely the following
00217 *  expressions should be true:
00218 *
00219 *  MB_V = NB_V,
00220 *
00221 *  If INCV = 1,
00222 *    If SIDE = 'Left',
00223 *      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
00224 *    If SIDE = 'Right',
00225 *      ( MB_V.EQ.NB_A .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
00226 *  else if INCV = M_V,
00227 *    If SIDE = 'Left',
00228 *      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
00229 *    If SIDE = 'Right',
00230 *      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
00231 *  end if
00232 *
00233 *  =====================================================================
00234 *
00235 *     .. Parameters ..
00236       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00237      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00238       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00239      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00240      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00241       COMPLEX            ONE, ZERO
00242       PARAMETER          ( ONE  = ( 1.0E+0, 0.0E+0 ),
00243      $                     ZERO = ( 0.0E+0, 0.0E+0 ) )
00244 *     ..
00245 *     .. Local Scalars ..
00246       LOGICAL            CCBLCK, CRBLCK, LEFT
00247       CHARACTER          COLBTOP, ROWBTOP
00248       INTEGER            ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
00249      $                   ICROW1, ICROW2, ICTXT, IIC1, IIC2, IIV, IOFFC1,
00250      $                   IOFFC2, IOFFV, IPW, IROFFC1, IROFFC2, IROFFV,
00251      $                   IVCOL, IVROW, JJC1, JJC2, JJV, LDC, LDV, MPC2,
00252      $                   MPV, MYCOL, MYROW, NCC, NCV, NPCOL, NPROW,
00253      $                   NQC2, NQV, RDEST
00254       COMPLEX            TAULOC
00255 *     ..
00256 *     .. External Subroutines ..
00257       EXTERNAL           BLACS_GRIDINFO, CAXPY, CCOPY, CGEBR2D,
00258      $                   CGEBS2D, CGEMV, CGERC, CGERV2D,
00259      $                   CGESD2D, CGSUM2D, CLASET, INFOG2L,
00260      $                   PB_TOPGET, PBCTRNV
00261 *     ..
00262 *     .. External Functions ..
00263       LOGICAL            LSAME
00264       INTEGER            NUMROC
00265       EXTERNAL           LSAME, NUMROC
00266 *     ..
00267 *     .. Intrinsic Functions ..
00268       INTRINSIC          MIN, MOD
00269 *     ..
00270 *     .. Executable Statements ..
00271 *
00272 *     Quick return if possible
00273 *
00274       IF( M.LE.0 .OR. N.LE.0 )
00275      $   RETURN
00276 *
00277 *     Get grid parameters.
00278 *
00279       ICTXT = DESCC( CTXT_ )
00280       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00281 *
00282 *     Figure local indexes
00283 *
00284       LEFT = LSAME( SIDE, 'L' )
00285       CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
00286      $              IVROW, IVCOL )
00287       IROFFV = MOD( IV-1, DESCV( NB_ ) )
00288       MPV = NUMROC( L+IROFFV, DESCV( MB_ ), MYROW, IVROW, NPROW )
00289       IF( MYROW.EQ.IVROW )
00290      $   MPV = MPV - IROFFV
00291       ICOFFV = MOD( JV-1, DESCV( NB_ ) )
00292       NQV = NUMROC( L+ICOFFV, DESCV( NB_ ), MYCOL, IVCOL, NPCOL )
00293       IF( MYCOL.EQ.IVCOL )
00294      $   NQV = NQV - ICOFFV
00295       LDV = DESCV( LLD_ )
00296       NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ),
00297      $              NPCOL )
00298       LDV = DESCV( LLD_ )
00299       IIV = MIN( IIV, LDV )
00300       JJV = MIN( JJV, NCV )
00301       IOFFV = IIV+(JJV-1)*LDV
00302       NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ),
00303      $              NPCOL )
00304       CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
00305      $              IIC1, JJC1, ICROW1, ICCOL1 )
00306       IROFFC1 = MOD( IC-1, DESCC( MB_ ) )
00307       ICOFFC1 = MOD( JC-1, DESCC( NB_ ) )
00308       LDC = DESCC( LLD_ )
00309       IIC1 = MIN( IIC1, LDC )
00310       JJC1 = MIN( JJC1, MAX( 1, NCC ) )
00311       IOFFC1 = IIC1 + ( JJC1-1 ) * LDC
00312 *
00313       IF( LEFT ) THEN
00314          CALL INFOG2L( IC+M-L, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL,
00315      $                 IIC2, JJC2, ICROW2, ICCOL2 )
00316          IROFFC2 = MOD( IC+M-L-1, DESCC( MB_ ) )
00317          ICOFFC2 = MOD( JC-1, DESCC( NB_ ) )
00318          NQC2 = NUMROC( N+ICOFFC2, DESCC( NB_ ), MYCOL, ICCOL2, NPCOL )
00319          IF( MYCOL.EQ.ICCOL2 )
00320      $      NQC2 = NQC2 - ICOFFC2
00321       ELSE
00322          CALL INFOG2L( IC, JC+N-L, DESCC, NPROW, NPCOL, MYROW, MYCOL,
00323      $                 IIC2, JJC2, ICROW2, ICCOL2 )
00324          IROFFC2 = MOD( IC-1, DESCC( MB_ ) )
00325          MPC2 = NUMROC( M+IROFFC2, DESCC( MB_ ), MYROW, ICROW2, NPROW )
00326          IF( MYROW.EQ.ICROW2 )
00327      $      MPC2 = MPC2 - IROFFC2
00328          ICOFFC2 = MOD( JC+N-L-1, DESCC( NB_ ) )
00329       END IF
00330       IIC2 = MIN( IIC2, LDC )
00331       JJC2 = MIN( JJC2, NCC )
00332       IOFFC2 = IIC2 + ( JJC2-1 ) * LDC
00333 *
00334 *     Is sub( C ) only distributed over a process row ?
00335 *
00336       CRBLCK = ( M.LE.(DESCC( MB_ )-IROFFC1) )
00337 *
00338 *     Is sub( C ) only distributed over a process column ?
00339 *
00340       CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFFC1) )
00341 *
00342       IF( LEFT ) THEN
00343 *
00344          IF( CRBLCK ) THEN
00345             RDEST = ICROW2
00346          ELSE
00347             RDEST = -1
00348          END IF
00349 *
00350          IF( CCBLCK ) THEN
00351 *
00352 *           sub( C ) is distributed over a process column
00353 *
00354             IF( DESCV( M_ ).EQ.INCV ) THEN
00355 *
00356 *              Transpose row vector V (ICOFFV = IROFFC2)
00357 *
00358                IPW = MPV+1
00359                CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M,
00360      $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
00361      $                       ZERO,
00362      $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
00363      $                       WORK( IPW ) )
00364 *
00365 *              Perform the local computation within a process column
00366 *
00367                IF( MYCOL.EQ.ICCOL2 ) THEN
00368 *
00369                   IF( MYROW.EQ.IVROW ) THEN
00370 *
00371                      CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
00372      $                             TAU( IIV ), 1 )
00373                      TAULOC = TAU( IIV )
00374 *
00375                   ELSE
00376 *
00377                      CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1,
00378      $                             TAULOC, 1, IVROW, MYCOL )
00379 *
00380                   END IF
00381 *
00382                   IF( TAULOC.NE.ZERO ) THEN
00383 *
00384 *                    w := sub( C )' * v
00385 *
00386                      IF( MPV.GT.0 ) THEN
00387                         CALL CGEMV( 'Conjugate transpose', MPV, NQC2,
00388      $                              ONE, C( IOFFC2 ), LDC, WORK, 1,
00389      $                              ZERO, WORK( IPW ), 1 )
00390                      ELSE
00391                         CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
00392      $                               WORK( IPW ), MAX( 1, NQC2 ) )
00393                      END IF
00394                      IF( MYROW.EQ.ICROW1 )
00395      $                  CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
00396      $                              WORK( IPW ), MAX( 1, NQC2 ) )
00397 *
00398                      CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
00399      $                             WORK( IPW ), MAX( 1, NQC2 ), RDEST,
00400      $                             MYCOL )
00401 *
00402 *                    sub( C ) := sub( C ) - v * w'
00403 *
00404                      IF( MYROW.EQ.ICROW1 )
00405      $                  CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
00406      $                              MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
00407                      CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1,
00408      $                           WORK( IPW ), 1, C( IOFFC2 ), LDC )
00409                   END IF
00410 *
00411                END IF
00412 *
00413             ELSE
00414 *
00415 *              V is a column vector
00416 *
00417                IF( IVCOL.EQ.ICCOL2 ) THEN
00418 *
00419 *                 Perform the local computation within a process column
00420 *
00421                   IF( MYCOL.EQ.ICCOL2 ) THEN
00422 *
00423                      TAULOC = TAU( JJV )
00424 *
00425                      IF( TAULOC.NE.ZERO ) THEN
00426 *
00427 *                       w := sub( C )' * v
00428 *
00429                         IF( MPV.GT.0 ) THEN
00430                            CALL CGEMV( 'Conjugate transpose', MPV, NQC2,
00431      $                              ONE, C( IOFFC2 ), LDC, V( IOFFV ),
00432      $                              1, ZERO, WORK, 1 )
00433                         ELSE
00434                            CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
00435      $                                  WORK, MAX( 1, NQC2 ) )
00436                         END IF
00437                         IF( MYROW.EQ.ICROW1 )
00438      $                     CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
00439      $                                 WORK, MAX( 1, NQC2 ) )
00440 *
00441                         CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
00442      $                                WORK, MAX( 1, NQC2 ), RDEST,
00443      $                                MYCOL )
00444 *
00445 *                       sub( C ) := sub( C ) - v * w'
00446 *
00447                         IF( MYROW.EQ.ICROW1 )
00448      $                     CALL CAXPY( NQC2, -TAULOC, WORK,
00449      $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
00450      $                                 LDC )
00451                         CALL CGERC( MPV, NQC2, -TAULOC, V( IOFFV ), 1,
00452      $                              WORK, 1, C( IOFFC2 ), LDC )
00453                      END IF
00454 *
00455                   END IF
00456 *
00457                ELSE
00458 *
00459 *                 Send V and TAU to the process column ICCOL2
00460 *
00461                   IF( MYCOL.EQ.IVCOL ) THEN
00462 *
00463                      IPW = MPV+1
00464                      CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
00465                      WORK( IPW ) = TAU( JJV )
00466                      CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
00467      $                             ICCOL2 )
00468 *
00469                   ELSE IF( MYCOL.EQ.ICCOL2 ) THEN
00470 *
00471                      IPW = MPV+1
00472                      CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
00473      $                             IVCOL )
00474                      TAULOC = WORK( IPW )
00475 *
00476                      IF( TAULOC.NE.ZERO ) THEN
00477 *
00478 *                       w := sub( C )' * v
00479 *
00480                         IF( MPV.GT.0 ) THEN
00481                            CALL CGEMV( 'Conjugate transpose', MPV, NQC2,
00482      $                                 ONE, C( IOFFC2 ), LDC, WORK, 1,
00483      $                                 ZERO, WORK( IPW ), 1 )
00484                         ELSE
00485                            CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
00486      $                                  WORK( IPW ), MAX( 1, NQC2 ) )
00487                         END IF
00488                         IF( MYROW.EQ.ICROW1 )
00489      $                     CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
00490      $                                 WORK( IPW ), MAX( 1, NQC2 ) )
00491 *
00492                         CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
00493      $                                WORK( IPW ), MAX( 1, NQC2 ),
00494      $                                RDEST, MYCOL )
00495 *
00496 *                       sub( C ) := sub( C ) - v * w'
00497 *
00498                         IF( MYROW.EQ.ICROW1 )
00499      $                     CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
00500      $                                 MAX( 1, NQC2 ), C( IOFFC1 ),
00501      $                                 LDC )
00502                         CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1,
00503      $                              WORK( IPW ), 1, C( IOFFC2 ), LDC )
00504                      END IF
00505 *
00506                   END IF
00507 *
00508                END IF
00509 *
00510             END IF
00511 *
00512          ELSE
00513 *
00514 *           sub( C ) is a proper distributed matrix
00515 *
00516             IF( DESCV( M_ ).EQ.INCV ) THEN
00517 *
00518 *              Transpose and broadcast row vector V (ICOFFV=IROFFC2)
00519 *
00520                IPW = MPV+1
00521                CALL PBCTRNV( ICTXT, 'Rowwise', 'Transpose', M,
00522      $                       DESCV( NB_ ), IROFFC2, V( IOFFV ), LDV,
00523      $                       ZERO,
00524      $                       WORK, 1, IVROW, IVCOL, ICROW2, -1,
00525      $                       WORK( IPW ) )
00526 *
00527 *              Perform the local computation within a process column
00528 *
00529                IF( MYROW.EQ.IVROW ) THEN
00530 *
00531                   CALL CGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
00532      $                          TAU( IIV ), 1 )
00533                   TAULOC = TAU( IIV )
00534 *
00535                ELSE
00536 *
00537                   CALL CGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC,
00538      $                          1, IVROW, MYCOL )
00539 *
00540                END IF
00541 *
00542                IF( TAULOC.NE.ZERO ) THEN
00543 *
00544 *                 w := sub( C )' * v
00545 *
00546                   IF( MPV.GT.0 ) THEN
00547                      CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE,
00548      $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
00549      $                           WORK( IPW ), 1 )
00550                   ELSE
00551                      CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
00552      $                            WORK( IPW ), MAX( 1, NQC2 ) )
00553                   END IF
00554                   IF( MYROW.EQ.ICROW1 )
00555      $               CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
00556      $                           WORK( IPW ), MAX( 1, NQC2 ) )
00557 *
00558                   CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
00559      $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
00560      $                          MYCOL )
00561 *
00562 *                 sub( C ) := sub( C ) - v * w'
00563 *
00564                   IF( MYROW.EQ.ICROW1 )
00565      $               CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
00566      $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
00567                   CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
00568      $                        1, C( IOFFC2 ), LDC )
00569                END IF
00570 *
00571             ELSE
00572 *
00573 *              Broadcast column vector V
00574 *
00575                CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
00576                IF( MYCOL.EQ.IVCOL ) THEN
00577 *
00578                   IPW = MPV+1
00579                   CALL CCOPY( MPV, V( IOFFV ), 1, WORK, 1 )
00580                   WORK( IPW ) = TAU( JJV )
00581                   CALL CGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1,
00582      $                          WORK, IPW )
00583                   TAULOC = TAU( JJV )
00584 *
00585                ELSE
00586 *
00587                   IPW = MPV+1
00588                   CALL CGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK,
00589      $                          IPW, MYROW, IVCOL )
00590                   TAULOC = WORK( IPW )
00591 *
00592                END IF
00593 *
00594                IF( TAULOC.NE.ZERO ) THEN
00595 *
00596 *                 w := sub( C )' * v
00597 *
00598                   IF( MPV.GT.0 ) THEN
00599                      CALL CGEMV( 'Conjugate transpose', MPV, NQC2, ONE,
00600      $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
00601      $                           WORK( IPW ), 1 )
00602                   ELSE
00603                      CALL CLASET( 'All', NQC2, 1, ZERO, ZERO,
00604      $                            WORK( IPW ), MAX( 1, NQC2 ) )
00605                   END IF
00606                   IF( MYROW.EQ.ICROW1 )
00607      $               CALL CAXPY( NQC2, ONE, C( IOFFC1 ), LDC,
00608      $                           WORK( IPW ), MAX( 1, NQC2 ) )
00609 *
00610                   CALL CGSUM2D( ICTXT, 'Columnwise', ' ', NQC2, 1,
00611      $                          WORK( IPW ), MAX( 1, NQC2 ), RDEST,
00612      $                          MYCOL )
00613 *
00614 *                 sub( C ) := sub( C ) - v * w'
00615 *
00616                   IF( MYROW.EQ.ICROW1 )
00617      $               CALL CAXPY( NQC2, -TAULOC, WORK( IPW ),
00618      $                           MAX( 1, NQC2 ), C( IOFFC1 ), LDC )
00619                   CALL CGERC( MPV, NQC2, -TAULOC, WORK, 1, WORK( IPW ),
00620      $                        1, C( IOFFC2 ), LDC )
00621                END IF
00622 *
00623             END IF
00624 *
00625          END IF
00626 *
00627       ELSE
00628 *
00629          IF( CCBLCK ) THEN
00630             RDEST = MYROW
00631          ELSE
00632             RDEST = -1
00633          END IF
00634 *
00635          IF( CRBLCK ) THEN
00636 *
00637 *           sub( C ) is distributed over a process row
00638 *
00639             IF( DESCV( M_ ).EQ.INCV ) THEN
00640 *
00641 *              V is a row vector
00642 *
00643                IF( IVROW.EQ.ICROW2 ) THEN
00644 *
00645 *                 Perform the local computation within a process row
00646 *
00647                   IF( MYROW.EQ.ICROW2 ) THEN
00648 *
00649                      TAULOC = TAU( IIV )
00650 *
00651                      IF( TAULOC.NE.ZERO ) THEN
00652 *
00653 *                       w := sub( C ) * v
00654 *
00655                         IF( NQV.GT.0 ) THEN
00656                            CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
00657      $                                 C( IOFFC2 ), LDC, V( IOFFV ),
00658      $                                 LDV, ZERO, WORK, 1 )
00659                         ELSE
00660                            CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
00661      $                                  WORK, MAX( 1, MPC2 ) )
00662                         END IF
00663                         IF( MYCOL.EQ.ICCOL1 )
00664      $                     CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
00665      $                                   WORK, 1 )
00666 *
00667                         CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
00668      $                                WORK, MAX( 1, MPC2 ), RDEST,
00669      $                               ICCOL2 )
00670 *
00671                         IF( MYCOL.EQ.ICCOL1 )
00672      $                     CALL CAXPY( MPC2, -TAULOC, WORK, 1,
00673      $                                 C( IOFFC1 ), 1 )
00674 *
00675 *                       sub( C ) := sub( C ) - w * v'
00676 *
00677                         IF( MPC2.GT.0 .AND. NQV.GT.0 )
00678      $                     CALL CGERC( MPC2, NQV, -TAULOC, WORK, 1,
00679      $                                 V( IOFFV ), LDV, C( IOFFC2 ),
00680      $                                 LDC )
00681                      END IF
00682 *
00683                   END IF
00684 *
00685                ELSE
00686 *
00687 *                 Send V and TAU to the process row ICROW2
00688 *
00689                   IF( MYROW.EQ.IVROW ) THEN
00690 *
00691                      IPW = NQV+1
00692                      CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
00693                      WORK( IPW ) = TAU( IIV )
00694                      CALL CGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW2,
00695      $                             MYCOL )
00696 *
00697                   ELSE IF( MYROW.EQ.ICROW2 ) THEN
00698 *
00699                      IPW = NQV+1
00700                      CALL CGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW,
00701      $                             MYCOL )
00702                      TAULOC = WORK( IPW )
00703 *
00704                      IF( TAULOC.NE.ZERO ) THEN
00705 *
00706 *                       w := sub( C ) * v
00707 *
00708                         IF( NQV.GT.0 ) THEN
00709                            CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
00710      $                                 C( IOFFC2 ), LDC, WORK, 1, ZERO,
00711      $                                 WORK( IPW ), 1 )
00712                         ELSE
00713                            CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
00714      $                                  WORK( IPW ), MAX( 1, MPC2 ) )
00715                         END IF
00716                         IF( MYCOL.EQ.ICCOL1 )
00717      $                     CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
00718      $                                   WORK( IPW ), 1 )
00719                         CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
00720      $                                WORK( IPW ), MAX( 1, MPC2 ),
00721      $                                RDEST, ICCOL2 )
00722                         IF( MYCOL.EQ.ICCOL1 )
00723      $                     CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
00724      $                                 C( IOFFC1 ), 1 )
00725 *
00726 *                       sub( C ) := sub( C ) - w * v'
00727 *
00728                         CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
00729      $                              WORK, 1, C( IOFFC2 ), LDC )
00730                      END IF
00731 *
00732                   END IF
00733 *
00734                END IF
00735 *
00736             ELSE
00737 *
00738 *              Transpose column vector V (IROFFV = ICOFFC2)
00739 *
00740                IPW = NQV+1
00741                CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N,
00742      $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
00743      $                       WORK, 1, IVROW, IVCOL, ICROW2, ICCOL2,
00744      $                       WORK( IPW ) )
00745 *
00746 *              Perform the local computation within a process column
00747 *
00748                IF( MYROW.EQ.ICROW2 ) THEN
00749 *
00750                   IF( MYCOL.EQ.IVCOL ) THEN
00751 *
00752                      CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1,
00753      $                             TAU( JJV ), 1 )
00754                      TAULOC = TAU( JJV )
00755 *
00756                   ELSE
00757 *
00758                      CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC,
00759      $                             1, MYROW, IVCOL )
00760 *
00761                   END IF
00762 *
00763                   IF( TAULOC.NE.ZERO ) THEN
00764 *
00765 *                    w := sub( C ) * v
00766 *
00767                      IF( NQV.GT.0 ) THEN
00768                         CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
00769      $                              C( IOFFC2 ), LDC, WORK, 1, ZERO,
00770      $                              WORK( IPW ), 1 )
00771                      ELSE
00772                         CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
00773      $                               WORK( IPW ), MAX( 1, MPC2 ) )
00774                      END IF
00775                      IF( MYCOL.EQ.ICCOL1 )
00776      $                  CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
00777      $                              WORK( IPW ), 1 )
00778                      CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
00779      $                             WORK( IPW ), MAX( 1, MPC2 ), RDEST,
00780      $                             ICCOL2 )
00781                      IF( MYCOL.EQ.ICCOL1 )
00782      $                  CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
00783      $                              C( IOFFC1 ), 1 )
00784 *
00785 *                    sub( C ) := sub( C ) - w * v'
00786 *
00787                      CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1,
00788      $                           WORK, 1, C( IOFFC2 ), LDC )
00789                   END IF
00790 *
00791                END IF
00792 *
00793             END IF
00794 *
00795          ELSE
00796 *
00797 *           sub( C ) is a proper distributed matrix
00798 *
00799             IF( DESCV( M_ ).EQ.INCV ) THEN
00800 *
00801 *              Broadcast row vector V
00802 *
00803                CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise',
00804      $                       COLBTOP )
00805                IF( MYROW.EQ.IVROW ) THEN
00806 *
00807                   IPW = NQV+1
00808                   CALL CCOPY( NQV, V( IOFFV ), LDV, WORK, 1 )
00809                   WORK( IPW ) = TAU( IIV )
00810                   CALL CGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
00811      $                          WORK, IPW )
00812                   TAULOC = TAU( IIV )
00813 *
00814                ELSE
00815 *
00816                   IPW = NQV+1
00817                   CALL CGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
00818      $                          WORK, IPW, IVROW, MYCOL )
00819                   TAULOC = WORK( IPW )
00820 *
00821                END IF
00822 *
00823                IF( TAULOC.NE.ZERO ) THEN
00824 *
00825 *                 w := sub( C ) * v
00826 *
00827                   IF( NQV.GT.0 ) THEN
00828                      CALL CGEMV( 'No Transpose', MPC2, NQV, ONE,
00829      $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
00830      $                           WORK( IPW ), 1 )
00831                   ELSE
00832                      CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
00833      $                            WORK( IPW ), MAX( 1, MPC2 ) )
00834                   END IF
00835                   IF( MYCOL.EQ.ICCOL1 )
00836      $               CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
00837      $                           WORK( IPW ), 1 )
00838 *
00839                   CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
00840      $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
00841      $                          ICCOL2 )
00842                   IF( MYCOL.EQ.ICCOL1 )
00843      $               CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
00844      $                           C( IOFFC1 ), 1 )
00845 *
00846 *                 sub( C ) := sub( C ) - w * v'
00847 *
00848                   CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
00849      $                        1, C( IOFFC2 ), LDC )
00850                END IF
00851 *
00852             ELSE
00853 *
00854 *              Transpose and broadcast column vector V (ICOFFC2=IROFFV)
00855 *
00856                IPW = NQV+1
00857                CALL PBCTRNV( ICTXT, 'Columnwise', 'Transpose', N,
00858      $                       DESCV( MB_ ), ICOFFC2, V( IOFFV ), 1, ZERO,
00859      $                       WORK, 1, IVROW, IVCOL, -1, ICCOL2,
00860      $                       WORK( IPW ) )
00861 *
00862 *              Perform the local computation within a process column
00863 *
00864                IF( MYCOL.EQ.IVCOL ) THEN
00865 *
00866                   CALL CGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ),
00867      $                          1 )
00868                   TAULOC = TAU( JJV )
00869 *
00870                ELSE
00871 *
00872                   CALL CGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1,
00873      $                          MYROW, IVCOL )
00874 *
00875                END IF
00876 *
00877                IF( TAULOC.NE.ZERO ) THEN
00878 *
00879 *                 w := sub( C ) * v
00880 *
00881                   IF( NQV.GT.0 ) THEN
00882                      CALL CGEMV( 'No transpose', MPC2, NQV, ONE,
00883      $                           C( IOFFC2 ), LDC, WORK, 1, ZERO,
00884      $                           WORK( IPW ), 1 )
00885                   ELSE
00886                      CALL CLASET( 'All', MPC2, 1, ZERO, ZERO,
00887      $                            WORK( IPW ), MAX( 1, MPC2 ) )
00888                   END IF
00889                   IF( MYCOL.EQ.ICCOL1 )
00890      $               CALL CAXPY( MPC2, ONE, C( IOFFC1 ), 1,
00891      $                           WORK( IPW ), 1 )
00892                   CALL CGSUM2D( ICTXT, 'Rowwise', ' ', MPC2, 1,
00893      $                          WORK( IPW ), MAX( 1, MPC2 ), RDEST,
00894      $                          ICCOL2 )
00895                   IF( MYCOL.EQ.ICCOL1 )
00896      $               CALL CAXPY( MPC2, -TAULOC, WORK( IPW ), 1,
00897      $                           C( IOFFC1 ), 1 )
00898 *
00899 *                 sub( C ) := sub( C ) - w * v'
00900 *
00901                   CALL CGERC( MPC2, NQV, -TAULOC, WORK( IPW ), 1, WORK,
00902      $                        1, C( IOFFC2 ), LDC )
00903                END IF
00904 *
00905             END IF
00906 *
00907          END IF
00908 *
00909       END IF
00910 *
00911       RETURN
00912 *
00913 *     End of PCLARZ
00914 *
00915       END