ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pdatrmv_.c
Go to the documentation of this file.
00001 /* ---------------------------------------------------------------------
00002 *
00003 *  -- PBLAS routine (version 2.0) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     April 1, 1998
00007 *
00008 *  ---------------------------------------------------------------------
00009 */
00010 /*
00011 *  Include files
00012 */
00013 #include "pblas.h"
00014 #include "PBpblas.h"
00015 #include "PBtools.h"
00016 #include "PBblacs.h"
00017 #include "PBblas.h"
00018 
00019 #ifdef __STDC__
00020 void pdatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N,
00021                double * ALPHA,
00022                double * A, int * IA, int * JA, int * DESCA,
00023                double * X, int * IX, int * JX, int * DESCX,
00024                int * INCX, double * BETA,
00025                double * Y, int * IY, int * JY, int * DESCY,
00026                int * INCY )
00027 #else
00028 void pdatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX,
00029                JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY )
00030 /*
00031 *  .. Scalar Arguments ..
00032 */
00033    F_CHAR_T       DIAG, TRANS, UPLO;
00034    int            * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
00035                   * N;
00036    double         * ALPHA, * BETA;
00037 /*
00038 *  .. Array Arguments ..
00039 */
00040    int            * DESCA, * DESCX, * DESCY;
00041    double         * A, * X, * Y;
00042 #endif
00043 {
00044 /*
00045 *  Purpose
00046 *  =======
00047 *
00048 *  PDATRMV  performs one of the matrix-vector operations
00049 *
00050 *     sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) +
00051 *                 abs( beta*sub( Y ) ),
00052 *     or
00053 *
00054 *     sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) +
00055 *                 abs( beta*sub( Y ) ),
00056 *
00057 *  where
00058 *
00059 *     sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
00060 *
00061 *     sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X,
00062 *                      X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X,
00063 *     and,
00064 *
00065 *     sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y,
00066 *                      Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y.
00067 *
00068 *  Alpha  and  beta  are  real  scalars,  sub( Y )  is a real subvector,
00069 *  sub( X ) is a subvector and sub( A ) is an n by n  triangular  subma-
00070 *  trix.
00071 *
00072 *  Notes
00073 *  =====
00074 *
00075 *  A description  vector  is associated with each 2D block-cyclicly dis-
00076 *  tributed matrix.  This  vector  stores  the  information  required to
00077 *  establish the  mapping  between a  matrix entry and its corresponding
00078 *  process and memory location.
00079 *
00080 *  In  the  following  comments,   the character _  should  be  read  as
00081 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
00082 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
00083 *
00084 *  NOTATION         STORED IN       EXPLANATION
00085 *  ---------------- --------------- ------------------------------------
00086 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
00087 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
00088 *                                   the NPROW x NPCOL BLACS process grid
00089 *                                   A  is  distributed over. The context
00090 *                                   itself  is  global,  but  the handle
00091 *                                   (the integer value) may vary.
00092 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
00093 *                                   ted matrix A, M_A >= 0.
00094 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
00095 *                                   buted matrix A, N_A >= 0.
00096 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
00097 *                                   block of the matrix A, IMB_A > 0.
00098 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
00099 *                                   left   block   of   the  matrix   A,
00100 *                                   INB_A > 0.
00101 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
00102 *                                   bute the last  M_A-IMB_A  rows of A,
00103 *                                   MB_A > 0.
00104 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
00105 *                                   bute the last  N_A-INB_A  columns of
00106 *                                   A, NB_A > 0.
00107 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
00108 *                                   row of the matrix  A is distributed,
00109 *                                   NPROW > RSRC_A >= 0.
00110 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
00111 *                                   first column of  A  is  distributed.
00112 *                                   NPCOL > CSRC_A >= 0.
00113 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
00114 *                                   array  storing  the  local blocks of
00115 *                                   the distributed matrix A,
00116 *                                   IF( Lc( 1, N_A ) > 0 )
00117 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
00118 *                                   ELSE
00119 *                                      LLD_A >= 1.
00120 *
00121 *  Let K be the number of  rows of a matrix A starting at the global in-
00122 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
00123 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
00124 *  receive if these K rows were distributed over NPROW processes.  If  K
00125 *  is the number of columns of a matrix  A  starting at the global index
00126 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
00127 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
00128 *  these K columns were distributed over NPCOL processes.
00129 *
00130 *  The values of Lr() and Lc() may be determined via a call to the func-
00131 *  tion PB_Cnumroc:
00132 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
00133 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
00134 *
00135 *  Arguments
00136 *  =========
00137 *
00138 *  UPLO    (global input) CHARACTER*1
00139 *          On entry,  UPLO  specifies whether the submatrix  sub( A ) is
00140 *          an upper or lower triangular submatrix as follows:
00141 *
00142 *             UPLO = 'U' or 'u'   sub( A ) is an upper triangular
00143 *                                 submatrix,
00144 *
00145 *             UPLO = 'L' or 'l'   sub( A ) is a  lower triangular
00146 *                                 submatrix.
00147 *
00148 *  TRANS   (global input) CHARACTER*1
00149 *          On entry,  TRANS  specifies the  operation to be performed as
00150 *          follows:
00151 *
00152 *             TRANS = 'N' or 'n'
00153 *                sub( Y ) := |alpha|*|sub( A )|*|sub( X )| +
00154 *                                                       |beta*sub( Y )|.
00155 *
00156 *             TRANS = 'T' or 't'
00157 *                sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
00158 *                                                       |beta*sub( Y )|.
00159 *
00160 *             TRANS = 'C' or 'c'
00161 *                sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| +
00162 *                                                       |beta*sub( Y )|.
00163 *
00164 *  DIAG    (global input) CHARACTER*1
00165 *          On entry,  DIAG  specifies  whether or not  sub( A )  is unit
00166 *          triangular as follows:
00167 *
00168 *             DIAG = 'U' or 'u'  sub( A )  is  assumed to be unit trian-
00169 *                                gular,
00170 *
00171 *             DIAG = 'N' or 'n'  sub( A ) is not assumed to be unit tri-
00172 *                                angular.
00173 *
00174 *  N       (global input) INTEGER
00175 *          On entry,  N specifies the order of the  submatrix  sub( A ).
00176 *          N must be at least zero.
00177 *
00178 *  ALPHA   (global input) DOUBLE PRECISION
00179 *          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
00180 *          supplied  as  zero  then  the  local entries of the arrays  A
00181 *          and X corresponding to the entries of the submatrix  sub( A )
00182 *          and the subvector sub( X ) need not be set on input.
00183 *
00184 *  A       (local input) DOUBLE PRECISION array
00185 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
00186 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
00187 *          the local entries of the matrix A.
00188 *          Before entry with  UPLO = 'U' or 'u', this array contains the
00189 *          local entries corresponding to  the entries of the upper tri-
00190 *          angular submatrix  sub( A ), and the local entries correspon-
00191 *          ding to the entries of the  strictly lower triangular part of
00192 *          the submatrix  sub( A )  are not referenced.
00193 *          Before entry with  UPLO = 'L' or 'l', this array contains the
00194 *          local entries corresponding to  the entries of the lower tri-
00195 *          angular submatrix  sub( A ), and the local entries correspon-
00196 *          ding to the entries of the  strictly upper triangular part of
00197 *          the submatrix  sub( A )  are not referenced.
00198 *          Note  that  when DIAG = 'U' or 'u', the local entries corres-
00199 *          ponding to  the diagonal elements  of the submatrix  sub( A )
00200 *          are not referenced either, but are assumed to be unity.
00201 *
00202 *  IA      (global input) INTEGER
00203 *          On entry, IA  specifies A's global row index, which points to
00204 *          the beginning of the submatrix sub( A ).
00205 *
00206 *  JA      (global input) INTEGER
00207 *          On entry, JA  specifies A's global column index, which points
00208 *          to the beginning of the submatrix sub( A ).
00209 *
00210 *  DESCA   (global and local input) INTEGER array
00211 *          On entry, DESCA  is an integer array of dimension DLEN_. This
00212 *          is the array descriptor for the matrix A.
00213 *
00214 *  X       (local input) DOUBLE PRECISION array
00215 *          On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
00216 *          is   at  least  MAX( 1, Lr( 1, IX ) )  when  INCX = M_X   and
00217 *          MAX( 1, Lr( 1, IX+Lx-1 ) )  otherwise, and,  Kx  is  at least
00218 *          Lc( 1, JX+Lx-1 ) when  INCX = M_X  and Lc( 1, JX ) otherwise.
00219 *          Lx  is  N when TRANS = 'N' or 'n' and M otherwise. Before en-
00220 *          try, this array contains the local entries of the matrix X.
00221 *
00222 *  IX      (global input) INTEGER
00223 *          On entry, IX  specifies X's global row index, which points to
00224 *          the beginning of the submatrix sub( X ).
00225 *
00226 *  JX      (global input) INTEGER
00227 *          On entry, JX  specifies X's global column index, which points
00228 *          to the beginning of the submatrix sub( X ).
00229 *
00230 *  DESCX   (global and local input) INTEGER array
00231 *          On entry, DESCX  is an integer array of dimension DLEN_. This
00232 *          is the array descriptor for the matrix X.
00233 *
00234 *  INCX    (global input) INTEGER
00235 *          On entry,  INCX   specifies  the  global  increment  for  the
00236 *          elements of  X.  Only two values of  INCX   are  supported in
00237 *          this version, namely 1 and M_X. INCX  must not be zero.
00238 *
00239 *  BETA    (global input) DOUBLE PRECISION
00240 *          On entry,  BETA  specifies the scalar  beta.   When  BETA  is
00241 *          supplied  as  zero  then  the  local entries of  the array  Y
00242 *          corresponding to the entries of the subvector  sub( Y )  need
00243 *          not be set on input.
00244 *
00245 *  Y       (local input/local output) DOUBLE PRECISION array
00246 *          On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
00247 *          is   at  least  MAX( 1, Lr( 1, IY ) )  when  INCY = M_Y   and
00248 *          MAX( 1, Lr( 1, IY+Ly-1 ) )  otherwise, and,  Ky  is  at least
00249 *          Lc( 1, JY+Ly-1 ) when  INCY = M_Y  and Lc( 1, JY ) otherwise.
00250 *          Ly is M when TRANS = 'N' or 'n' and  N  otherwise. Before en-
00251 *          try, this array  contains the local  entries of the matrix Y.
00252 *          On exit, sub( Y ) is overwritten by the updated subvector.
00253 *
00254 *  IY      (global input) INTEGER
00255 *          On entry, IY  specifies Y's global row index, which points to
00256 *          the beginning of the submatrix sub( Y ).
00257 *
00258 *  JY      (global input) INTEGER
00259 *          On entry, JY  specifies Y's global column index, which points
00260 *          to the beginning of the submatrix sub( Y ).
00261 *
00262 *  DESCY   (global and local input) INTEGER array
00263 *          On entry, DESCY  is an integer array of dimension DLEN_. This
00264 *          is the array descriptor for the matrix Y.
00265 *
00266 *  INCY    (global input) INTEGER
00267 *          On entry,  INCY   specifies  the  global  increment  for  the
00268 *          elements of  Y.  Only two values of  INCY   are  supported in
00269 *          this version, namely 1 and M_Y. INCY  must not be zero.
00270 *
00271 *  -- Written on April 1, 1998 by
00272 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
00273 *
00274 *  ---------------------------------------------------------------------
00275 */
00276 /*
00277 *  .. Local Scalars ..
00278 */
00279    char           DiagA, TranOp, UploA, Yroc, * one, top;
00280    int            Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
00281                   Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr,
00282                   YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq,
00283                   Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb,
00284                   notran, nounit, npcol, nprow, size, upper, usiz;
00285    double         * tbeta;
00286    PBTYP_T        * type, * utyp;
00287 /*
00288 *  .. Local Arrays ..
00289 */
00290    int            Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_],
00291                   Yd [DLEN_];
00292    char           * Aptr = NULL, * XA = NULL, * YA = NULL;
00293 /* ..
00294 *  .. Executable Statements ..
00295 *
00296 */
00297    upper  = ( ( UploA  = Mupcase( F2C_CHAR( UPLO  )[0] ) ) ==  CUPPER );
00298    notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
00299    nounit = ( ( DiagA  = Mupcase( F2C_CHAR( DIAG  )[0] ) ) == CNOUNIT );
00300    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
00301    PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
00302    PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
00303 #ifndef NO_ARGCHK
00304 /*
00305 *  Test the input parameters
00306 */
00307    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00308    if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
00309    {
00310       if( ( !upper ) && ( UploA != CLOWER ) )
00311       {
00312          PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal UPLO = %c\n", UploA );
00313          info = -1;
00314       }
00315       else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
00316       {
00317          PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal TRANS = %c\n", TranOp );
00318          info = -2;
00319       }
00320       else if( ( !nounit ) && ( DiagA != CUNIT ) )
00321       {
00322          PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal DIAG = %c\n", DiagA );
00323          info = -3;
00324       }
00325       PB_Cchkmat( ctxt, "PDATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad,  9, &info );
00326       PB_Cchkvec( ctxt, "PDATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info );
00327       PB_Cchkvec( ctxt, "PDATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info );
00328    }
00329    if( info ) { PB_Cabort( ctxt, "PDATRMV", info ); return; }
00330 #endif
00331 /*
00332 *  Quick return if possible
00333 */
00334    if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) &&
00335                         ( BETA [REAL_PART] == ONE  ) ) ) return;
00336 /*
00337 *  Retrieve process grid information
00338 */
00339 #ifdef NO_ARGCHK
00340    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00341 #endif
00342 /*
00343 *  Get type structure
00344 */
00345    type = utyp = PB_Cdtypeset();
00346    size = usiz = type->size;
00347 /*
00348 *  and when alpha is zero
00349 */
00350    if( ALPHA[REAL_PART] == ZERO )
00351    {
00352 /*
00353 *  Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
00354 */
00355       PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
00356                    &Yrow, &Ycol );
00357 
00358       if( *INCY == Yd[M_] )
00359       {
00360 /*
00361 *  sub( Y ) resides in (a) process row(s)
00362 */
00363          if( ( myrow == Yrow ) || ( Yrow < 0 ) )
00364          {
00365 /*
00366 *  Make sure I own some data and scale sub( Y )
00367 */
00368             Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
00369                               npcol );
00370             if( Ynq > 0 )
00371             {
00372                Yld = Yd[LLD_];
00373                dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
00374                         Yjj, Yld, usiz ), &Yld );
00375             }
00376          }
00377       }
00378       else
00379       {
00380 /*
00381 *  sub( Y ) resides in (a) process column(s)
00382 */
00383          if( ( mycol == Ycol ) || ( Ycol < 0 ) )
00384          {
00385 /*
00386 *  Make sure I own some data and scale sub( Y )
00387 */
00388             Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
00389                               nprow );
00390             if( Ynp > 0 )
00391             {
00392                dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
00393                         Yjj, Yd[LLD_], usiz ), INCY );
00394             }
00395          }
00396       }
00397       return;
00398    }
00399 /*
00400 *  Compute descriptor Ad0 for sub( A )
00401 */
00402    PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
00403                  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
00404 
00405    Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN );
00406 
00407    if( notran )
00408    {
00409 /*
00410 *  Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A )
00411 */
00412       PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
00413                   Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
00414                   &YAsum, &YApbY );
00415 /*
00416 *  Replicate sub( X ) in process rows spanned by sub( A ) -> XA
00417 */
00418       PB_CInV( type, NOCONJG, ROW,    *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
00419                ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
00420    }
00421    else
00422    {
00423 /*
00424 *  Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A )
00425 */
00426       PB_CInOutV( utyp, ROW,    *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y),
00427                   Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr,
00428                   &YAsum, &YApbY );
00429 /*
00430 *  Replicate sub( X ) in process columns spanned by sub( A ) -> XA
00431 */
00432       PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd,
00433                ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr );
00434    }
00435 
00436    one   = type->one;
00437 /*
00438 *  Local matrix-vector multiply iff I own some data
00439 */
00440    Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
00441    Acol  = Ad0[CSRC_]; Arow  = Ad0[RSRC_];
00442    Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
00443    Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
00444 
00445    if( ( Amp > 0 ) && ( Anq > 0 ) )
00446    {
00447       Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
00448 
00449       XAld = XAd[LLD_]; YAld = YAd[LLD_];
00450 /*
00451 *  Scale YA in the case sub( Y ) has been reused
00452 */
00453       if( notran && !( YApbY ) )
00454       {
00455 /*
00456 *  YA resides in (a) process column(s)
00457 */
00458          if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) )
00459          {
00460 /*
00461 *  Make sure I own some data and scale YA
00462 */
00463             if( Amp > 0 )
00464                dascal_( &Amp, ((char *) tbeta), YA, &ione );
00465          }
00466       }
00467       else if( !( notran ) && !( YApbY ) )
00468       {
00469 /*
00470 *  YA resides in (a) process row(s)
00471 */
00472          if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) )
00473          {
00474 /*
00475 *  Make sure I own some data and scale YA
00476 */
00477             if( Anq > 0 )
00478                dascal_( &Anq, ((char *) tbeta), YA, &YAld );
00479          }
00480       }
00481 /*
00482 *  Computational partitioning size is computed as the product of the logical
00483 *  value returned by pilaenv_ and 2 * lcm( nprow, npcol )
00484 */
00485       nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) *
00486            PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
00487 
00488       if( upper )
00489       {
00490          if( notran )
00491          {
00492             for( k = 0; k < *N; k += nb )
00493             {
00494                kb   = *N - k; kb = MIN( kb, nb );
00495                Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00496                Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00497                Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00498                if( Akp > 0 && Anq0 > 0 )
00499                {
00500                   dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
00501                            Mptr( Aptr, 0, Akq,  Ald, size ), &Ald,
00502                            Mptr( XA,   0, Akq, XAld, size ), &XAld, one,
00503                            YA, &ione );
00504                }
00505                PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
00506                          ((char *) ALPHA), Aptr, k, k, Ad0,
00507                          Mptr( XA, 0, Akq, XAld, size ), XAld,
00508                          Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
00509             }
00510          }
00511          else
00512          {
00513             for( k = 0; k < *N; k += nb )
00514             {
00515                kb   = *N - k; kb = MIN( kb, nb );
00516                Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00517                Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00518                Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00519                if( Akp > 0 && Anq0 > 0 )
00520                {
00521                   dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA),
00522                            Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione,
00523                            one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld );
00524                }
00525                PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1,
00526                          ((char *) ALPHA), Aptr, k, k, Ad0,
00527                          Mptr( XA, Akp, 0, XAld, size ), XAld,
00528                          Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
00529             }
00530          }
00531       }
00532       else
00533       {
00534          if( notran )
00535          {
00536             for( k = 0; k < *N; k += nb )
00537             {
00538                kb  = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
00539                Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
00540                Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
00541                PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
00542                          ((char *) ALPHA), Aptr, k, k, Ad0,
00543                          Mptr( XA, 0, Akq, XAld, size ), XAld,
00544                          Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv );
00545                Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
00546                Amp0 = Amp - Akp;
00547                Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
00548                if( Amp0 > 0 && Anq0 > 0 )
00549                {
00550                   dagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA),
00551                            Mptr( Aptr, Akp, Akq,  Ald, size ), &Ald,
00552                            Mptr( XA,     0, Akq, XAld, size ), &XAld, one,
00553                            Mptr( YA,   Akp,   0, YAld, usiz ), &ione );
00554                }
00555             }
00556          }
00557          else
00558          {
00559             for( k = 0; k < *N; k += nb )
00560             {
00561                kb  = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
00562                Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
00563                Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
00564                PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1,
00565                          ((char *) ALPHA), Aptr, k, k, Ad0,
00566                          Mptr( XA, Akp, 0, XAld, size ), XAld,
00567                          Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv );
00568                Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
00569                Amp0 = Amp - Akp;
00570                Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
00571                if( Amp0 > 0 && Anq0 > 0 )
00572                {
00573                   dagemv_( TRANS, &Amp0, &Anq0, one,
00574                            Mptr( Aptr, Akp, Akq,  Ald, size ), &Ald,
00575                            Mptr( XA,   Akp,   0, XAld, size ), &ione, one,
00576                            Mptr( YA,     0, Akq, YAld, usiz ), &YAld );
00577                }
00578             }
00579          }
00580       }
00581    }
00582    if( XAfr ) free( XA );
00583 
00584    if( notran )
00585    {
00586 /*
00587 *  Combine the partial column results into YA
00588 */
00589       if( YAsum && ( Amp > 0 ) )
00590       {
00591          top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
00592          Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow,
00593                    YAd[CSRC_] );
00594       }
00595    }
00596    else
00597    {
00598 /*
00599 *  Combine the partial row results into YA
00600 */
00601       if( YAsum && ( Anq > 0 ) )
00602       {
00603          top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
00604          Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_],
00605                    mycol );
00606       }
00607    }
00608 /*
00609 *  sub( Y ) := beta * sub( Y ) + YA (if necessary)
00610 */
00611    if( YApbY )
00612    {
00613 /*
00614 *  Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
00615 */
00616       PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow,
00617                    &Ycol );
00618 
00619       if( *INCY == Yd[M_] )
00620       {
00621 /*
00622 *  sub( Y ) resides in (a) process row(s)
00623 */
00624          if( ( myrow == Yrow ) || ( Yrow < 0 ) )
00625          {
00626 /*
00627 *  Make sure I own some data and scale sub( Y )
00628 */
00629             Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
00630                               npcol );
00631             if( Ynq > 0 )
00632             {
00633                Yld = Yd[LLD_];
00634                dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
00635                         Yjj, Yld, usiz ), &Yld );
00636             }
00637          }
00638       }
00639       else
00640       {
00641 /*
00642 *  sub( Y ) resides in (a) process column(s)
00643 */
00644          if( ( mycol == Ycol ) || ( Ycol < 0 ) )
00645          {
00646 /*
00647 *  Make sure I own some data and scale sub( Y )
00648 */
00649             Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
00650                               nprow );
00651             if( Ynp > 0 )
00652             {
00653                dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
00654                         Yjj, Yd[LLD_], usiz ), INCY );
00655             }
00656          }
00657       }
00658 
00659       if( notran )
00660       {
00661          PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one,
00662                      ((char *) Y), Yi, Yj, Yd, &Yroc );
00663       }
00664       else
00665       {
00666          PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW,    one,
00667                      ((char *) Y), Yi, Yj, Yd, &Yroc );
00668       }
00669    }
00670    if( YAfr ) free( YA );
00671 /*
00672 *  End of PDATRMV
00673 */
00674 }