ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pzahemv_.c
Go to the documentation of this file.
00001 /* ---------------------------------------------------------------------
00002 *
00003 *  -- PBLAS routine (version 2.0) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     April 1, 1998
00007 *
00008 *  ---------------------------------------------------------------------
00009 */
00010 /*
00011 *  Include files
00012 */
00013 #include "pblas.h"
00014 #include "PBpblas.h"
00015 #include "PBtools.h"
00016 #include "PBblacs.h"
00017 #include "PBblas.h"
00018 
00019 #ifdef __STDC__
00020 void pzahemv_( F_CHAR_T UPLO, int * N, double * ALPHA,
00021                double * A, int * IA, int * JA, int * DESCA,
00022                double * X, int * IX, int * JX, int * DESCX, int * INCX,
00023                double * BETA,
00024                double * Y, int * IY, int * JY, int * DESCY, int * INCY )
00025 #else
00026 void pzahemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX,
00027                INCX, BETA, Y, IY, JY, DESCY, INCY )
00028 /*
00029 *  .. Scalar Arguments ..
00030 */
00031    F_CHAR_T       UPLO;
00032    int            * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
00033                   * N;
00034    double         * ALPHA, * BETA;
00035 /*
00036 *  .. Array Arguments ..
00037 */
00038    int            * DESCA, * DESCX, * DESCY;
00039    double         * A, * X, * Y;
00040 #endif
00041 {
00042 /*
00043 *  Purpose
00044 *  =======
00045 *
00046 *  PZAHEMV  performs the matrix-vector operation
00047 *
00048 *     sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) +
00049 *                 abs( beta*sub( Y ) ),
00050 *
00051 *  where
00052 *
00053 *     sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1),
00054 *
00055 *     sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
00056 *                      X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
00057 *
00058 *     sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
00059 *                      Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
00060 *
00061 *  Alpha and beta are real scalars, sub( Y ) is a n element real subvec-
00062 *  tor,  sub( X )  is  an n element subvector  and sub( A ) is an n by n
00063 *  Hermitian submatrix.
00064 *
00065 *  Notes
00066 *  =====
00067 *
00068 *  A description  vector  is associated with each 2D block-cyclicly dis-
00069 *  tributed matrix.  This  vector  stores  the  information  required to
00070 *  establish the  mapping  between a  matrix entry and its corresponding
00071 *  process and memory location.
00072 *
00073 *  In  the  following  comments,   the character _  should  be  read  as
00074 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
00075 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
00076 *
00077 *  NOTATION         STORED IN       EXPLANATION
00078 *  ---------------- --------------- ------------------------------------
00079 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
00080 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
00081 *                                   the NPROW x NPCOL BLACS process grid
00082 *                                   A  is  distributed over. The context
00083 *                                   itself  is  global,  but  the handle
00084 *                                   (the integer value) may vary.
00085 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
00086 *                                   ted matrix A, M_A >= 0.
00087 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
00088 *                                   buted matrix A, N_A >= 0.
00089 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
00090 *                                   block of the matrix A, IMB_A > 0.
00091 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
00092 *                                   left   block   of   the  matrix   A,
00093 *                                   INB_A > 0.
00094 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
00095 *                                   bute the last  M_A-IMB_A  rows of A,
00096 *                                   MB_A > 0.
00097 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
00098 *                                   bute the last  N_A-INB_A  columns of
00099 *                                   A, NB_A > 0.
00100 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
00101 *                                   row of the matrix  A is distributed,
00102 *                                   NPROW > RSRC_A >= 0.
00103 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
00104 *                                   first column of  A  is  distributed.
00105 *                                   NPCOL > CSRC_A >= 0.
00106 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
00107 *                                   array  storing  the  local blocks of
00108 *                                   the distributed matrix A,
00109 *                                   IF( Lc( 1, N_A ) > 0 )
00110 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
00111 *                                   ELSE
00112 *                                      LLD_A >= 1.
00113 *
00114 *  Let K be the number of  rows of a matrix A starting at the global in-
00115 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
00116 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
00117 *  receive if these K rows were distributed over NPROW processes.  If  K
00118 *  is the number of columns of a matrix  A  starting at the global index
00119 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
00120 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
00121 *  these K columns were distributed over NPCOL processes.
00122 *
00123 *  The values of Lr() and Lc() may be determined via a call to the func-
00124 *  tion PB_Cnumroc:
00125 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
00126 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
00127 *
00128 *  Arguments
00129 *  =========
00130 *
00131 *  UPLO    (global input) CHARACTER*1
00132 *          On  entry,   UPLO  specifies  whether  the  local  pieces  of
00133 *          the array  A  containing the  upper or lower triangular  part
00134 *          of the Hermitian submatrix  sub( A )  are to be referenced as
00135 *          follows:
00136 *
00137 *             UPLO = 'U' or 'u'   Only the local pieces corresponding to
00138 *                                 the   upper  triangular  part  of  the
00139 *                                 Hermitian submatrix sub( A ) are to be
00140 *                                 referenced,
00141 *
00142 *             UPLO = 'L' or 'l'   Only the local pieces corresponding to
00143 *                                 the   lower  triangular  part  of  the
00144 *                                 Hermitian submatrix sub( A ) are to be
00145 *                                 referenced.
00146 *
00147 *  N       (global input) INTEGER
00148 *          On entry,  N specifies the order of the  submatrix  sub( A ).
00149 *          N must be at least zero.
00150 *
00151 *  ALPHA   (global input) DOUBLE PRECISION
00152 *          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
00153 *          supplied  as  zero  then  the  local entries of the arrays  A
00154 *          and X corresponding to the entries of the submatrix  sub( A )
00155 *          and the subvector sub( X ) need not be set on input.
00156 *
00157 *  A       (local input) COMPLEX*16 array
00158 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
00159 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
00160 *          the local entries of the matrix A.
00161 *          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
00162 *          the  local  entries  of  the  upper  triangular  part  of the
00163 *          Hermitian  submatrix  sub( A ), and  the local entries of the
00164 *          strictly lower triangular of sub( A ) are not referenced.
00165 *          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
00166 *          the  local  entries  of  the  lower  triangular  part  of the
00167 *          Hermitian  submatrix  sub( A ), and  the local entries of the
00168 *          strictly upper triangular of sub( A ) are not referenced.
00169 *          Note that the  imaginary parts  of the local entries  corres-
00170 *          ponding to the  diagonal elements  of  sub( A )  need not  be
00171 *          set and assumed to be zero.
00172 *
00173 *  IA      (global input) INTEGER
00174 *          On entry, IA  specifies A's global row index, which points to
00175 *          the beginning of the submatrix sub( A ).
00176 *
00177 *  JA      (global input) INTEGER
00178 *          On entry, JA  specifies A's global column index, which points
00179 *          to the beginning of the submatrix sub( A ).
00180 *
00181 *  DESCA   (global and local input) INTEGER array
00182 *          On entry, DESCA  is an integer array of dimension DLEN_. This
00183 *          is the array descriptor for the matrix A.
00184 *
00185 *  X       (local input) COMPLEX*16 array
00186 *          On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
00187 *          is   at  least  MAX( 1, Lr( 1, IX ) )  when  INCX = M_X   and
00188 *          MAX( 1, Lr( 1, IX+N-1 ) )  otherwise,  and,  Kx  is  at least
00189 *          Lc( 1, JX+N-1 )  when  INCX = M_X  and Lc( 1, JX ) otherwise.
00190 *          Before  entry,  this  array contains the local entries of the
00191 *          matrix X.
00192 *
00193 *  IX      (global input) INTEGER
00194 *          On entry, IX  specifies X's global row index, which points to
00195 *          the beginning of the submatrix sub( X ).
00196 *
00197 *  JX      (global input) INTEGER
00198 *          On entry, JX  specifies X's global column index, which points
00199 *          to the beginning of the submatrix sub( X ).
00200 *
00201 *  DESCX   (global and local input) INTEGER array
00202 *          On entry, DESCX  is an integer array of dimension DLEN_. This
00203 *          is the array descriptor for the matrix X.
00204 *
00205 *  INCX    (global input) INTEGER
00206 *          On entry,  INCX   specifies  the  global  increment  for  the
00207 *          elements of  X.  Only two values of  INCX   are  supported in
00208 *          this version, namely 1 and M_X. INCX  must not be zero.
00209 *
00210 *  BETA    (global input) DOUBLE PRECISION
00211 *          On entry,  BETA  specifies the scalar  beta.   When  BETA  is
00212 *          supplied  as  zero  then  the  local entries of  the array  Y
00213 *          corresponding to the entries of the subvector  sub( Y )  need
00214 *          not be set on input.
00215 *
00216 *  Y       (local input/local output) DOUBLE PRECISION array
00217 *          On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
00218 *          is   at  least  MAX( 1, Lr( 1, IY ) )  when  INCY = M_Y   and
00219 *          MAX( 1, Lr( 1, IY+N-1 ) )  otherwise,  and,  Ky  is  at least
00220 *          Lc( 1, JY+N-1 )  when  INCY = M_Y  and Lc( 1, JY ) otherwise.
00221 *          Before  entry,  this array  contains the local entries of the
00222 *          matrix Y.  On exit,  sub( Y )  is  overwritten by the updated
00223 *          subvector.
00224 *
00225 *  IY      (global input) INTEGER
00226 *          On entry, IY  specifies Y's global row index, which points to
00227 *          the beginning of the submatrix sub( Y ).
00228 *
00229 *  JY      (global input) INTEGER
00230 *          On entry, JY  specifies Y's global column index, which points
00231 *          to the beginning of the submatrix sub( Y ).
00232 *
00233 *  DESCY   (global and local input) INTEGER array
00234 *          On entry, DESCY  is an integer array of dimension DLEN_. This
00235 *          is the array descriptor for the matrix Y.
00236 *
00237 *  INCY    (global input) INTEGER
00238 *          On entry,  INCY   specifies  the  global  increment  for  the
00239 *          elements of  Y.  Only two values of  INCY   are  supported in
00240 *          this version, namely 1 and M_Y. INCY  must not be zero.
00241 *
00242 *  -- Written on April 1, 1998 by
00243 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
00244 *
00245 *  ---------------------------------------------------------------------
00246 */
00247 /*
00248 *  .. Local Scalars ..
00249 */
00250    char           UploA, * one, top;
00251    int            Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp,
00252                   Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj,
00253                   YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi,
00254                   Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1,
00255                   k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper,
00256                   usiz;
00257    double         * tbeta;
00258    PBTYP_T        * type, * utyp;
00259 /*
00260 *  .. Local Arrays ..
00261 */
00262    int            Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_],
00263                   YCd[DLEN_], YRd[DLEN_], Yd [DLEN_];
00264    char           * Aptr  = NULL, * XC = NULL, * XR = NULL, * YC = NULL,
00265                   * YR    = NULL;
00266 /* ..
00267 *  .. Executable Statements ..
00268 *
00269 */
00270    upper = ( ( UploA  = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
00271    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
00272    PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
00273    PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
00274 #ifndef NO_ARGCHK
00275 /*
00276 *  Test the input parameters
00277 */
00278    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00279    if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
00280    {
00281       if( ( !upper ) && ( UploA != CLOWER ) )
00282       {
00283          PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA );
00284          info = -1;
00285       }
00286       PB_Cchkmat( ctxt, "PZAHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad,  7, &info );
00287       PB_Cchkvec( ctxt, "PZAHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info );
00288       PB_Cchkvec( ctxt, "PZAHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info );
00289    }
00290    if( info ) { PB_Cabort( ctxt, "PZAHEMV", info ); return; }
00291 #endif
00292 /*
00293 *  Quick return if possible
00294 */
00295    if( ( *N == 0 ) ||
00296        ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) )
00297       return;
00298 /*
00299 *  Retrieve process grid information
00300 */
00301 #ifdef NO_ARGCHK
00302    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00303 #endif
00304 /*
00305 *  Get type structure
00306 */
00307    type = PB_Cztypeset(); size = type->size;
00308    utyp = PB_Cdtypeset(); usiz = type->usiz;
00309 /*
00310 *  and when alpha is zero
00311 */
00312    if( ALPHA[REAL_PART] == ZERO )
00313    {
00314 /*
00315 *  Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
00316 */
00317       PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj,
00318                    &Yrow, &Ycol );
00319 
00320       if( *INCY == Yd[M_] )
00321       {
00322 /*
00323 *  sub( Y ) resides in (a) process row(s)
00324 */
00325          if( ( myrow == Yrow ) || ( Yrow < 0 ) )
00326          {
00327 /*
00328 *  Make sure I own some data and scale sub( Y )
00329 */
00330             Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
00331                               npcol );
00332             if( Ynq > 0 )
00333             {
00334                Yld = Yd[LLD_];
00335                dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
00336                         Yjj, Yld, usiz ), &Yld );
00337             }
00338          }
00339       }
00340       else
00341       {
00342 /*
00343 *  sub( Y ) resides in (a) process column(s)
00344 */
00345          if( ( mycol == Ycol ) || ( Ycol < 0 ) )
00346          {
00347 /*
00348 *  Make sure I own some data and scale sub( Y )
00349 */
00350             Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
00351                               nprow );
00352             if( Ynp > 0 )
00353             {
00354                dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
00355                         Yjj, Yd[LLD_], usiz ), INCY );
00356             }
00357          }
00358       }
00359       return;
00360    }
00361 /*
00362 *  Compute descriptor Ad0 for sub( A )
00363 */
00364    PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
00365                  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
00366 /*
00367 *  Reuse sub( Y ) and/or create vectors YR in process rows and YC in process
00368 *  columns spanned by sub( A )
00369 */
00370    if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 )
00371    {
00372       PB_CInOutV( utyp, ROW,    *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y),
00373                   Yi, Yj, Yd, ROW,    ((char**)(&tbeta)), &YR, YRd, &YRfr,
00374                   &YRsum, &YRpbY );
00375       PB_COutV( utyp, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum );
00376    }
00377    else
00378    {
00379       PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y),
00380                   Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr,
00381                   &YCsum, &YCpbY );
00382       PB_COutV( utyp, ROW,    INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum );
00383    }
00384 /*
00385 *  Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by
00386 *  sub( A )
00387 */
00388    if( *INCX == Xd[M_] )
00389    {
00390       PB_CInV( type, NOCONJG, ROW,    *N, *N, Ad0, 1, ((char *) X), Xi, Xj,  Xd,
00391                ROW,    &XR, XRd, &XRfr );
00392       PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR,            0,  0, XRd,
00393                ROW,    &XC, XCd, &XCfr );
00394    }
00395    else
00396    {
00397       PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,  Xd,
00398                COLUMN, &XC, XCd, &XCfr );
00399       PB_CInV( type, NOCONJG, ROW,    *N, *N, Ad0, 1, XC,            0,  0, XCd,
00400                COLUMN, &XR, XRd, &XRfr );
00401    }
00402    one  = type->one;
00403 /*
00404 *  Local matrix-vector multiply iff I own some data
00405 */
00406    Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_];
00407    Acol  = Ad0[CSRC_]; Arow  = Ad0[RSRC_];
00408    Amp   = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
00409    Anq   = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
00410 
00411    if( ( Amp > 0 ) && ( Anq > 0 ) )
00412    {
00413       Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
00414 
00415       XCld = XCd[LLD_];  XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_];
00416 /*
00417 *  Scale YR or YC in the case sub( Y ) has been reused
00418 */
00419       if( YisRow )
00420       {
00421 /*
00422 *  YR resides in (a) process row(s)
00423 */
00424          if( !YRpbY )
00425          {
00426             if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) )
00427             {
00428 /*
00429 *  Make sure I own some data and scale YR
00430 */
00431                if( Anq > 0 )
00432                   dascal_( &Anq, ((char *) tbeta), YR, &YRld );
00433             }
00434          }
00435       }
00436       else
00437       {
00438 /*
00439 *  YC resides in (a) process column(s)
00440 */
00441          if( !YCpbY )
00442          {
00443             if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) )
00444             {
00445 /*
00446 *  Make sure I own some data and scale YC
00447 */
00448                if( Amp > 0 )
00449                   dascal_( &Amp, ((char *) tbeta), YC, &ione );
00450             }
00451          }
00452       }
00453 /*
00454 *  Computational partitioning size is computed as the product of the logical
00455 *  value returned by pilaenv_ and 2 * lcm( nprow, npcol )
00456 */
00457       nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) *
00458            PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
00459 
00460       if( upper )
00461       {
00462          for( k = 0; k < *N; k += nb )
00463          {
00464             kb   = *N - k; kb = MIN( kb, nb );
00465             Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00466             Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00467             Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00468             if( Akp > 0 && Anq0 > 0 )
00469             {
00470                zagemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA),
00471                         Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq,
00472                         XRld, size ), &XRld, one, YC, &ione );
00473                zagemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA),
00474                        Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one,
00475                        Mptr( YR, 0, Akq, YRld, usiz ), &YRld );
00476             }
00477             PB_Cpsym( type, utyp, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k,
00478                       k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0,
00479                       Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ),
00480                       YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv );
00481          }
00482       }
00483       else
00484       {
00485          for( k = 0; k < *N; k += nb )
00486          {
00487             kb  = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
00488             Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
00489             Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
00490             PB_Cpsym( type, utyp, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k,
00491                       k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0,
00492                       Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ),
00493                       YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv );
00494             Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
00495             Amp0 = Amp - Akp;
00496             Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
00497             if( Amp0 > 0 && Anq0 > 0 )
00498             {
00499                zagemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA),
00500                         Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0,
00501                         Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld,
00502                         usiz ), &ione );
00503                zagemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA),
00504                         Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp,
00505                         0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld,
00506                         usiz ), &YRld );
00507             }
00508          }
00509       }
00510    }
00511    if( XCfr ) free( XC );
00512    if( XRfr ) free( XR );
00513 
00514    if( YisRow )
00515    {
00516 /*
00517 *  Combine the partial column results into YC
00518 */
00519       if( YCsum )
00520       {
00521          YCd[CSRC_] = 0;
00522          if( Amp > 0 )
00523          {
00524             top        = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
00525             Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 );
00526          }
00527       }
00528 /*
00529 *  Combine the partial row results into YR
00530 */
00531       if( YRsum && ( Anq > 0 ) )
00532       {
00533          top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
00534          Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_],
00535                    mycol );
00536       }
00537 /*
00538 *  YR := YR + YC
00539 */
00540       PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one,
00541                   YR, 0, 0, YRd, ROW );
00542 /*
00543 *  sub( Y ) := beta * sub( Y ) + YR (if necessary)
00544 */
00545       if( YRpbY )
00546       {
00547 /*
00548 *     Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
00549 */
00550          PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow,
00551                       &Ycol );
00552 /*
00553 *  sub( Y ) resides in (a) process row(s)
00554 */
00555          if( ( myrow == Yrow ) || Yrow < 0 )
00556          {
00557 /*
00558 *  Make sure I own some data and scale sub( Y )
00559 */
00560             Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_],
00561                               npcol );
00562             if( Ynq > 0 )
00563             {
00564                Yld = Yd[LLD_];
00565                dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii,
00566                         Yjj, Yld, usiz ), &Yld );
00567             }
00568          }
00569          PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one,
00570                      ((char *) Y), Yi, Yj, Yd, ROW );
00571       }
00572    }
00573    else
00574    {
00575 /*
00576 *  Combine the partial row results into YR
00577 */
00578       if( YRsum )
00579       {
00580          YRd[RSRC_] = 0;
00581          if( Anq > 0 )
00582          {
00583             top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
00584             Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0,
00585                       mycol );
00586          }
00587       }
00588 /*
00589 *  Combine the partial column results into YC
00590 */
00591       if( YCsum && ( Amp > 0 ) )
00592       {
00593          top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
00594          Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow,
00595                    YCd[CSRC_] );
00596       }
00597 /*
00598 *  YC := YR + YC
00599 */
00600       PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one,
00601                   YC, 0, 0, YCd, COLUMN );
00602 /*
00603 *  sub( Y ) := beta * sub( Y ) + YC (if necessary)
00604 */
00605       if( YCpbY )
00606       {
00607 /*
00608 *  Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol
00609 */
00610          PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow,
00611                       &Ycol );
00612 /*
00613 *  sub( Y ) resides in (a) process column(s)
00614 */
00615          if( ( mycol == Ycol ) || Ycol < 0 )
00616          {
00617 /*
00618 *  Make sure I own some data and scale sub( Y )
00619 */
00620             Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_],
00621                               nprow );
00622             if( Ynp > 0 )
00623             {
00624                dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii,
00625                         Yjj, Yd[LLD_], usiz ), INCY );
00626             }
00627          }
00628          PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one,
00629                      ((char *) Y), Yi, Yj, Yd, COLUMN );
00630       }
00631    }
00632    if( YCfr ) free( YC );
00633    if( YRfr ) free( YR );
00634 /*
00635 *  End of PZAHEMV
00636 */
00637 }