|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "pblas.h" 00014 #include "PBpblas.h" 00015 #include "PBtools.h" 00016 #include "PBblacs.h" 00017 #include "PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void pzahemv_( F_CHAR_T UPLO, int * N, double * ALPHA, 00021 double * A, int * IA, int * JA, int * DESCA, 00022 double * X, int * IX, int * JX, int * DESCX, int * INCX, 00023 double * BETA, 00024 double * Y, int * IY, int * JY, int * DESCY, int * INCY ) 00025 #else 00026 void pzahemv_( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX, DESCX, 00027 INCX, BETA, Y, IY, JY, DESCY, INCY ) 00028 /* 00029 * .. Scalar Arguments .. 00030 */ 00031 F_CHAR_T UPLO; 00032 int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, 00033 * N; 00034 double * ALPHA, * BETA; 00035 /* 00036 * .. Array Arguments .. 00037 */ 00038 int * DESCA, * DESCX, * DESCY; 00039 double * A, * X, * Y; 00040 #endif 00041 { 00042 /* 00043 * Purpose 00044 * ======= 00045 * 00046 * PZAHEMV performs the matrix-vector operation 00047 * 00048 * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + 00049 * abs( beta*sub( Y ) ), 00050 * 00051 * where 00052 * 00053 * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), 00054 * 00055 * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, 00056 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, 00057 * 00058 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, 00059 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. 00060 * 00061 * Alpha and beta are real scalars, sub( Y ) is a n element real subvec- 00062 * tor, sub( X ) is an n element subvector and sub( A ) is an n by n 00063 * Hermitian submatrix. 00064 * 00065 * Notes 00066 * ===== 00067 * 00068 * A description vector is associated with each 2D block-cyclicly dis- 00069 * tributed matrix. This vector stores the information required to 00070 * establish the mapping between a matrix entry and its corresponding 00071 * process and memory location. 00072 * 00073 * In the following comments, the character _ should be read as 00074 * "of the distributed matrix". Let A be a generic term for any 2D 00075 * block cyclicly distributed matrix. Its description vector is DESC_A: 00076 * 00077 * NOTATION STORED IN EXPLANATION 00078 * ---------------- --------------- ------------------------------------ 00079 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00080 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00081 * the NPROW x NPCOL BLACS process grid 00082 * A is distributed over. The context 00083 * itself is global, but the handle 00084 * (the integer value) may vary. 00085 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00086 * ted matrix A, M_A >= 0. 00087 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00088 * buted matrix A, N_A >= 0. 00089 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00090 * block of the matrix A, IMB_A > 0. 00091 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00092 * left block of the matrix A, 00093 * INB_A > 0. 00094 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00095 * bute the last M_A-IMB_A rows of A, 00096 * MB_A > 0. 00097 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00098 * bute the last N_A-INB_A columns of 00099 * A, NB_A > 0. 00100 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00101 * row of the matrix A is distributed, 00102 * NPROW > RSRC_A >= 0. 00103 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00104 * first column of A is distributed. 00105 * NPCOL > CSRC_A >= 0. 00106 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00107 * array storing the local blocks of 00108 * the distributed matrix A, 00109 * IF( Lc( 1, N_A ) > 0 ) 00110 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00111 * ELSE 00112 * LLD_A >= 1. 00113 * 00114 * Let K be the number of rows of a matrix A starting at the global in- 00115 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00116 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00117 * receive if these K rows were distributed over NPROW processes. If K 00118 * is the number of columns of a matrix A starting at the global index 00119 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00120 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00121 * these K columns were distributed over NPCOL processes. 00122 * 00123 * The values of Lr() and Lc() may be determined via a call to the func- 00124 * tion PB_Cnumroc: 00125 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00126 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00127 * 00128 * Arguments 00129 * ========= 00130 * 00131 * UPLO (global input) CHARACTER*1 00132 * On entry, UPLO specifies whether the local pieces of 00133 * the array A containing the upper or lower triangular part 00134 * of the Hermitian submatrix sub( A ) are to be referenced as 00135 * follows: 00136 * 00137 * UPLO = 'U' or 'u' Only the local pieces corresponding to 00138 * the upper triangular part of the 00139 * Hermitian submatrix sub( A ) are to be 00140 * referenced, 00141 * 00142 * UPLO = 'L' or 'l' Only the local pieces corresponding to 00143 * the lower triangular part of the 00144 * Hermitian submatrix sub( A ) are to be 00145 * referenced. 00146 * 00147 * N (global input) INTEGER 00148 * On entry, N specifies the order of the submatrix sub( A ). 00149 * N must be at least zero. 00150 * 00151 * ALPHA (global input) DOUBLE PRECISION 00152 * On entry, ALPHA specifies the scalar alpha. When ALPHA is 00153 * supplied as zero then the local entries of the arrays A 00154 * and X corresponding to the entries of the submatrix sub( A ) 00155 * and the subvector sub( X ) need not be set on input. 00156 * 00157 * A (local input) COMPLEX*16 array 00158 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00159 * at least Lc( 1, JA+N-1 ). Before entry, this array contains 00160 * the local entries of the matrix A. 00161 * Before entry with UPLO = 'U' or 'u', this array contains 00162 * the local entries of the upper triangular part of the 00163 * Hermitian submatrix sub( A ), and the local entries of the 00164 * strictly lower triangular of sub( A ) are not referenced. 00165 * Before entry with UPLO = 'L' or 'l', this array contains 00166 * the local entries of the lower triangular part of the 00167 * Hermitian submatrix sub( A ), and the local entries of the 00168 * strictly upper triangular of sub( A ) are not referenced. 00169 * Note that the imaginary parts of the local entries corres- 00170 * ponding to the diagonal elements of sub( A ) need not be 00171 * set and assumed to be zero. 00172 * 00173 * IA (global input) INTEGER 00174 * On entry, IA specifies A's global row index, which points to 00175 * the beginning of the submatrix sub( A ). 00176 * 00177 * JA (global input) INTEGER 00178 * On entry, JA specifies A's global column index, which points 00179 * to the beginning of the submatrix sub( A ). 00180 * 00181 * DESCA (global and local input) INTEGER array 00182 * On entry, DESCA is an integer array of dimension DLEN_. This 00183 * is the array descriptor for the matrix A. 00184 * 00185 * X (local input) COMPLEX*16 array 00186 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X 00187 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and 00188 * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least 00189 * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. 00190 * Before entry, this array contains the local entries of the 00191 * matrix X. 00192 * 00193 * IX (global input) INTEGER 00194 * On entry, IX specifies X's global row index, which points to 00195 * the beginning of the submatrix sub( X ). 00196 * 00197 * JX (global input) INTEGER 00198 * On entry, JX specifies X's global column index, which points 00199 * to the beginning of the submatrix sub( X ). 00200 * 00201 * DESCX (global and local input) INTEGER array 00202 * On entry, DESCX is an integer array of dimension DLEN_. This 00203 * is the array descriptor for the matrix X. 00204 * 00205 * INCX (global input) INTEGER 00206 * On entry, INCX specifies the global increment for the 00207 * elements of X. Only two values of INCX are supported in 00208 * this version, namely 1 and M_X. INCX must not be zero. 00209 * 00210 * BETA (global input) DOUBLE PRECISION 00211 * On entry, BETA specifies the scalar beta. When BETA is 00212 * supplied as zero then the local entries of the array Y 00213 * corresponding to the entries of the subvector sub( Y ) need 00214 * not be set on input. 00215 * 00216 * Y (local input/local output) DOUBLE PRECISION array 00217 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y 00218 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and 00219 * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least 00220 * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. 00221 * Before entry, this array contains the local entries of the 00222 * matrix Y. On exit, sub( Y ) is overwritten by the updated 00223 * subvector. 00224 * 00225 * IY (global input) INTEGER 00226 * On entry, IY specifies Y's global row index, which points to 00227 * the beginning of the submatrix sub( Y ). 00228 * 00229 * JY (global input) INTEGER 00230 * On entry, JY specifies Y's global column index, which points 00231 * to the beginning of the submatrix sub( Y ). 00232 * 00233 * DESCY (global and local input) INTEGER array 00234 * On entry, DESCY is an integer array of dimension DLEN_. This 00235 * is the array descriptor for the matrix Y. 00236 * 00237 * INCY (global input) INTEGER 00238 * On entry, INCY specifies the global increment for the 00239 * elements of Y. Only two values of INCY are supported in 00240 * this version, namely 1 and M_Y. INCY must not be zero. 00241 * 00242 * -- Written on April 1, 1998 by 00243 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00244 * 00245 * --------------------------------------------------------------------- 00246 */ 00247 /* 00248 * .. Local Scalars .. 00249 */ 00250 char UploA, * one, top; 00251 int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Amp, 00252 Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld, Xi, Xj, 00253 YCfr, YCpbY, YCsum, YCld, YRfr, YRpbY, YRsum, YRld, Ycol, Yi, 00254 Yii, YisRow, Yj, Yjj, Yld, Ynp, Ynq, Yrow, ctxt, info, ione=1, 00255 k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper, 00256 usiz; 00257 double * tbeta; 00258 PBTYP_T * type, * utyp; 00259 /* 00260 * .. Local Arrays .. 00261 */ 00262 int Ad [DLEN_], Ad0[DLEN_], XCd[DLEN_], XRd[DLEN_], Xd[DLEN_], 00263 YCd[DLEN_], YRd[DLEN_], Yd [DLEN_]; 00264 char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL, 00265 * YR = NULL; 00266 /* .. 00267 * .. Executable Statements .. 00268 * 00269 */ 00270 upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); 00271 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); 00272 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); 00273 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); 00274 #ifndef NO_ARGCHK 00275 /* 00276 * Test the input parameters 00277 */ 00278 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00279 if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) 00280 { 00281 if( ( !upper ) && ( UploA != CLOWER ) ) 00282 { 00283 PB_Cwarn( ctxt, __LINE__, __FILE__, "Illegal UPLO = %c\n", UploA ); 00284 info = -1; 00285 } 00286 PB_Cchkmat( ctxt, "PZAHEMV", "A", *N, 2, *N, 2, Ai, Aj, Ad, 7, &info ); 00287 PB_Cchkvec( ctxt, "PZAHEMV", "X", *N, 2, Xi, Xj, Xd, *INCX, 11, &info ); 00288 PB_Cchkvec( ctxt, "PZAHEMV", "Y", *N, 2, Yi, Yj, Yd, *INCY, 17, &info ); 00289 } 00290 if( info ) { PB_Cabort( ctxt, "PZAHEMV", info ); return; } 00291 #endif 00292 /* 00293 * Quick return if possible 00294 */ 00295 if( ( *N == 0 ) || 00296 ( ( ALPHA[REAL_PART] == ZERO ) && ( BETA[REAL_PART] == ONE ) ) ) 00297 return; 00298 /* 00299 * Retrieve process grid information 00300 */ 00301 #ifdef NO_ARGCHK 00302 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00303 #endif 00304 /* 00305 * Get type structure 00306 */ 00307 type = PB_Cztypeset(); size = type->size; 00308 utyp = PB_Cdtypeset(); usiz = type->usiz; 00309 /* 00310 * and when alpha is zero 00311 */ 00312 if( ALPHA[REAL_PART] == ZERO ) 00313 { 00314 /* 00315 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol 00316 */ 00317 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, 00318 &Yrow, &Ycol ); 00319 00320 if( *INCY == Yd[M_] ) 00321 { 00322 /* 00323 * sub( Y ) resides in (a) process row(s) 00324 */ 00325 if( ( myrow == Yrow ) || ( Yrow < 0 ) ) 00326 { 00327 /* 00328 * Make sure I own some data and scale sub( Y ) 00329 */ 00330 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], 00331 npcol ); 00332 if( Ynq > 0 ) 00333 { 00334 Yld = Yd[LLD_]; 00335 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, 00336 Yjj, Yld, usiz ), &Yld ); 00337 } 00338 } 00339 } 00340 else 00341 { 00342 /* 00343 * sub( Y ) resides in (a) process column(s) 00344 */ 00345 if( ( mycol == Ycol ) || ( Ycol < 0 ) ) 00346 { 00347 /* 00348 * Make sure I own some data and scale sub( Y ) 00349 */ 00350 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], 00351 nprow ); 00352 if( Ynp > 0 ) 00353 { 00354 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, 00355 Yjj, Yd[LLD_], usiz ), INCY ); 00356 } 00357 } 00358 } 00359 return; 00360 } 00361 /* 00362 * Compute descriptor Ad0 for sub( A ) 00363 */ 00364 PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, 00365 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); 00366 /* 00367 * Reuse sub( Y ) and/or create vectors YR in process rows and YC in process 00368 * columns spanned by sub( A ) 00369 */ 00370 if( ( YisRow = ( *INCY == Yd[M_] ) ) != 0 ) 00371 { 00372 PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), 00373 Yi, Yj, Yd, ROW, ((char**)(&tbeta)), &YR, YRd, &YRfr, 00374 &YRsum, &YRpbY ); 00375 PB_COutV( utyp, COLUMN, INIT, *N, *N, Ad0, 1, &YC, YCd, &YCfr, &YCsum ); 00376 } 00377 else 00378 { 00379 PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *)BETA), ((char *) Y), 00380 Yi, Yj, Yd, COLUMN, ((char**)(&tbeta)), &YC, YCd, &YCfr, 00381 &YCsum, &YCpbY ); 00382 PB_COutV( utyp, ROW, INIT, *N, *N, Ad0, 1, &YR, YRd, &YRfr, &YRsum ); 00383 } 00384 /* 00385 * Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by 00386 * sub( A ) 00387 */ 00388 if( *INCX == Xd[M_] ) 00389 { 00390 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, 00391 ROW, &XR, XRd, &XRfr ); 00392 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0, XRd, 00393 ROW, &XC, XCd, &XCfr ); 00394 } 00395 else 00396 { 00397 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, 00398 COLUMN, &XC, XCd, &XCfr ); 00399 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0, XCd, 00400 COLUMN, &XR, XRd, &XRfr ); 00401 } 00402 one = type->one; 00403 /* 00404 * Local matrix-vector multiply iff I own some data 00405 */ 00406 Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; 00407 Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; 00408 Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); 00409 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); 00410 00411 if( ( Amp > 0 ) && ( Anq > 0 ) ) 00412 { 00413 Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); 00414 00415 XCld = XCd[LLD_]; XRld = XRd[LLD_]; YCld = YCd[LLD_]; YRld = YRd[LLD_]; 00416 /* 00417 * Scale YR or YC in the case sub( Y ) has been reused 00418 */ 00419 if( YisRow ) 00420 { 00421 /* 00422 * YR resides in (a) process row(s) 00423 */ 00424 if( !YRpbY ) 00425 { 00426 if( ( myrow == YRd[RSRC_] ) || ( YRd[RSRC_] < 0 ) ) 00427 { 00428 /* 00429 * Make sure I own some data and scale YR 00430 */ 00431 if( Anq > 0 ) 00432 dascal_( &Anq, ((char *) tbeta), YR, &YRld ); 00433 } 00434 } 00435 } 00436 else 00437 { 00438 /* 00439 * YC resides in (a) process column(s) 00440 */ 00441 if( !YCpbY ) 00442 { 00443 if( ( mycol == YCd[CSRC_] ) || ( YCd[CSRC_] < 0 ) ) 00444 { 00445 /* 00446 * Make sure I own some data and scale YC 00447 */ 00448 if( Amp > 0 ) 00449 dascal_( &Amp, ((char *) tbeta), YC, &ione ); 00450 } 00451 } 00452 } 00453 /* 00454 * Computational partitioning size is computed as the product of the logical 00455 * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) 00456 */ 00457 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * 00458 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); 00459 00460 if( upper ) 00461 { 00462 for( k = 0; k < *N; k += nb ) 00463 { 00464 kb = *N - k; kb = MIN( kb, nb ); 00465 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00466 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00467 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00468 if( Akp > 0 && Anq0 > 0 ) 00469 { 00470 zagemv_( C2F_CHAR( NOTRAN ), &Akp, &Anq0, ((char *)ALPHA), 00471 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, Mptr( XR, 0, Akq, 00472 XRld, size ), &XRld, one, YC, &ione ); 00473 zagemv_( C2F_CHAR( COTRAN ), &Akp, &Anq0, ((char *)ALPHA), 00474 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XC, &ione, one, 00475 Mptr( YR, 0, Akq, YRld, usiz ), &YRld ); 00476 } 00477 PB_Cpsym( type, utyp, LEFT, UPPER, kb, 1, ((char *) ALPHA), Aptr, k, 00478 k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, 00479 Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), 00480 YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv ); 00481 } 00482 } 00483 else 00484 { 00485 for( k = 0; k < *N; k += nb ) 00486 { 00487 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); 00488 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00489 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00490 PB_Cpsym( type, utyp, LEFT, LOWER, kb, 1, ((char *) ALPHA), Aptr, k, 00491 k, Ad0, Mptr( XC, Akp, 0, XCld, size ), XCld, Mptr( XR, 0, 00492 Akq, XRld, size ), XRld, Mptr( YC, Akp, 0, YCld, usiz ), 00493 YCld, Mptr( YR, 0, Akq, YRld, usiz ), YRld, PB_Ctzahemv ); 00494 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); 00495 Amp0 = Amp - Akp; 00496 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00497 if( Amp0 > 0 && Anq0 > 0 ) 00498 { 00499 zagemv_( C2F_CHAR( NOTRAN ), &Amp0, &Anq0, ((char *) ALPHA), 00500 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XR, 0, 00501 Akq, XRld, size ), &XRld, one, Mptr( YC, Akp, 0, YCld, 00502 usiz ), &ione ); 00503 zagemv_( C2F_CHAR( COTRAN ), &Amp0, &Anq0, ((char *) ALPHA), 00504 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, Mptr( XC, Akp, 00505 0, XCld, size ), &ione, one, Mptr( YR, 0, Akq, YRld, 00506 usiz ), &YRld ); 00507 } 00508 } 00509 } 00510 } 00511 if( XCfr ) free( XC ); 00512 if( XRfr ) free( XR ); 00513 00514 if( YisRow ) 00515 { 00516 /* 00517 * Combine the partial column results into YC 00518 */ 00519 if( YCsum ) 00520 { 00521 YCd[CSRC_] = 0; 00522 if( Amp > 0 ) 00523 { 00524 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); 00525 Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 0 ); 00526 } 00527 } 00528 /* 00529 * Combine the partial row results into YR 00530 */ 00531 if( YRsum && ( Anq > 0 ) ) 00532 { 00533 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); 00534 Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], YRd[RSRC_], 00535 mycol ); 00536 } 00537 /* 00538 * YR := YR + YC 00539 */ 00540 PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, 00541 YR, 0, 0, YRd, ROW ); 00542 /* 00543 * sub( Y ) := beta * sub( Y ) + YR (if necessary) 00544 */ 00545 if( YRpbY ) 00546 { 00547 /* 00548 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol 00549 */ 00550 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, 00551 &Ycol ); 00552 /* 00553 * sub( Y ) resides in (a) process row(s) 00554 */ 00555 if( ( myrow == Yrow ) || Yrow < 0 ) 00556 { 00557 /* 00558 * Make sure I own some data and scale sub( Y ) 00559 */ 00560 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], 00561 npcol ); 00562 if( Ynq > 0 ) 00563 { 00564 Yld = Yd[LLD_]; 00565 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, 00566 Yjj, Yld, usiz ), &Yld ); 00567 } 00568 } 00569 PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, 00570 ((char *) Y), Yi, Yj, Yd, ROW ); 00571 } 00572 } 00573 else 00574 { 00575 /* 00576 * Combine the partial row results into YR 00577 */ 00578 if( YRsum ) 00579 { 00580 YRd[RSRC_] = 0; 00581 if( Anq > 0 ) 00582 { 00583 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); 00584 Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YR, YRd[LLD_], 0, 00585 mycol ); 00586 } 00587 } 00588 /* 00589 * Combine the partial column results into YC 00590 */ 00591 if( YCsum && ( Amp > 0 ) ) 00592 { 00593 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); 00594 Cdgsum2d( ctxt, ROW, &top, Amp, 1, YC, YCd[LLD_], myrow, 00595 YCd[CSRC_] ); 00596 } 00597 /* 00598 * YC := YR + YC 00599 */ 00600 PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YR, 0, 0, YRd, ROW, one, 00601 YC, 0, 0, YCd, COLUMN ); 00602 /* 00603 * sub( Y ) := beta * sub( Y ) + YC (if necessary) 00604 */ 00605 if( YCpbY ) 00606 { 00607 /* 00608 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol 00609 */ 00610 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, 00611 &Ycol ); 00612 /* 00613 * sub( Y ) resides in (a) process column(s) 00614 */ 00615 if( ( mycol == Ycol ) || Ycol < 0 ) 00616 { 00617 /* 00618 * Make sure I own some data and scale sub( Y ) 00619 */ 00620 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], 00621 nprow ); 00622 if( Ynp > 0 ) 00623 { 00624 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, 00625 Yjj, Yd[LLD_], usiz ), INCY ); 00626 } 00627 } 00628 PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YC, 0, 0, YCd, COLUMN, one, 00629 ((char *) Y), Yi, Yj, Yd, COLUMN ); 00630 } 00631 } 00632 if( YCfr ) free( YC ); 00633 if( YRfr ) free( YR ); 00634 /* 00635 * End of PZAHEMV 00636 */ 00637 }