|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "pblas.h" 00014 #include "PBpblas.h" 00015 #include "PBtools.h" 00016 #include "PBblacs.h" 00017 #include "PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void pdatrmv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N, 00021 double * ALPHA, 00022 double * A, int * IA, int * JA, int * DESCA, 00023 double * X, int * IX, int * JX, int * DESCX, 00024 int * INCX, double * BETA, 00025 double * Y, int * IY, int * JY, int * DESCY, 00026 int * INCY ) 00027 #else 00028 void pdatrmv_( UPLO, TRANS, DIAG, N, ALPHA, A, IA, JA, DESCA, X, IX, 00029 JX, DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY ) 00030 /* 00031 * .. Scalar Arguments .. 00032 */ 00033 F_CHAR_T DIAG, TRANS, UPLO; 00034 int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, 00035 * N; 00036 double * ALPHA, * BETA; 00037 /* 00038 * .. Array Arguments .. 00039 */ 00040 int * DESCA, * DESCX, * DESCY; 00041 double * A, * X, * Y; 00042 #endif 00043 { 00044 /* 00045 * Purpose 00046 * ======= 00047 * 00048 * PDATRMV performs one of the matrix-vector operations 00049 * 00050 * sub( Y ) := abs( alpha )*abs( sub( A ) )*abs( sub( X ) ) + 00051 * abs( beta*sub( Y ) ), 00052 * or 00053 * 00054 * sub( Y ) := abs( alpha )*abs( sub( A )' )*abs( sub( X ) ) + 00055 * abs( beta*sub( Y ) ), 00056 * 00057 * where 00058 * 00059 * sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), 00060 * 00061 * sub( X ) denotes X(IX:IX,JX:JX+N-1), if INCX = M_X, 00062 * X(IX:IX+N-1,JX:JX), if INCX = 1 and INCX <> M_X, 00063 * and, 00064 * 00065 * sub( Y ) denotes Y(IY:IY,JY:JY+N-1), if INCY = M_Y, 00066 * Y(IY:IY+N-1,JY:JY), if INCY = 1 and INCY <> M_Y. 00067 * 00068 * Alpha and beta are real scalars, sub( Y ) is a real subvector, 00069 * sub( X ) is a subvector and sub( A ) is an n by n triangular subma- 00070 * trix. 00071 * 00072 * Notes 00073 * ===== 00074 * 00075 * A description vector is associated with each 2D block-cyclicly dis- 00076 * tributed matrix. This vector stores the information required to 00077 * establish the mapping between a matrix entry and its corresponding 00078 * process and memory location. 00079 * 00080 * In the following comments, the character _ should be read as 00081 * "of the distributed matrix". Let A be a generic term for any 2D 00082 * block cyclicly distributed matrix. Its description vector is DESC_A: 00083 * 00084 * NOTATION STORED IN EXPLANATION 00085 * ---------------- --------------- ------------------------------------ 00086 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00087 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00088 * the NPROW x NPCOL BLACS process grid 00089 * A is distributed over. The context 00090 * itself is global, but the handle 00091 * (the integer value) may vary. 00092 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00093 * ted matrix A, M_A >= 0. 00094 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00095 * buted matrix A, N_A >= 0. 00096 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00097 * block of the matrix A, IMB_A > 0. 00098 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00099 * left block of the matrix A, 00100 * INB_A > 0. 00101 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00102 * bute the last M_A-IMB_A rows of A, 00103 * MB_A > 0. 00104 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00105 * bute the last N_A-INB_A columns of 00106 * A, NB_A > 0. 00107 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00108 * row of the matrix A is distributed, 00109 * NPROW > RSRC_A >= 0. 00110 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00111 * first column of A is distributed. 00112 * NPCOL > CSRC_A >= 0. 00113 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00114 * array storing the local blocks of 00115 * the distributed matrix A, 00116 * IF( Lc( 1, N_A ) > 0 ) 00117 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00118 * ELSE 00119 * LLD_A >= 1. 00120 * 00121 * Let K be the number of rows of a matrix A starting at the global in- 00122 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00123 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00124 * receive if these K rows were distributed over NPROW processes. If K 00125 * is the number of columns of a matrix A starting at the global index 00126 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00127 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00128 * these K columns were distributed over NPCOL processes. 00129 * 00130 * The values of Lr() and Lc() may be determined via a call to the func- 00131 * tion PB_Cnumroc: 00132 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00133 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00134 * 00135 * Arguments 00136 * ========= 00137 * 00138 * UPLO (global input) CHARACTER*1 00139 * On entry, UPLO specifies whether the submatrix sub( A ) is 00140 * an upper or lower triangular submatrix as follows: 00141 * 00142 * UPLO = 'U' or 'u' sub( A ) is an upper triangular 00143 * submatrix, 00144 * 00145 * UPLO = 'L' or 'l' sub( A ) is a lower triangular 00146 * submatrix. 00147 * 00148 * TRANS (global input) CHARACTER*1 00149 * On entry, TRANS specifies the operation to be performed as 00150 * follows: 00151 * 00152 * TRANS = 'N' or 'n' 00153 * sub( Y ) := |alpha|*|sub( A )|*|sub( X )| + 00154 * |beta*sub( Y )|. 00155 * 00156 * TRANS = 'T' or 't' 00157 * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + 00158 * |beta*sub( Y )|. 00159 * 00160 * TRANS = 'C' or 'c' 00161 * sub( Y ) := |alpha|*|sub( A )'|*|sub( X )| + 00162 * |beta*sub( Y )|. 00163 * 00164 * DIAG (global input) CHARACTER*1 00165 * On entry, DIAG specifies whether or not sub( A ) is unit 00166 * triangular as follows: 00167 * 00168 * DIAG = 'U' or 'u' sub( A ) is assumed to be unit trian- 00169 * gular, 00170 * 00171 * DIAG = 'N' or 'n' sub( A ) is not assumed to be unit tri- 00172 * angular. 00173 * 00174 * N (global input) INTEGER 00175 * On entry, N specifies the order of the submatrix sub( A ). 00176 * N must be at least zero. 00177 * 00178 * ALPHA (global input) DOUBLE PRECISION 00179 * On entry, ALPHA specifies the scalar alpha. When ALPHA is 00180 * supplied as zero then the local entries of the arrays A 00181 * and X corresponding to the entries of the submatrix sub( A ) 00182 * and the subvector sub( X ) need not be set on input. 00183 * 00184 * A (local input) DOUBLE PRECISION array 00185 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00186 * at least Lc( 1, JA+N-1 ). Before entry, this array contains 00187 * the local entries of the matrix A. 00188 * Before entry with UPLO = 'U' or 'u', this array contains the 00189 * local entries corresponding to the entries of the upper tri- 00190 * angular submatrix sub( A ), and the local entries correspon- 00191 * ding to the entries of the strictly lower triangular part of 00192 * the submatrix sub( A ) are not referenced. 00193 * Before entry with UPLO = 'L' or 'l', this array contains the 00194 * local entries corresponding to the entries of the lower tri- 00195 * angular submatrix sub( A ), and the local entries correspon- 00196 * ding to the entries of the strictly upper triangular part of 00197 * the submatrix sub( A ) are not referenced. 00198 * Note that when DIAG = 'U' or 'u', the local entries corres- 00199 * ponding to the diagonal elements of the submatrix sub( A ) 00200 * are not referenced either, but are assumed to be unity. 00201 * 00202 * IA (global input) INTEGER 00203 * On entry, IA specifies A's global row index, which points to 00204 * the beginning of the submatrix sub( A ). 00205 * 00206 * JA (global input) INTEGER 00207 * On entry, JA specifies A's global column index, which points 00208 * to the beginning of the submatrix sub( A ). 00209 * 00210 * DESCA (global and local input) INTEGER array 00211 * On entry, DESCA is an integer array of dimension DLEN_. This 00212 * is the array descriptor for the matrix A. 00213 * 00214 * X (local input) DOUBLE PRECISION array 00215 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X 00216 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and 00217 * MAX( 1, Lr( 1, IX+Lx-1 ) ) otherwise, and, Kx is at least 00218 * Lc( 1, JX+Lx-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. 00219 * Lx is N when TRANS = 'N' or 'n' and M otherwise. Before en- 00220 * try, this array contains the local entries of the matrix X. 00221 * 00222 * IX (global input) INTEGER 00223 * On entry, IX specifies X's global row index, which points to 00224 * the beginning of the submatrix sub( X ). 00225 * 00226 * JX (global input) INTEGER 00227 * On entry, JX specifies X's global column index, which points 00228 * to the beginning of the submatrix sub( X ). 00229 * 00230 * DESCX (global and local input) INTEGER array 00231 * On entry, DESCX is an integer array of dimension DLEN_. This 00232 * is the array descriptor for the matrix X. 00233 * 00234 * INCX (global input) INTEGER 00235 * On entry, INCX specifies the global increment for the 00236 * elements of X. Only two values of INCX are supported in 00237 * this version, namely 1 and M_X. INCX must not be zero. 00238 * 00239 * BETA (global input) DOUBLE PRECISION 00240 * On entry, BETA specifies the scalar beta. When BETA is 00241 * supplied as zero then the local entries of the array Y 00242 * corresponding to the entries of the subvector sub( Y ) need 00243 * not be set on input. 00244 * 00245 * Y (local input/local output) DOUBLE PRECISION array 00246 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y 00247 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and 00248 * MAX( 1, Lr( 1, IY+Ly-1 ) ) otherwise, and, Ky is at least 00249 * Lc( 1, JY+Ly-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. 00250 * Ly is M when TRANS = 'N' or 'n' and N otherwise. Before en- 00251 * try, this array contains the local entries of the matrix Y. 00252 * On exit, sub( Y ) is overwritten by the updated subvector. 00253 * 00254 * IY (global input) INTEGER 00255 * On entry, IY specifies Y's global row index, which points to 00256 * the beginning of the submatrix sub( Y ). 00257 * 00258 * JY (global input) INTEGER 00259 * On entry, JY specifies Y's global column index, which points 00260 * to the beginning of the submatrix sub( Y ). 00261 * 00262 * DESCY (global and local input) INTEGER array 00263 * On entry, DESCY is an integer array of dimension DLEN_. This 00264 * is the array descriptor for the matrix Y. 00265 * 00266 * INCY (global input) INTEGER 00267 * On entry, INCY specifies the global increment for the 00268 * elements of Y. Only two values of INCY are supported in 00269 * this version, namely 1 and M_Y. INCY must not be zero. 00270 * 00271 * -- Written on April 1, 1998 by 00272 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00273 * 00274 * --------------------------------------------------------------------- 00275 */ 00276 /* 00277 * .. Local Scalars .. 00278 */ 00279 char DiagA, TranOp, UploA, Yroc, * one, top; 00280 int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, 00281 Amp, Amp0, Anb, Anq, Anq0, Arow, XAfr, XAld, Xi, Xj, YAfr, 00282 YAld, YApbY, YAsum, Ycol, Yi, Yii, Yj, Yjj, Yld, Ynp, Ynq, 00283 Yrow, ctxt, info, ione=1, k, kb, ktmp, mycol, myrow, nb, 00284 notran, nounit, npcol, nprow, size, upper, usiz; 00285 double * tbeta; 00286 PBTYP_T * type, * utyp; 00287 /* 00288 * .. Local Arrays .. 00289 */ 00290 int Ad [DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], 00291 Yd [DLEN_]; 00292 char * Aptr = NULL, * XA = NULL, * YA = NULL; 00293 /* .. 00294 * .. Executable Statements .. 00295 * 00296 */ 00297 upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); 00298 notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); 00299 nounit = ( ( DiagA = Mupcase( F2C_CHAR( DIAG )[0] ) ) == CNOUNIT ); 00300 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); 00301 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); 00302 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); 00303 #ifndef NO_ARGCHK 00304 /* 00305 * Test the input parameters 00306 */ 00307 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00308 if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) ) 00309 { 00310 if( ( !upper ) && ( UploA != CLOWER ) ) 00311 { 00312 PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal UPLO = %c\n", UploA ); 00313 info = -1; 00314 } 00315 else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) 00316 { 00317 PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal TRANS = %c\n", TranOp ); 00318 info = -2; 00319 } 00320 else if( ( !nounit ) && ( DiagA != CUNIT ) ) 00321 { 00322 PB_Cwarn( ctxt, __LINE__, "PDATRMV", "Illegal DIAG = %c\n", DiagA ); 00323 info = -3; 00324 } 00325 PB_Cchkmat( ctxt, "PDATRMV", "A", *N, 4, *N, 4, Ai, Aj, Ad, 9, &info ); 00326 PB_Cchkvec( ctxt, "PDATRMV", "X", *N, 4, Xi, Xj, Xd, *INCX, 13, &info ); 00327 PB_Cchkvec( ctxt, "PDATRMV", "Y", *N, 4, Yi, Yj, Yd, *INCY, 19, &info ); 00328 } 00329 if( info ) { PB_Cabort( ctxt, "PDATRMV", info ); return; } 00330 #endif 00331 /* 00332 * Quick return if possible 00333 */ 00334 if( ( *N == 0 ) || ( ( ALPHA[REAL_PART] == ZERO ) && 00335 ( BETA [REAL_PART] == ONE ) ) ) return; 00336 /* 00337 * Retrieve process grid information 00338 */ 00339 #ifdef NO_ARGCHK 00340 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00341 #endif 00342 /* 00343 * Get type structure 00344 */ 00345 type = utyp = PB_Cdtypeset(); 00346 size = usiz = type->size; 00347 /* 00348 * and when alpha is zero 00349 */ 00350 if( ALPHA[REAL_PART] == ZERO ) 00351 { 00352 /* 00353 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol 00354 */ 00355 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, 00356 &Yrow, &Ycol ); 00357 00358 if( *INCY == Yd[M_] ) 00359 { 00360 /* 00361 * sub( Y ) resides in (a) process row(s) 00362 */ 00363 if( ( myrow == Yrow ) || ( Yrow < 0 ) ) 00364 { 00365 /* 00366 * Make sure I own some data and scale sub( Y ) 00367 */ 00368 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], 00369 npcol ); 00370 if( Ynq > 0 ) 00371 { 00372 Yld = Yd[LLD_]; 00373 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, 00374 Yjj, Yld, usiz ), &Yld ); 00375 } 00376 } 00377 } 00378 else 00379 { 00380 /* 00381 * sub( Y ) resides in (a) process column(s) 00382 */ 00383 if( ( mycol == Ycol ) || ( Ycol < 0 ) ) 00384 { 00385 /* 00386 * Make sure I own some data and scale sub( Y ) 00387 */ 00388 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], 00389 nprow ); 00390 if( Ynp > 0 ) 00391 { 00392 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, 00393 Yjj, Yd[LLD_], usiz ), INCY ); 00394 } 00395 } 00396 } 00397 return; 00398 } 00399 /* 00400 * Compute descriptor Ad0 for sub( A ) 00401 */ 00402 PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, 00403 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); 00404 00405 Yroc = ( *INCY == Yd[M_] ? CROW : CCOLUMN ); 00406 00407 if( notran ) 00408 { 00409 /* 00410 * Reuse sub( Y ) and/or create vector YA in process columns spanned by sub( A ) 00411 */ 00412 PB_CInOutV( utyp, COLUMN, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), 00413 Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, 00414 &YAsum, &YApbY ); 00415 /* 00416 * Replicate sub( X ) in process rows spanned by sub( A ) -> XA 00417 */ 00418 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, 00419 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); 00420 } 00421 else 00422 { 00423 /* 00424 * Reuse sub( Y ) and/or create vector YA in process rows spanned by sub( A ) 00425 */ 00426 PB_CInOutV( utyp, ROW, *N, *N, Ad0, 1, ((char *) BETA), ((char *) Y), 00427 Yi, Yj, Yd, &Yroc, ((char**)(&tbeta)), &YA, YAd, &YAfr, 00428 &YAsum, &YApbY ); 00429 /* 00430 * Replicate sub( X ) in process columns spanned by sub( A ) -> XA 00431 */ 00432 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, 00433 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); 00434 } 00435 00436 one = type->one; 00437 /* 00438 * Local matrix-vector multiply iff I own some data 00439 */ 00440 Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ]; Amb = Ad0[MB_]; Anb = Ad0[NB_]; 00441 Acol = Ad0[CSRC_]; Arow = Ad0[RSRC_]; 00442 Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow ); 00443 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); 00444 00445 if( ( Amp > 0 ) && ( Anq > 0 ) ) 00446 { 00447 Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size ); 00448 00449 XAld = XAd[LLD_]; YAld = YAd[LLD_]; 00450 /* 00451 * Scale YA in the case sub( Y ) has been reused 00452 */ 00453 if( notran && !( YApbY ) ) 00454 { 00455 /* 00456 * YA resides in (a) process column(s) 00457 */ 00458 if( ( mycol == YAd[CSRC_] ) || ( YAd[CSRC_] < 0 ) ) 00459 { 00460 /* 00461 * Make sure I own some data and scale YA 00462 */ 00463 if( Amp > 0 ) 00464 dascal_( &Amp, ((char *) tbeta), YA, &ione ); 00465 } 00466 } 00467 else if( !( notran ) && !( YApbY ) ) 00468 { 00469 /* 00470 * YA resides in (a) process row(s) 00471 */ 00472 if( ( myrow == YAd[RSRC_] ) || ( YAd[RSRC_] < 0 ) ) 00473 { 00474 /* 00475 * Make sure I own some data and scale YA 00476 */ 00477 if( Anq > 0 ) 00478 dascal_( &Anq, ((char *) tbeta), YA, &YAld ); 00479 } 00480 } 00481 /* 00482 * Computational partitioning size is computed as the product of the logical 00483 * value returned by pilaenv_ and 2 * lcm( nprow, npcol ) 00484 */ 00485 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &utyp->type ) ) * 00486 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); 00487 00488 if( upper ) 00489 { 00490 if( notran ) 00491 { 00492 for( k = 0; k < *N; k += nb ) 00493 { 00494 kb = *N - k; kb = MIN( kb, nb ); 00495 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00496 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00497 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00498 if( Akp > 0 && Anq0 > 0 ) 00499 { 00500 dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), 00501 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, 00502 Mptr( XA, 0, Akq, XAld, size ), &XAld, one, 00503 YA, &ione ); 00504 } 00505 PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, 00506 ((char *) ALPHA), Aptr, k, k, Ad0, 00507 Mptr( XA, 0, Akq, XAld, size ), XAld, 00508 Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); 00509 } 00510 } 00511 else 00512 { 00513 for( k = 0; k < *N; k += nb ) 00514 { 00515 kb = *N - k; kb = MIN( kb, nb ); 00516 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00517 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00518 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00519 if( Akp > 0 && Anq0 > 0 ) 00520 { 00521 dagemv_( TRANS, &Akp, &Anq0, ((char *) ALPHA), 00522 Mptr( Aptr, 0, Akq, Ald, size ), &Ald, XA, &ione, 00523 one, Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); 00524 } 00525 PB_Cptrm( type, utyp, LEFT, UPPER, &TranOp, &DiagA, kb, 1, 00526 ((char *) ALPHA), Aptr, k, k, Ad0, 00527 Mptr( XA, Akp, 0, XAld, size ), XAld, 00528 Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); 00529 } 00530 } 00531 } 00532 else 00533 { 00534 if( notran ) 00535 { 00536 for( k = 0; k < *N; k += nb ) 00537 { 00538 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); 00539 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00540 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00541 PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, 00542 ((char *) ALPHA), Aptr, k, k, Ad0, 00543 Mptr( XA, 0, Akq, XAld, size ), XAld, 00544 Mptr( YA, Akp, 0, YAld, usiz ), YAld, PB_Ctzatrmv ); 00545 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); 00546 Amp0 = Amp - Akp; 00547 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00548 if( Amp0 > 0 && Anq0 > 0 ) 00549 { 00550 dagemv_( TRANS, &Amp0, &Anq0, ((char *) ALPHA), 00551 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, 00552 Mptr( XA, 0, Akq, XAld, size ), &XAld, one, 00553 Mptr( YA, Akp, 0, YAld, usiz ), &ione ); 00554 } 00555 } 00556 } 00557 else 00558 { 00559 for( k = 0; k < *N; k += nb ) 00560 { 00561 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) ); 00562 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00563 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00564 PB_Cptrm( type, utyp, LEFT, LOWER, &TranOp, &DiagA, kb, 1, 00565 ((char *) ALPHA), Aptr, k, k, Ad0, 00566 Mptr( XA, Akp, 0, XAld, size ), XAld, 00567 Mptr( YA, 0, Akq, YAld, usiz ), YAld, PB_Ctzatrmv ); 00568 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); 00569 Amp0 = Amp - Akp; 00570 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00571 if( Amp0 > 0 && Anq0 > 0 ) 00572 { 00573 dagemv_( TRANS, &Amp0, &Anq0, one, 00574 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald, 00575 Mptr( XA, Akp, 0, XAld, size ), &ione, one, 00576 Mptr( YA, 0, Akq, YAld, usiz ), &YAld ); 00577 } 00578 } 00579 } 00580 } 00581 } 00582 if( XAfr ) free( XA ); 00583 00584 if( notran ) 00585 { 00586 /* 00587 * Combine the partial column results into YA 00588 */ 00589 if( YAsum && ( Amp > 0 ) ) 00590 { 00591 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); 00592 Cdgsum2d( ctxt, ROW, &top, Amp, 1, YA, YAd[LLD_], myrow, 00593 YAd[CSRC_] ); 00594 } 00595 } 00596 else 00597 { 00598 /* 00599 * Combine the partial row results into YA 00600 */ 00601 if( YAsum && ( Anq > 0 ) ) 00602 { 00603 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); 00604 Cdgsum2d( ctxt, COLUMN, &top, 1, Anq, YA, YAd[LLD_], YAd[RSRC_], 00605 mycol ); 00606 } 00607 } 00608 /* 00609 * sub( Y ) := beta * sub( Y ) + YA (if necessary) 00610 */ 00611 if( YApbY ) 00612 { 00613 /* 00614 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol 00615 */ 00616 PB_Cinfog2l( Yi, Yj, Yd, nprow, npcol, myrow, mycol, &Yii, &Yjj, &Yrow, 00617 &Ycol ); 00618 00619 if( *INCY == Yd[M_] ) 00620 { 00621 /* 00622 * sub( Y ) resides in (a) process row(s) 00623 */ 00624 if( ( myrow == Yrow ) || ( Yrow < 0 ) ) 00625 { 00626 /* 00627 * Make sure I own some data and scale sub( Y ) 00628 */ 00629 Ynq = PB_Cnumroc( *N, Yj, Yd[INB_], Yd[NB_], mycol, Yd[CSRC_], 00630 npcol ); 00631 if( Ynq > 0 ) 00632 { 00633 Yld = Yd[LLD_]; 00634 dascal_( &Ynq, ((char *) BETA), Mptr( ((char *) Y), Yii, 00635 Yjj, Yld, usiz ), &Yld ); 00636 } 00637 } 00638 } 00639 else 00640 { 00641 /* 00642 * sub( Y ) resides in (a) process column(s) 00643 */ 00644 if( ( mycol == Ycol ) || ( Ycol < 0 ) ) 00645 { 00646 /* 00647 * Make sure I own some data and scale sub( Y ) 00648 */ 00649 Ynp = PB_Cnumroc( *N, Yi, Yd[IMB_], Yd[MB_], myrow, Yd[RSRC_], 00650 nprow ); 00651 if( Ynp > 0 ) 00652 { 00653 dascal_( &Ynp, ((char *) BETA), Mptr( ((char *) Y), Yii, 00654 Yjj, Yd[LLD_], usiz ), INCY ); 00655 } 00656 } 00657 } 00658 00659 if( notran ) 00660 { 00661 PB_Cpaxpby( utyp, NOCONJG, *N, 1, one, YA, 0, 0, YAd, COLUMN, one, 00662 ((char *) Y), Yi, Yj, Yd, &Yroc ); 00663 } 00664 else 00665 { 00666 PB_Cpaxpby( utyp, NOCONJG, 1, *N, one, YA, 0, 0, YAd, ROW, one, 00667 ((char *) Y), Yi, Yj, Yd, &Yroc ); 00668 } 00669 } 00670 if( YAfr ) free( YA ); 00671 /* 00672 * End of PDATRMV 00673 */ 00674 }