|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "pblas.h" 00014 #include "PBpblas.h" 00015 #include "PBtools.h" 00016 #include "PBblacs.h" 00017 #include "PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void pzgeru_( int * M, int * N, double * ALPHA, 00021 double * X, int * IX, int * JX, int * DESCX, int * INCX, 00022 double * Y, int * IY, int * JY, int * DESCY, int * INCY, 00023 double * A, int * IA, int * JA, int * DESCA ) 00024 #else 00025 void pzgeru_( M, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, 00026 INCY, A, IA, JA, DESCA ) 00027 /* 00028 * .. Scalar Arguments .. 00029 */ 00030 int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY, 00031 * M, * N; 00032 double * ALPHA; 00033 /* 00034 * .. Array Arguments .. 00035 */ 00036 int * DESCA, * DESCX, * DESCY; 00037 double * A, * X, * Y; 00038 #endif 00039 { 00040 /* 00041 * Purpose 00042 * ======= 00043 * 00044 * PZGERU performs the rank 1 operation 00045 * 00046 * sub( A ) := alpha*sub( X )*sub( Y )' + sub( A ), 00047 * 00048 * where 00049 * 00050 * sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1), 00051 * 00052 * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, 00053 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and, 00054 * 00055 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y, 00056 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y. 00057 * 00058 * Alpha is a scalar, sub( X ) is an m element subvector, sub( Y ) is 00059 * an n element subvector and sub( A ) is an m by n submatrix. 00060 * 00061 * Notes 00062 * ===== 00063 * 00064 * A description vector is associated with each 2D block-cyclicly dis- 00065 * tributed matrix. This vector stores the information required to 00066 * establish the mapping between a matrix entry and its corresponding 00067 * process and memory location. 00068 * 00069 * In the following comments, the character _ should be read as 00070 * "of the distributed matrix". Let A be a generic term for any 2D 00071 * block cyclicly distributed matrix. Its description vector is DESC_A: 00072 * 00073 * NOTATION STORED IN EXPLANATION 00074 * ---------------- --------------- ------------------------------------ 00075 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00076 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00077 * the NPROW x NPCOL BLACS process grid 00078 * A is distributed over. The context 00079 * itself is global, but the handle 00080 * (the integer value) may vary. 00081 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00082 * ted matrix A, M_A >= 0. 00083 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00084 * buted matrix A, N_A >= 0. 00085 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00086 * block of the matrix A, IMB_A > 0. 00087 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00088 * left block of the matrix A, 00089 * INB_A > 0. 00090 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00091 * bute the last M_A-IMB_A rows of A, 00092 * MB_A > 0. 00093 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00094 * bute the last N_A-INB_A columns of 00095 * A, NB_A > 0. 00096 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00097 * row of the matrix A is distributed, 00098 * NPROW > RSRC_A >= 0. 00099 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00100 * first column of A is distributed. 00101 * NPCOL > CSRC_A >= 0. 00102 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00103 * array storing the local blocks of 00104 * the distributed matrix A, 00105 * IF( Lc( 1, N_A ) > 0 ) 00106 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00107 * ELSE 00108 * LLD_A >= 1. 00109 * 00110 * Let K be the number of rows of a matrix A starting at the global in- 00111 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00112 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00113 * receive if these K rows were distributed over NPROW processes. If K 00114 * is the number of columns of a matrix A starting at the global index 00115 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00116 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00117 * these K columns were distributed over NPCOL processes. 00118 * 00119 * The values of Lr() and Lc() may be determined via a call to the func- 00120 * tion PB_Cnumroc: 00121 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00122 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00123 * 00124 * Arguments 00125 * ========= 00126 * 00127 * M (global input) INTEGER 00128 * On entry, M specifies the number of rows of the submatrix 00129 * sub( A ). M must be at least zero. 00130 * 00131 * N (global input) INTEGER 00132 * On entry, N specifies the number of columns of the submatrix 00133 * sub( A ). N must be at least zero. 00134 * 00135 * ALPHA (global input) COMPLEX*16 00136 * On entry, ALPHA specifies the scalar alpha. When ALPHA is 00137 * supplied as zero then the local entries of the arrays X 00138 * and Y corresponding to the entries of the subvectors sub( X ) 00139 * and sub( Y ) respectively need not be set on input. 00140 * 00141 * X (local input) COMPLEX*16 array 00142 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X 00143 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and 00144 * MAX( 1, Lr( 1, IX+M-1 ) ) otherwise, and, Kx is at least 00145 * Lc( 1, JX+M-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. 00146 * Before entry, this array contains the local entries of the 00147 * matrix X. 00148 * 00149 * IX (global input) INTEGER 00150 * On entry, IX specifies X's global row index, which points to 00151 * the beginning of the submatrix sub( X ). 00152 * 00153 * JX (global input) INTEGER 00154 * On entry, JX specifies X's global column index, which points 00155 * to the beginning of the submatrix sub( X ). 00156 * 00157 * DESCX (global and local input) INTEGER array 00158 * On entry, DESCX is an integer array of dimension DLEN_. This 00159 * is the array descriptor for the matrix X. 00160 * 00161 * INCX (global input) INTEGER 00162 * On entry, INCX specifies the global increment for the 00163 * elements of X. Only two values of INCX are supported in 00164 * this version, namely 1 and M_X. INCX must not be zero. 00165 * 00166 * Y (local input) COMPLEX*16 array 00167 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y 00168 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and 00169 * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least 00170 * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise. 00171 * Before entry, this array contains the local entries of the 00172 * matrix Y. 00173 * 00174 * IY (global input) INTEGER 00175 * On entry, IY specifies Y's global row index, which points to 00176 * the beginning of the submatrix sub( Y ). 00177 * 00178 * JY (global input) INTEGER 00179 * On entry, JY specifies Y's global column index, which points 00180 * to the beginning of the submatrix sub( Y ). 00181 * 00182 * DESCY (global and local input) INTEGER array 00183 * On entry, DESCY is an integer array of dimension DLEN_. This 00184 * is the array descriptor for the matrix Y. 00185 * 00186 * INCY (global input) INTEGER 00187 * On entry, INCY specifies the global increment for the 00188 * elements of Y. Only two values of INCY are supported in 00189 * this version, namely 1 and M_Y. INCY must not be zero. 00190 * 00191 * A (local input/local output) COMPLEX*16 array 00192 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00193 * at least Lc( 1, JA+N-1 ). Before entry, this array contains 00194 * the local entries of the matrix A. 00195 * On exit, the entries of this array corresponding to the local 00196 * entries of the submatrix sub( A ) are overwritten by the 00197 * local entries of the m by n updated submatrix. 00198 * 00199 * IA (global input) INTEGER 00200 * On entry, IA specifies A's global row index, which points to 00201 * the beginning of the submatrix sub( A ). 00202 * 00203 * JA (global input) INTEGER 00204 * On entry, JA specifies A's global column index, which points 00205 * to the beginning of the submatrix sub( A ). 00206 * 00207 * DESCA (global and local input) INTEGER array 00208 * On entry, DESCA is an integer array of dimension DLEN_. This 00209 * is the array descriptor for the matrix A. 00210 * 00211 * -- Written on April 1, 1998 by 00212 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00213 * 00214 * --------------------------------------------------------------------- 00215 */ 00216 /* 00217 * .. Local Scalars .. 00218 */ 00219 int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Ald, Amb, Amp, Anb, 00220 Anq, Arow, XAfr, Xi, Xj, YAfr, Yi, Yj, ctxt, info, ione=1, 00221 mycol, myrow, npcol, nprow; 00222 PBTYP_T * type; 00223 /* 00224 * .. Local Arrays .. 00225 */ 00226 int Ad[DLEN_], Ad0[DLEN_], XAd[DLEN_], Xd[DLEN_], YAd[DLEN_], 00227 Yd[DLEN_]; 00228 char * XA = NULL, * YA = NULL; 00229 /* .. 00230 * .. Executable Statements .. 00231 * 00232 */ 00233 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); 00234 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); 00235 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd ); 00236 #ifndef NO_ARGCHK 00237 /* 00238 * Test the input parameters 00239 */ 00240 Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00241 if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) ) 00242 { 00243 PB_Cchkvec( ctxt, "PZGERU", "X", *M, 1, Xi, Xj, Xd, *INCX, 7, &info ); 00244 PB_Cchkvec( ctxt, "PZGERU", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info ); 00245 PB_Cchkmat( ctxt, "PZGERU", "A", *M, 1, *N, 2, Ai, Aj, Ad, 17, &info ); 00246 } 00247 if( info ) { PB_Cabort( ctxt, "PZGERU", info ); return; } 00248 #endif 00249 /* 00250 * Quick return if possible 00251 */ 00252 if( ( *M == 0 ) || ( *N == 0 ) || 00253 ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ) 00254 return; 00255 /* 00256 * Retrieve process grid information 00257 */ 00258 #ifdef NO_ARGCHK 00259 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00260 #endif 00261 /* 00262 * Get type structure 00263 */ 00264 type = PB_Cztypeset(); 00265 /* 00266 * Compute descriptor Ad0 for sub( A ) 00267 */ 00268 PB_Cdescribe( *M, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj, 00269 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); 00270 00271 /* 00272 * Replicate sub( X ) in process columns spanned by sub( A ) -> XA 00273 */ 00274 PB_CInV( type, NOCONJG, COLUMN, *M, *N, Ad0, 1, ((char *) X), Xi, Xj, Xd, 00275 ( *INCX == Xd[M_] ? ROW : COLUMN ), &XA, XAd, &XAfr ); 00276 /* 00277 * Replicate sub( Y ) in process rows spanned by sub( A ) -> YA 00278 */ 00279 PB_CInV( type, NOCONJG, ROW, *M, *N, Ad0, 1, ((char *) Y), Yi, Yj, Yd, 00280 ( *INCY == Yd[M_] ? ROW : COLUMN ), &YA, YAd, &YAfr ); 00281 /* 00282 * Local rank-1 update iff I own some data 00283 */ 00284 Amp = PB_Cnumroc( *M, 0, Aimb1, Amb, myrow, Arow, nprow ); 00285 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol ); 00286 00287 if( ( Amp > 0 ) && ( Anq > 0 ) ) 00288 { 00289 zgeru_( &Amp, &Anq, ((char *) ALPHA), XA, &ione, YA, &YAd[LLD_], 00290 Mptr( ((char *) A), Aii, Ajj, Ald, type->size ), &Ald ); 00291 } 00292 if( XAfr ) free( XA ); 00293 if( YAfr ) free( YA ); 00294 /* 00295 * End of PZGERU 00296 */ 00297 }