/*
 * ScaLAPACK 2.0.2
 * ScaLAPACK: Scalable Linear Algebra PACKage
 */
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "pblas.h" 00014 #include "PBpblas.h" 00015 #include "PBtools.h" 00016 #include "PBblacs.h" 00017 #include "PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void pscasum_( int * N, float * ASUM, 00021 float * X, int * IX, int * JX, int * DESCX, int * INCX ) 00022 #else 00023 void pscasum_( N, ASUM, X, IX, JX, DESCX, INCX ) 00024 /* 00025 * .. Scalar Arguments .. 00026 */ 00027 int * INCX, * IX, * JX, * N; 00028 float * ASUM; 00029 /* 00030 * .. Array Arguments .. 00031 */ 00032 int * DESCX; 00033 float * X; 00034 #endif 00035 { 00036 /* 00037 * Purpose 00038 * ======= 00039 * 00040 * PSCASUM returns the sum of absolute values of the entries of a sub- 00041 * vector sub( X ), 00042 * 00043 * where 00044 * 00045 * sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X, 00046 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X. 00047 * 00048 * Notes 00049 * ===== 00050 * 00051 * A description vector is associated with each 2D block-cyclicly dis- 00052 * tributed matrix. This vector stores the information required to 00053 * establish the mapping between a matrix entry and its corresponding 00054 * process and memory location. 00055 * 00056 * In the following comments, the character _ should be read as 00057 * "of the distributed matrix". Let A be a generic term for any 2D 00058 * block cyclicly distributed matrix. Its description vector is DESC_A: 00059 * 00060 * NOTATION STORED IN EXPLANATION 00061 * ---------------- --------------- ------------------------------------ 00062 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 
00063 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00064 * the NPROW x NPCOL BLACS process grid 00065 * A is distributed over. The context 00066 * itself is global, but the handle 00067 * (the integer value) may vary. 00068 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00069 * ted matrix A, M_A >= 0. 00070 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00071 * buted matrix A, N_A >= 0. 00072 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00073 * block of the matrix A, IMB_A > 0. 00074 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00075 * left block of the matrix A, 00076 * INB_A > 0. 00077 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00078 * bute the last M_A-IMB_A rows of A, 00079 * MB_A > 0. 00080 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00081 * bute the last N_A-INB_A columns of 00082 * A, NB_A > 0. 00083 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00084 * row of the matrix A is distributed, 00085 * NPROW > RSRC_A >= 0. 00086 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00087 * first column of A is distributed. 00088 * NPCOL > CSRC_A >= 0. 00089 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00090 * array storing the local blocks of 00091 * the distributed matrix A, 00092 * IF( Lc( 1, N_A ) > 0 ) 00093 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00094 * ELSE 00095 * LLD_A >= 1. 00096 * 00097 * Let K be the number of rows of a matrix A starting at the global in- 00098 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00099 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00100 * receive if these K rows were distributed over NPROW processes. 
If K 00101 * is the number of columns of a matrix A starting at the global index 00102 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00103 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00104 * these K columns were distributed over NPCOL processes. 00105 * 00106 * The values of Lr() and Lc() may be determined via a call to the func- 00107 * tion PB_Cnumroc: 00108 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00109 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00110 * 00111 * Arguments 00112 * ========= 00113 * 00114 * N (global input) INTEGER 00115 * On entry, N specifies the length of the subvector sub( X ). 00116 * N must be at least zero. 00117 * 00118 * ASUM (local output) REAL 00119 * On exit, ASUM specifies the sum of absolute values of the 00120 * subvector sub( X ) only in its scope (See below for further 00121 * details). 00122 * 00123 * X (local input) COMPLEX array 00124 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X 00125 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and 00126 * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least 00127 * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise. 00128 * Before entry, this array contains the local entries of the 00129 * matrix X. 00130 * 00131 * IX (global input) INTEGER 00132 * On entry, IX specifies X's global row index, which points to 00133 * the beginning of the submatrix sub( X ). 00134 * 00135 * JX (global input) INTEGER 00136 * On entry, JX specifies X's global column index, which points 00137 * to the beginning of the submatrix sub( X ). 00138 * 00139 * DESCX (global and local input) INTEGER array 00140 * On entry, DESCX is an integer array of dimension DLEN_. This 00141 * is the array descriptor for the matrix X. 00142 * 00143 * INCX (global input) INTEGER 00144 * On entry, INCX specifies the global increment for the 00145 * elements of X. 
Only two values of INCX are supported in 00146 * this version, namely 1 and M_X. INCX must not be zero. 00147 * 00148 * Further Details 00149 * =============== 00150 * 00151 * When the result of a vector-oriented PBLAS call is a scalar, this 00152 * scalar is set only within the process scope which owns the vector(s) 00153 * being operated on. Let sub( X ) be a generic term for the input vec- 00154 * tor(s). Then, the processes owning the correct the answer is determi- 00155 * ned as follows: if an operation involves more than one vector, the 00156 * processes receiving the result will be the union of the following set 00157 * of processes for each vector: 00158 * 00159 * If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro- 00160 * cess row or process column owns the vector operand, therefore only 00161 * the process owning sub( X ) receives the correct result; 00162 * 00163 * If INCX = M_X, then sub( X ) is a vector distributed over a process 00164 * row. Each process in this row receives the result; 00165 * 00166 * If INCX = 1, then sub( X ) is a vector distributed over a process 00167 * column. Each process in this column receives the result; 00168 * 00169 * -- Written on April 1, 1998 by 00170 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00171 * 00172 * --------------------------------------------------------------------- 00173 */ 00174 /* 00175 * .. Local Scalars .. 00176 */ 00177 char * Xptr = NULL, top; 00178 int Xcol, Xi, Xii, Xj, Xjj, Xld, Xnp, Xnq, Xrow, ctxt, info, 00179 mycol, myrow, npcol, nprow; 00180 PBTYP_T * type; 00181 /* 00182 * .. Local Arrays .. 00183 */ 00184 int Xd[DLEN_]; 00185 /* .. 00186 * .. Executable Statements .. 00187 * 00188 */ 00189 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd ); 00190 #ifndef NO_ARGCHK 00191 /* 00192 * Test the input parameters 00193 */ 00194 Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00195 if( !( info = ( ( nprow == -1 ) ? 
-( 601 + CTXT_ ) : 0 ) ) ) 00196 PB_Cchkvec( ctxt, "PSCASUM", "X", *N, 1, Xi, Xj, Xd, *INCX, 6, &info ); 00197 if( info ) { PB_Cabort( ctxt, "PSCASUM", info ); return; } 00198 #endif 00199 /* 00200 * Initialize ASUM 00201 */ 00202 *ASUM = ZERO; 00203 /* 00204 * Quick return if possible 00205 */ 00206 if( *N == 0 ) return; 00207 /* 00208 * Retrieve process grid information 00209 */ 00210 #ifdef NO_ARGCHK 00211 Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00212 #endif 00213 /* 00214 * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol 00215 */ 00216 PB_Cinfog2l( Xi, Xj, Xd, nprow, npcol, myrow, mycol, &Xii, &Xjj, 00217 &Xrow, &Xcol ); 00218 /* 00219 * Handle degenerate case separately, sub( X )'s scope is just one process 00220 */ 00221 if( ( *N == 1 ) && ( *INCX == 1 ) && ( Xd[M_] == 1 ) ) 00222 { 00223 /* 00224 * Make sure I own some data and compute ASUM 00225 */ 00226 if( ( ( myrow == Xrow ) || ( Xrow < 0 ) ) && 00227 ( ( mycol == Xcol ) || ( Xcol < 0 ) ) ) 00228 { 00229 type = PB_Cctypeset(); 00230 Xptr = Mptr( ((char *) X), Xii, Xjj, Xd[LLD_], type->size ); 00231 *ASUM = ABS( ((float *) Xptr)[REAL_PART] ) + 00232 ABS( ((float *) Xptr)[IMAG_PART] ); 00233 } 00234 return; 00235 } 00236 else if( *INCX == Xd[M_] ) 00237 { 00238 /* 00239 * sub( X ) resides in (a) process row(s) 00240 */ 00241 if( ( myrow == Xrow ) || ( Xrow < 0 ) ) 00242 { 00243 /* 00244 * Make sure I own some data and compute the local sum 00245 */ 00246 Xnq = PB_Cnumroc( *N, Xj, Xd[INB_], Xd[NB_], mycol, Xd[CSRC_], npcol ); 00247 if( Xnq > 0 ) 00248 { 00249 Xld = Xd[LLD_]; 00250 type = PB_Cctypeset(); 00251 scvasum_( &Xnq, ((char *) ASUM), Mptr( ((char *) X), Xii, 00252 Xjj, Xld, type->size ), &Xld ); 00253 } 00254 /* 00255 * If Xnq <= 0, ASUM is zero (see initialization above) 00256 */ 00257 if( ( npcol > 1 ) && ( Xcol >= 0 ) ) 00258 { 00259 /* 00260 * Combine the local results if npcol > 1 and Xcol >= 0, i.e sub( X ) is 00261 * distributed. 
00262 */ 00263 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET ); 00264 Csgsum2d( ctxt, ROW, &top, 1, 1, ((char *)ASUM), 1, -1, 00265 mycol ); 00266 } 00267 } 00268 return; 00269 } 00270 else 00271 { 00272 /* 00273 * sub( X ) resides in (a) process column(s) 00274 */ 00275 if( ( mycol == Xcol ) || ( Xcol < 0 ) ) 00276 { 00277 /* 00278 * Make sure I own some data and compute the local sum 00279 */ 00280 Xnp = PB_Cnumroc( *N, Xi, Xd[IMB_], Xd[MB_], myrow, Xd[RSRC_], nprow ); 00281 if( Xnp > 0 ) 00282 { 00283 type = PB_Cctypeset(); 00284 scvasum_( &Xnp, ((char *) ASUM), Mptr( ((char *) X), Xii, 00285 Xjj, Xd[LLD_], type->size ), INCX ); 00286 } 00287 /* 00288 * If Xnp <= 0, ASUM is zero (see initialization above) 00289 */ 00290 if( ( nprow > 1 ) && ( Xrow >= 0 ) ) 00291 { 00292 /* 00293 * Combine the local results if nprow > 1 and Xrow >= 0, i.e sub( X ) is 00294 * distributed. 00295 */ 00296 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET ); 00297 Csgsum2d( ctxt, COLUMN, &top, 1, 1, ((char *)ASUM), 1, -1, 00298 mycol ); 00299 } 00300 } 00301 return; 00302 } 00303 /* 00304 * End of PSCASUM 00305 */ 00306 }