|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "pblas.h" 00014 #include "PBpblas.h" 00015 #include "PBtools.h" 00016 #include "PBblacs.h" 00017 #include "PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void pctradd_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * M, int * N, 00021 float * ALPHA, 00022 float * A, int * IA, int * JA, int * DESCA, 00023 float * BETA, 00024 float * C, int * IC, int * JC, int * DESCC ) 00025 #else 00026 void pctradd_( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA, 00027 C, IC, JC, DESCC ) 00028 /* 00029 * .. Scalar Arguments .. 00030 */ 00031 F_CHAR_T TRANS, UPLO; 00032 int * IA, * IC, * JA, * JC, * M, * N; 00033 float * ALPHA, * BETA; 00034 /* 00035 * .. Array Arguments .. 00036 */ 00037 int * DESCA, * DESCC; 00038 float * A, * C; 00039 #endif 00040 { 00041 /* 00042 * Purpose 00043 * ======= 00044 * 00045 * PCTRADD adds a trapezoidal matrix to another 00046 * 00047 * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) 00048 * 00049 * where 00050 * 00051 * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of 00052 * 00053 * op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ). 00054 * 00055 * Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if TRANS = 'N', 00056 * A(IA:IA+N-1,JA:JA+M-1)' if TRANS = 'T', 00057 * conjg(A(IA:IA+N-1,JA:JA+M-1)') if TRANS = 'C', 00058 * 00059 * Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n 00060 * upper or lower trapezoidal submatrices. 00061 * 00062 * Notes 00063 * ===== 00064 * 00065 * A description vector is associated with each 2D block-cyclicly dis- 00066 * tributed matrix. This vector stores the information required to 00067 * establish the mapping between a matrix entry and its corresponding 00068 * process and memory location. 00069 * 00070 * In the following comments, the character _ should be read as 00071 * "of the distributed matrix". Let A be a generic term for any 2D 00072 * block cyclicly distributed matrix. Its description vector is DESC_A: 00073 * 00074 * NOTATION STORED IN EXPLANATION 00075 * ---------------- --------------- ------------------------------------ 00076 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00077 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00078 * the NPROW x NPCOL BLACS process grid 00079 * A is distributed over. The context 00080 * itself is global, but the handle 00081 * (the integer value) may vary. 00082 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00083 * ted matrix A, M_A >= 0. 00084 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00085 * buted matrix A, N_A >= 0. 00086 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00087 * block of the matrix A, IMB_A > 0. 00088 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00089 * left block of the matrix A, 00090 * INB_A > 0. 00091 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00092 * bute the last M_A-IMB_A rows of A, 00093 * MB_A > 0. 00094 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00095 * bute the last N_A-INB_A columns of 00096 * A, NB_A > 0. 00097 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00098 * row of the matrix A is distributed, 00099 * NPROW > RSRC_A >= 0. 00100 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00101 * first column of A is distributed. 00102 * NPCOL > CSRC_A >= 0. 00103 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00104 * array storing the local blocks of 00105 * the distributed matrix A, 00106 * IF( Lc( 1, N_A ) > 0 ) 00107 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00108 * ELSE 00109 * LLD_A >= 1. 00110 * 00111 * Let K be the number of rows of a matrix A starting at the global in- 00112 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00113 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00114 * receive if these K rows were distributed over NPROW processes. If K 00115 * is the number of columns of a matrix A starting at the global index 00116 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00117 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00118 * these K columns were distributed over NPCOL processes. 00119 * 00120 * The values of Lr() and Lc() may be determined via a call to the func- 00121 * tion PB_Cnumroc: 00122 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00123 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00124 * 00125 * Arguments 00126 * ========= 00127 * 00128 * UPLO (global input) CHARACTER*1 00129 * On entry, UPLO specifies whether the local pieces of the 00130 * array C containing the upper or lower triangular part of the 00131 * triangular submatrix sub( C ) is to be referenced as follows: 00132 * 00133 * UPLO = 'U' or 'u' Only the local pieces corresponding to 00134 * the upper triangular part of the 00135 * triangular submatrix sub( C ) is to be 00136 * referenced, 00137 * 00138 * UPLO = 'L' or 'l' Only the local pieces corresponding to 00139 * the lower triangular part of the 00140 * triangular submatrix sub( C ) is to be 00141 * referenced. 00142 * 00143 * TRANS (global input) CHARACTER*1 00144 * On entry, TRANS specifies the form of op( sub( A ) ) to be 00145 * used in the matrix addition as follows: 00146 * 00147 * TRANS = 'N' or 'n' op( sub( A ) ) = sub( A ), 00148 * 00149 * TRANS = 'T' or 't' op( sub( A ) ) = sub( A )', 00150 * 00151 * TRANS = 'C' or 'c' op( sub( A ) ) = conjg( sub( A )' ). 00152 * 00153 * M (global input) INTEGER 00154 * On entry, M specifies the number of rows of the submatrix 00155 * sub( C ) and the number of columns of the submatrix sub( A ). 00156 * M must be at least zero. 00157 * 00158 * N (global input) INTEGER 00159 * On entry, N specifies the number of columns of the submatrix 00160 * sub( C ) and the number of rows of the submatrix sub( A ). N 00161 * must be at least zero. 00162 * 00163 * ALPHA (global input) COMPLEX 00164 * On entry, ALPHA specifies the scalar alpha. When ALPHA is 00165 * supplied as zero then the local entries of the array A 00166 * corresponding to the entries of the submatrix sub( A ) need 00167 * not be set on input. 00168 * 00169 * A (local input) COMPLEX array 00170 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00171 * at least Lc( 1, JA+N-1 ) when TRANS = 'N' or 'n' and is at 00172 * least Lc( 1, JA+M-1 ) otherwise. Before entry, this array 00173 * contains the local entries of the matrix A. 00174 * Before entry with UPLO = 'U' or 'u' and TRANS = 'N' or 'n' or 00175 * UPLO = 'L' or 'l' and TRANS = 'T', 'C', 't' or 'c', this ar- 00176 * ray contains the local entries corresponding to the entries 00177 * of the upper triangular submatrix sub( A ), and the local en- 00178 * tries corresponding to the entries of the strictly lower tri- 00179 * angular part of the submatrix sub( A ) are not referenced. 00180 * Before entry with UPLO = 'L' or 'l' and TRANS = 'N' or 'n' or 00181 * UPLO = 'U' or 'u' and TRANS = 'T', 'C', 't' or 'c', this ar- 00182 * ray contains the local entries corresponding to the entries 00183 * of the lower triangular submatrix sub( A ), and the local en- 00184 * tries corresponding to the entries of the strictly upper tri- 00185 * angular part of the submatrix sub( A ) are not referenced. 00186 * 00187 * IA (global input) INTEGER 00188 * On entry, IA specifies A's global row index, which points to 00189 * the beginning of the submatrix sub( A ). 00190 * 00191 * JA (global input) INTEGER 00192 * On entry, JA specifies A's global column index, which points 00193 * to the beginning of the submatrix sub( A ). 00194 * 00195 * DESCA (global and local input) INTEGER array 00196 * On entry, DESCA is an integer array of dimension DLEN_. This 00197 * is the array descriptor for the matrix A. 00198 * 00199 * BETA (global input) COMPLEX 00200 * On entry, BETA specifies the scalar beta. When BETA is 00201 * supplied as zero then the local entries of the array C 00202 * corresponding to the entries of the submatrix sub( C ) need 00203 * not be set on input. 00204 * 00205 * C (local input/local output) COMPLEX array 00206 * On entry, C is an array of dimension (LLD_C, Kc), where Kc is 00207 * at least Lc( 1, JC+N-1 ). Before entry, this array contains 00208 * the local entries of the matrix C. 00209 * Before entry with UPLO = 'U' or 'u', this array contains 00210 * the local entries corresponding to the upper triangular part 00211 * of the triangular submatrix sub( C ), and the local entries 00212 * corresponding to the strictly lower triangular of sub( C ) 00213 * are not referenced. On exit, the upper triangular part of 00214 * sub( C ) is overwritten by the upper triangular part of the 00215 * updated submatrix. 00216 * Before entry with UPLO = 'L' or 'l', this array contains 00217 * the local entries corresponding to the lower triangular part 00218 * of the triangular submatrix sub( C ), and the local entries 00219 * corresponding to the strictly upper triangular of sub( C ) 00220 * are not referenced. On exit, the lower triangular part of 00221 * sub( C ) is overwritten by the lower triangular part of the 00222 * updated submatrix. 00223 * 00224 * IC (global input) INTEGER 00225 * On entry, IC specifies C's global row index, which points to 00226 * the beginning of the submatrix sub( C ). 00227 * 00228 * JC (global input) INTEGER 00229 * On entry, JC specifies C's global column index, which points 00230 * to the beginning of the submatrix sub( C ). 00231 * 00232 * DESCC (global and local input) INTEGER array 00233 * On entry, DESCC is an integer array of dimension DLEN_. This 00234 * is the array descriptor for the matrix C. 00235 * 00236 * -- Written on April 1, 1998 by 00237 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00238 * 00239 * --------------------------------------------------------------------- 00240 */ 00241 /* 00242 * .. Local Scalars .. 00243 */ 00244 char DirAC, TranOp, UploC, ctop, rtop; 00245 int Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, notran, npcol, 00246 nprow, upper; 00247 /* 00248 * .. Local Arrays .. 00249 */ 00250 int Ad[DLEN_], Cd[DLEN_]; 00251 /* .. 00252 * .. Executable Statements .. 00253 * 00254 */ 00255 upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER ); 00256 notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN ); 00257 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad ); 00258 PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd ); 00259 #ifndef NO_ARGCHK 00260 /* 00261 * Test the input parameters 00262 */ 00263 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00264 if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) ) 00265 { 00266 if( ( !upper ) && ( UploC != CLOWER ) ) 00267 { 00268 PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal UPLO = %c\n", UploC ); 00269 info = -1; 00270 } 00271 else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) ) 00272 { 00273 PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal TRANS = %c\n", TranOp ); 00274 info = -2; 00275 } 00276 if( notran ) 00277 PB_Cchkmat( ctxt, "PCTRADD", "A", *M, 3, *N, 4, Ai, Aj, Ad, 9, 00278 &info ); 00279 else 00280 PB_Cchkmat( ctxt, "PCTRADD", "A", *N, 4, *M, 3, Ai, Aj, Ad, 9, 00281 &info ); 00282 PB_Cchkmat( ctxt, "PCTRADD", "C", *M, 3, *N, 4, Ci, Cj, Cd, 14, 00283 &info ); 00284 } 00285 if( info ) { PB_Cabort( ctxt, "PCTRADD", info ); return; } 00286 #endif 00287 /* 00288 * Quick return if possible 00289 */ 00290 if( ( *M == 0 ) || ( *N == 0 ) || 00291 ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) && 00292 ( BETA [REAL_PART] == ONE && BETA [IMAG_PART] == ZERO ) ) ) 00293 return; 00294 /* 00295 * And when alpha is zero 00296 */ 00297 if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) 00298 { 00299 if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) ) 00300 { 00301 PB_Cplapad( PB_Cctypeset(), &UploC, NOCONJG, *M, *N, 00302 ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd ); 00303 } 00304 else 00305 { 00306 PB_Cplascal( PB_Cctypeset(), &UploC, NOCONJG, *M, *N, 00307 ((char *)BETA), ((char * )C), Ci, Cj, Cd ); 00308 } 00309 return; 00310 } 00311 /* 00312 * Start the operations 00313 */ 00314 /* 00315 * This operation mainly involves point-to-point send and receive communication. 00316 * There is therefore no particular BLACS topology to recommend. Still, one can 00317 * choose the main loop direction in which the operands will be added, but not 00318 * transposed. This selection is based on the current setting for the BLACS 00319 * broadcast operations. 00320 */ 00321 rtop = *PB_Ctop( &ctxt, BCAST, ROW, TOP_GET ); 00322 ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET ); 00323 00324 if( *M <= *N ) 00325 DirAC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD ); 00326 else 00327 DirAC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD ); 00328 PB_Cptradd( PB_Cctypeset(), &DirAC, &UploC, ( notran ? NOTRAN : 00329 ( ( TranOp == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N, 00330 ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *) BETA), 00331 ((char *) C), Ci, Cj, Cd ); 00332 /* 00333 * End of PCTRADD 00334 */ 00335 }