|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS auxiliary routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "../pblas.h" 00014 #include "../PBpblas.h" 00015 #include "../PBtools.h" 00016 #include "../PBblacs.h" 00017 #include "../PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void PB_Cplapad( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M, 00021 int N, char * ALPHA, char * BETA, char * A, int IA, 00022 int JA, int * DESCA ) 00023 #else 00024 void PB_Cplapad( TYPE, UPLO, CONJUG, M, N, ALPHA, BETA, A, IA, JA, 00025 DESCA ) 00026 /* 00027 * .. Scalar Arguments .. 00028 */ 00029 char * CONJUG, * UPLO; 00030 int IA, JA, M, N; 00031 char * ALPHA, * BETA; 00032 PBTYP_T * TYPE; 00033 /* 00034 * .. Array Arguments .. 00035 */ 00036 int * DESCA; 00037 char * A; 00038 #endif 00039 { 00040 /* 00041 * Purpose 00042 * ======= 00043 * 00044 * PB_Cplapad initializes an m by n submatrix A(IA:IA+M-1,JA:JA+N-1) 00045 * denoted by sub( A ) to beta on the diagonal or zeros the imaginary 00046 * part of those diagonals and set the offdiagonals to alpha. 00047 * 00048 * Notes 00049 * ===== 00050 * 00051 * A description vector is associated with each 2D block-cyclicly dis- 00052 * tributed matrix. This vector stores the information required to 00053 * establish the mapping between a matrix entry and its corresponding 00054 * process and memory location. 00055 * 00056 * In the following comments, the character _ should be read as 00057 * "of the distributed matrix". Let A be a generic term for any 2D 00058 * block cyclicly distributed matrix. Its description vector is DESC_A: 00059 * 00060 * NOTATION STORED IN EXPLANATION 00061 * ---------------- --------------- ------------------------------------ 00062 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00063 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00064 * the NPROW x NPCOL BLACS process grid 00065 * A is distributed over. The context 00066 * itself is global, but the handle 00067 * (the integer value) may vary. 00068 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00069 * ted matrix A, M_A >= 0. 00070 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00071 * buted matrix A, N_A >= 0. 00072 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00073 * block of the matrix A, IMB_A > 0. 00074 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00075 * left block of the matrix A, 00076 * INB_A > 0. 00077 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00078 * bute the last M_A-IMB_A rows of A, 00079 * MB_A > 0. 00080 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00081 * bute the last N_A-INB_A columns of 00082 * A, NB_A > 0. 00083 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00084 * row of the matrix A is distributed, 00085 * NPROW > RSRC_A >= 0. 00086 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00087 * first column of A is distributed. 00088 * NPCOL > CSRC_A >= 0. 00089 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00090 * array storing the local blocks of 00091 * the distributed matrix A, 00092 * IF( Lc( 1, N_A ) > 0 ) 00093 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00094 * ELSE 00095 * LLD_A >= 1. 00096 * 00097 * Let K be the number of rows of a matrix A starting at the global in- 00098 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00099 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00100 * receive if these K rows were distributed over NPROW processes. If K 00101 * is the number of columns of a matrix A starting at the global index 00102 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00103 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00104 * these K columns were distributed over NPCOL processes. 00105 * 00106 * The values of Lr() and Lc() may be determined via a call to the func- 00107 * tion PB_Cnumroc: 00108 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00109 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00110 * 00111 * Arguments 00112 * ========= 00113 * 00114 * TYPE (local input) pointer to a PBTYP_T structure 00115 * On entry, TYPE is a pointer to a structure of type PBTYP_T, 00116 * that contains type information (See pblas.h). 00117 * 00118 * UPLO (global input) pointer to CHAR 00119 * On entry, UPLO specifies the part of the submatrix sub( A ) 00120 * to be set: 00121 * = 'L' or 'l': Lower triangular part is set; the strictly 00122 * upper triangular part of sub( A ) is not changed; 00123 * = 'U' or 'u': Upper triangular part is set; the strictly 00124 * lower triangular part of sub( A ) is not changed; 00125 * Otherwise: All of the matrix sub( A ) is set. 00126 * 00127 * CONJUG (global input) pointer to CHAR 00128 * On entry, CONJUG specifies what should be done to the diago- 00129 * nals as follows. When UPLO is 'L', 'l', 'U' or 'u' and CONJUG 00130 * is 'Z' or 'z', the imaginary part of the diagonals is set to 00131 * zero. Otherwise, the diagonals are set to beta. 00132 * 00133 * M (global input) INTEGER 00134 * On entry, M specifies the number of rows of the submatrix 00135 * sub( A ). M must be at least zero. 00136 * 00137 * N (global input) INTEGER 00138 * On entry, N specifies the number of columns of the submatrix 00139 * sub( A ). N must be at least zero. 00140 * 00141 * ALPHA (global input) pointer to CHAR 00142 * On entry, ALPHA specifies the scalar alpha, i.e., the cons- 00143 * tant to which the offdiagonal elements are to be set. 00144 * 00145 * BETA (global input) pointer to CHAR 00146 * On entry, BETA specifies the scalar beta, i.e., the constant 00147 * to which the diagonal elements are to be set. BETA is not re- 00148 * ferenced when UPLO is 'L', 'l', 'U' or 'u' and CONJUG is 'Z'. 00149 * 00150 * A (local input/local output) pointer to CHAR 00151 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00152 * at least Lc( 1, JA+N-1 ). Before entry, this array contains 00153 * the local entries of the matrix A to be set. On exit, the 00154 * leading m by n submatrix sub( A ) is set as follows: 00155 * 00156 * UPLO = 'L' or 'l', A(IA+i-1,JA+j-1)=ALPHA, j+1<=i<=M, 1<=j<=N 00157 * UPLO = 'U' or 'u', A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=j-1, 1<=j<=N 00158 * otherwise, A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=M, 1<=j<=N 00159 * and IA+i.NE.JA+j, 00160 * and, for all UPLO, A(IA+i-1,JA+i-1) = BETA, 1<=i<=min(M,N). 00161 * 00162 * IA (global input) INTEGER 00163 * On entry, IA specifies A's global row index, which points to 00164 * the beginning of the submatrix sub( A ). 00165 * 00166 * JA (global input) INTEGER 00167 * On entry, JA specifies A's global column index, which points 00168 * to the beginning of the submatrix sub( A ). 00169 * 00170 * DESCA (global and local input) INTEGER array 00171 * On entry, DESCA is an integer array of dimension DLEN_. This 00172 * is the array descriptor for the matrix A. 00173 * 00174 * -- Written on April 1, 1998 by 00175 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00176 * 00177 * --------------------------------------------------------------------- 00178 */ 00179 /* 00180 * .. Local Scalars .. 00181 */ 00182 char type; 00183 int Acol, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0, 00184 Anb, Anq, Anq0, Arow, ctxt, izero=0, k, kb, ktmp, mn, mycol, 00185 myrow, nb, npcol, nprow, size; 00186 TZPAD_T pad; 00187 /* 00188 * .. Local Arrays .. 00189 */ 00190 int Ad0[DLEN_]; 00191 char * Aptr = NULL; 00192 /* .. 00193 * .. Executable Statements .. 00194 * 00195 */ 00196 /* 00197 * Quick return if possible 00198 */ 00199 if( ( M <= 0 ) || ( N <= 0 ) ) return; 00200 /* 00201 * Retrieve process grid information 00202 */ 00203 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00204 /* 00205 * Compute descriptor Ad0 for sub( A ) 00206 */ 00207 PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, 00208 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 ); 00209 /* 00210 * Quick return if I don't own any of sub( A ). 00211 */ 00212 Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow ); 00213 Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol ); 00214 if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; 00215 00216 size = TYPE->size; type = TYPE->type; pad = TYPE->Ftzpad; 00217 Aptr = Mptr( A, Aii, Ajj, Ald, size ); 00218 /* 00219 * When the entire sub( A ) needs to be padded and alpha is equal to beta, or 00220 * sub( A ) is replicated in all processes, just call the local routine. 00221 */ 00222 if( type == SREAL ) 00223 { 00224 if( ( ( Mupcase( UPLO[0] ) == CALL ) && 00225 ( ((float*)(ALPHA))[REAL_PART] == 00226 ((float*)(BETA ))[REAL_PART] ) ) || 00227 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && 00228 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) 00229 { 00230 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, 00231 BETA, Aptr, &Ald ); 00232 return; 00233 } 00234 } 00235 else if( type == DREAL ) 00236 { 00237 if( ( ( Mupcase( UPLO[0] ) == CALL ) && 00238 ( ((double*)(ALPHA))[REAL_PART] == 00239 ((double*)(BETA ))[REAL_PART] ) ) || 00240 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && 00241 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) 00242 { 00243 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, 00244 BETA, Aptr, &Ald ); 00245 return; 00246 } 00247 } 00248 else if( type == SCPLX ) 00249 { 00250 if( ( ( Mupcase( UPLO[0] ) == CALL ) && 00251 ( ((float*)(ALPHA))[REAL_PART] == 00252 ((float*)(BETA ))[REAL_PART] ) && 00253 ( ((float*)(ALPHA))[IMAG_PART] == 00254 ((float*)(BETA ))[IMAG_PART] ) ) || 00255 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && 00256 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) 00257 { 00258 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, 00259 BETA, Aptr, &Ald ); 00260 return; 00261 } 00262 } 00263 else if( type == DCPLX ) 00264 { 00265 if( ( ( Mupcase( UPLO[0] ) == CALL ) && 00266 ( ((double*)(ALPHA))[REAL_PART] == 00267 ((double*)(BETA ))[REAL_PART] ) && 00268 ( ((double*)(ALPHA))[IMAG_PART] == 00269 ((double*)(BETA ))[IMAG_PART] ) ) || 00270 ( ( ( Arow < 0 ) || ( nprow == 1 ) ) && 00271 ( ( Acol < 0 ) || ( npcol == 1 ) ) ) ) 00272 { 00273 pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA, 00274 BETA, Aptr, &Ald ); 00275 return; 00276 } 00277 } 00278 /* 00279 * Computational partitioning size is computed as the product of the logical 00280 * value returned by pilaenv_ and two times the least common multiple of nprow 00281 * and npcol. 00282 */ 00283 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) * 00284 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) ); 00285 00286 mn = MIN( M, N ); 00287 00288 if( Mupcase( UPLO[0] ) == CLOWER ) 00289 { 00290 /* 00291 * Lower triangle of sub( A ): proceed by block of columns. For each block of 00292 * columns, operate on the logical diagonal block first and then the remaining 00293 * rows of that block of columns. 00294 */ 00295 for( k = 0; k < mn; k += nb ) 00296 { 00297 kb = mn - k; ktmp = k + ( kb = MIN( kb, nb ) ); 00298 PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 ); 00299 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow ); 00300 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00301 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00302 if( ( Amp0 = Amp - Akp ) > 0 ) 00303 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero, 00304 ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); 00305 } 00306 } 00307 else if( Mupcase( UPLO[0] ) == CUPPER ) 00308 { 00309 /* 00310 * Upper triangle of sub( A ): proceed by block of columns. For each block of 00311 * columns, operate on the trailing rows and then the logical diagonal block 00312 * of that block of columns. When M < N, the last columns of sub( A ) are 00313 * handled together. 00314 */ 00315 for( k = 0; k < mn; k += nb ) 00316 { 00317 kb = mn - k; kb = MIN( kb, nb ); 00318 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00319 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00320 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00321 if( Akp > 0 ) 00322 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero, 00323 ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); 00324 PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 ); 00325 } 00326 if( ( Anq -= ( Akq += Anq0 ) ) > 0 ) 00327 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA, 00328 ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); 00329 } 00330 else 00331 { 00332 /* 00333 * All of sub( A ): proceed by block of columns. For each block of columns, 00334 * operate on the trailing rows, then the logical diagonal block, and finally 00335 * the remaining rows of that block of columns. When M < N, the last columns 00336 * of sub( A ) are handled together. 00337 */ 00338 for( k = 0; k < mn; k += nb ) 00339 { 00340 kb = mn - k; kb = MIN( kb, nb ); 00341 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow ); 00342 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol ); 00343 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol ); 00344 if( Akp > 0 ) 00345 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero, 00346 ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); 00347 PB_Cplapd2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, BETA, Aptr, k, k, 00348 Ad0 ); 00349 Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow ); 00350 if( ( Amp0 = Amp - Akp ) > 0 ) 00351 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero, 00352 ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald ); 00353 } 00354 if( ( Anq -= ( Akq += Anq0 ) ) > 0 ) 00355 pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA, 00356 ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald ); 00357 } 00358 /* 00359 * End of PB_Cplapad 00360 */ 00361 }