|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS auxiliary routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "../pblas.h" 00014 #include "../PBpblas.h" 00015 #include "../PBtools.h" 00016 #include "../PBblacs.h" 00017 #include "../PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void PB_Cpsym( PBTYP_T * TYPE, PBTYP_T * UTYP, char * SIDE, 00021 char * UPLO, int N, int K, char * ALPHA, char * A, 00022 int IA, int JA, int * DESCA, char * XC, int LDXC, 00023 char * XR, int LDXR, char * YC, int LDYC, char * YR, 00024 int LDYR, TZSYM_T SYM ) 00025 #else 00026 void PB_Cpsym( TYPE, UTYP, SIDE, UPLO, N, K, ALPHA, A, IA, JA, DESCA, 00027 XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR, SYM ) 00028 /* 00029 * .. Scalar Arguments .. 00030 */ 00031 char * SIDE, * UPLO; 00032 int IA, JA, K, LDXC, LDXR, LDYC, LDYR, N; 00033 char * ALPHA; 00034 PBTYP_T * TYPE, * UTYP; 00035 TZSYM_T SYM; 00036 /* 00037 * .. Array Arguments .. 00038 */ 00039 int * DESCA; 00040 char * A, * XC, * XR, * YC, * YR; 00041 #endif 00042 { 00043 /* 00044 * Purpose 00045 * ======= 00046 * 00047 * PB_Cpsym performs a symmetric or Hermitian matrix-matrix or matrix- 00048 * vector multiplication. In the following, sub( A ) denotes the symme- 00049 * tric or Hermitian submatrix operand A( IA:IA+N-1, JA:JA+N-1 ). 00050 * 00051 * Notes 00052 * ===== 00053 * 00054 * A description vector is associated with each 2D block-cyclicly dis- 00055 * tributed matrix. This vector stores the information required to 00056 * establish the mapping between a matrix entry and its corresponding 00057 * process and memory location. 00058 * 00059 * In the following comments, the character _ should be read as 00060 * "of the distributed matrix". Let A be a generic term for any 2D 00061 * block cyclicly distributed matrix. Its description vector is DESC_A: 00062 * 00063 * NOTATION STORED IN EXPLANATION 00064 * ---------------- --------------- ------------------------------------ 00065 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00066 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00067 * the NPROW x NPCOL BLACS process grid 00068 * A is distributed over. The context 00069 * itself is global, but the handle 00070 * (the integer value) may vary. 00071 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00072 * ted matrix A, M_A >= 0. 00073 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00074 * buted matrix A, N_A >= 0. 00075 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00076 * block of the matrix A, IMB_A > 0. 00077 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00078 * left block of the matrix A, 00079 * INB_A > 0. 00080 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00081 * bute the last M_A-IMB_A rows of A, 00082 * MB_A > 0. 00083 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00084 * bute the last N_A-INB_A columns of 00085 * A, NB_A > 0. 00086 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00087 * row of the matrix A is distributed, 00088 * NPROW > RSRC_A >= 0. 00089 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00090 * first column of A is distributed. 00091 * NPCOL > CSRC_A >= 0. 00092 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00093 * array storing the local blocks of 00094 * the distributed matrix A, 00095 * IF( Lc( 1, N_A ) > 0 ) 00096 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00097 * ELSE 00098 * LLD_A >= 1. 00099 * 00100 * Let K be the number of rows of a matrix A starting at the global in- 00101 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00102 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00103 * receive if these K rows were distributed over NPROW processes. If K 00104 * is the number of columns of a matrix A starting at the global index 00105 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00106 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00107 * these K columns were distributed over NPCOL processes. 00108 * 00109 * The values of Lr() and Lc() may be determined via a call to the func- 00110 * tion PB_Cnumroc: 00111 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00112 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00113 * 00114 * Arguments 00115 * ========= 00116 * 00117 * TYPE (local input) pointer to a PBTYP_T structure 00118 * On entry, TYPE is a pointer to a structure of type PBTYP_T, 00119 * that contains type information (See pblas.h). 00120 * 00121 * UTYP (local input) pointer to a PBTYP_T structure 00122 * On entry, UTYP is a pointer to a structure of type PBTYP_T, 00123 * that contains type information for the Y's (See pblas.h). 00124 * 00125 * SIDE (global input) pointer to CHAR 00126 * On entry, SIDE specifies whether op( sub( A ) ) multiplies 00127 * its operand X from the left or right as follows: 00128 * 00129 * SIDE = 'L' or 'l' Y := alpha*op( sub( A ) )*X + Y, 00130 * 00131 * SIDE = 'R' or 'r' Y := alpha*X*op( sub( A ) ) + Y. 00132 * 00133 * UPLO (global input) pointer to CHAR 00134 * On entry, UPLO specifies whether the local pieces of 00135 * the array A containing the upper or lower triangular part 00136 * of the symmetric or Hermitian submatrix sub( A ) are to be 00137 * referenced as follows: 00138 * 00139 * UPLO = 'U' or 'u' Only the local pieces corresponding to 00140 * the upper triangular part of the sym- 00141 * metric or Hermitian submatrix sub( A ) 00142 * are to be referenced, 00143 * 00144 * UPLO = 'L' or 'l' Only the local pieces corresponding to 00145 * the lower triangular part of the sym- 00146 * metric or Hermitian submatrix sub( A ) 00147 * are to be referenced. 00148 * 00149 * N (global input) INTEGER 00150 * On entry, N specifies the order of the submatrix sub( A ). 00151 * N must be at least zero. 00152 * 00153 * K (global input) INTEGER 00154 * On entry, K specifies the local number of columns of the lo- 00155 * cal array XC and the local number of rows of the local array 00156 * XR. K mut be at least zero. 00157 * 00158 * ALPHA (global input) pointer to CHAR 00159 * On entry, ALPHA specifies the scalar alpha. 00160 * 00161 * A (local input) pointer to CHAR 00162 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00163 * at least Lc( 1, JA+N-1 ). Before entry, this array contains 00164 * the local entries of the matrix A. 00165 * Before entry with UPLO = 'U' or 'u', this array contains 00166 * the local entries corresponding to the upper triangular part 00167 * of the @(syhec) submatrix sub( A ), and the local entries 00168 * corresponding to the strictly lower triangular of sub( A ) 00169 * are not referenced. 00170 * Before entry with UPLO = 'L' or 'l', this array contains 00171 * the local entries corresponding to the lower triangular part 00172 * of the @(syhec) submatrix sub( A ), and the local entries 00173 * corresponding to the strictly upper triangular of sub( A ) 00174 * are not referenced. 00175 * 00176 * IA (global input) INTEGER 00177 * On entry, IA specifies A's global row index, which points to 00178 * the beginning of the submatrix sub( A ). 00179 * 00180 * JA (global input) INTEGER 00181 * On entry, JA specifies A's global column index, which points 00182 * to the beginning of the submatrix sub( A ). 00183 * 00184 * DESCA (global and local input) INTEGER array 00185 * On entry, DESCA is an integer array of dimension DLEN_. This 00186 * is the array descriptor for the matrix A. 00187 * 00188 * XC (local input) pointer to CHAR 00189 * On entry, XC is an array of dimension (LDXC,K). Before entry, 00190 * this array contains the local entries of the matrix XC. 00191 * 00192 * LDXC (local input) INTEGER 00193 * On entry, LDXC specifies the leading dimension of the array 00194 * XC. LDXC must be at least max( 1, Lp( IA, N ) ). 00195 * 00196 * XR (local input) pointer to CHAR 00197 * On entry, XR is an array of dimension (LDXR,Kx), where Kx is 00198 * at least Lc( JA, N ). Before entry, this array contains the 00199 * local entries of the matrix XR. 00200 * 00201 * LDXR (local input) INTEGER 00202 * On entry, LDXR specifies the leading dimension of the array 00203 * XR. LDXR must be at least max( 1, K ). 00204 * 00205 * YC (local input/local output) pointer to CHAR 00206 * On entry, YC is an array of dimension (LDYC,K). Before entry, 00207 * this array contains the local entries of the matrix YC. On 00208 * exit, this array contains the updated vector YC. 00209 * 00210 * LDYC (local input) INTEGER 00211 * On entry, LDYC specifies the leading dimension of the array 00212 * YC. LDYC must be at least max( 1, Lp( IA, N ) ). 00213 * 00214 * YR (local input/local output) pointer to CHAR 00215 * On entry, YR is an array of dimension (LDYR,Ky), where Ky is 00216 * at least Lc( JA, N ). Before entry, this array contains the 00217 * local entries of the matrix YR. On exit, this array contains 00218 * the updated vector YR. 00219 * 00220 * LDYR (local input) INTEGER 00221 * On entry, LDYR specifies the leading dimension of the array 00222 * YR. LDYR must be at least max( 1, K ). 00223 * 00224 * SYM (local input) pointer to function of type TZSYM_T 00225 * On entry, SYM specifies the function performing the symmetric 00226 * or Hermitian matrix-vector or matrix-matrix multiply of a 00227 * single block. 00228 * 00229 * -- Written on April 1, 1998 by 00230 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00231 * 00232 * --------------------------------------------------------------------- 00233 */ 00234 /* 00235 * .. Local Scalars .. 00236 */ 00237 int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq, 00238 Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl, 00239 IsRowRepl, XCinc, XRinc, Xii=0, Xjj=0, Xoffi=-1, Xoffj=-1, 00240 YCinc, YRinc, iimax, ilow, imbloc, inbloc, ioffd, ioffx, iupp, 00241 jjmax, joffd, joffx, lcmt, lcmt00, lmbloc, lnbloc, low, lower, 00242 m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd, 00243 nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper; 00244 /* .. 00245 * .. Executable Statements .. 00246 * 00247 */ 00248 /* 00249 * Quick return if possible 00250 */ 00251 if( N <= 0 ) return; 00252 /* 00253 * Retrieve process grid information 00254 */ 00255 Cblacs_gridinfo( DESCA[CTXT_], &nprow, &npcol, &myrow, &mycol ); 00256 /* 00257 * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ... 00258 */ 00259 PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1, 00260 &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol ); 00261 /* 00262 * Quick return if I don't own any of sub( A ) or if sub( A ) is replicated in 00263 * all processes. 00264 */ 00265 if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return; 00266 00267 IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) ); 00268 IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) ); 00269 Amb = DESCA[ MB_ ]; Anb = DESCA[ NB_ ]; Ald = DESCA[ LLD_ ]; 00270 size = TYPE->size; 00271 00272 if( IsRowRepl && IsColRepl ) 00273 { 00274 SYM( TYPE, SIDE, UPLO, Amp, Anq, K, 0, ALPHA, Mptr( A, Aii, Ajj, Ald, 00275 size ), Ald, XC, LDXC, XR, LDXR, YC, LDYC, YR, LDYR ); 00276 return; 00277 } 00278 00279 XCinc = size; XRinc = LDXR * size; 00280 YCinc = UTYP->size; YRinc = LDYR * UTYP->size; 00281 upper = ( Mupcase( UPLO[0] ) == CUPPER ); 00282 lower = ( Mupcase( UPLO[0] ) == CLOWER ); 00283 /* 00284 * Initialize lcmt00, mblks, nblks, imbloc, inbloc, lmbloc, lnbloc, ilow, low, 00285 * iupp, and upp. 00286 */ 00287 PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00, 00288 &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low, 00289 &iupp, &upp ); 00290 00291 iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp ); 00292 jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq ); 00293 pmb = ( IsRowRepl ? Amb : nprow * Amb ); 00294 qnb = ( IsColRepl ? Anb : npcol * Anb ); 00295 /* 00296 * Handle separately the first row and/or column of the LCM table. Update the 00297 * LCM value of the curent block lcmt00, as well as the number of rows and 00298 * columns mblks and nblks remaining in the LCM table. 00299 */ 00300 GoSouth = ( lcmt00 > iupp ); 00301 GoEast = ( lcmt00 < ilow ); 00302 /* 00303 * Go through the table looking for blocks owning diagonal entries. 00304 */ 00305 if( ( !( GoSouth ) ) && ( !( GoEast ) ) ) 00306 { 00307 /* 00308 * The upper left block owns diagonal entries lcmt00 >= ilow && lcmt00 <= iupp 00309 */ 00310 SYM( TYPE, SIDE, UPLO, imbloc, inbloc, K, lcmt00, ALPHA, Mptr( A, Aii, 00311 Ajj, Ald, size ), Ald, XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR, 00312 YC+Xii*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); 00313 /* 00314 * Decide whether one should go south or east in the table: Go east if the 00315 * block below the current one only owns lower entries. If this block, however, 00316 * owns diagonals, then go south. 00317 */ 00318 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) ); 00319 00320 if( GoSouth ) 00321 { 00322 /* 00323 * When the upper triangular part of sub( A ) should be operated with and 00324 * one is planning to go south in the table, it is neccessary to take care 00325 * of the remaining columns of these imbloc rows immediately. 00326 */ 00327 if( upper && ( Anq > inbloc ) ) 00328 { 00329 tmp1 = Anq - inbloc; 00330 SYM( TYPE, SIDE, ALL, imbloc, tmp1, K, 0, 00331 ALPHA, Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald, 00332 XC+Xii*XCinc, LDXC, XR+(Xjj+inbloc)*XRinc, LDXR, 00333 YC+Xii*YCinc, LDYC, YR+(Xjj+inbloc)*YRinc, LDYR ); 00334 } 00335 Aii += imbloc; Xii += imbloc; m1 -= imbloc; 00336 } 00337 else 00338 { 00339 /* 00340 * When the lower triangular part of sub( A ) should be operated with and 00341 * one is planning to go east in the table, it is neccessary to take care 00342 * of the remaining rows of these inbloc columns immediately. 00343 */ 00344 if( lower && ( Amp > imbloc ) ) 00345 { 00346 tmp1 = Amp - imbloc; 00347 SYM( TYPE, SIDE, ALL, tmp1, inbloc, K, 0, 00348 ALPHA, Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald, 00349 XC+(Xii+imbloc)*XCinc, LDXC, XR+Xjj*XRinc, LDXR, 00350 YC+(Xii+imbloc)*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); 00351 } 00352 Ajj += inbloc; Xjj += inbloc; n1 -= inbloc; 00353 } 00354 } 00355 00356 if( GoSouth ) 00357 { 00358 /* 00359 * Go one step south in the LCM table. Adjust the current LCM value as well as 00360 * the local row indexes in A and XC. 00361 */ 00362 lcmt00 -= ( iupp - upp + pmb ); mblks--; 00363 Aoffi += imbloc; Xoffi += imbloc; 00364 /* 00365 * While there are blocks remaining that own upper entries, keep going south. 00366 * Adjust the current LCM value as well as the local row indexes in A and XC. 00367 */ 00368 while( ( mblks > 0 ) && ( lcmt00 > upp ) ) 00369 { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } 00370 /* 00371 * Operate with the upper triangular part of sub( A ) we just skipped when 00372 * necessary. 00373 */ 00374 tmp1 = MIN( Aoffi, iimax ) - Aii + 1; 00375 if( upper && ( tmp1 > 0 ) ) 00376 { 00377 SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, 00378 ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, 00379 XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00380 YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00381 Aii += tmp1; Xii += tmp1; m1 -= tmp1; 00382 } 00383 /* 00384 * Return if no more row in the LCM table. 00385 */ 00386 if( mblks <= 0 ) return; 00387 /* 00388 * lcmt00 <= upp. The current block owns either diagonals or lower entries. 00389 * Save the current position in the LCM table. After this column has been 00390 * completely taken care of, re-start from this row and the next column of 00391 * the LCM table. 00392 */ 00393 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; 00394 00395 mbloc = Amb; 00396 while( ( mblkd > 0 ) && ( lcmt >= ilow ) ) 00397 { 00398 /* 00399 * A block owning diagonals lcmt00 >= ilow && lcmt00 <= upp has been found. 00400 */ 00401 if( mblkd == 1 ) mbloc = lmbloc; 00402 SYM( TYPE, SIDE, UPLO, mbloc, inbloc, K, lcmt, 00403 ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, 00404 XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00405 YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00406 lcmt00 = lcmt; lcmt -= pmb; 00407 mblks = mblkd; mblkd--; 00408 Aoffi = ioffd; ioffd += mbloc; 00409 Xoffi = ioffx; ioffx += mbloc; 00410 } 00411 /* 00412 * Operate with the lower triangular part of sub( A ). 00413 */ 00414 tmp1 = m1 - ioffd + Aii - 1; 00415 if( lower && ( tmp1 > 0 ) ) 00416 SYM( TYPE, SIDE, ALL, tmp1, inbloc, K, 0, 00417 ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, 00418 XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00419 YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00420 00421 tmp1 = Aoffi - Aii + 1; 00422 m1 -= tmp1; 00423 n1 -= inbloc; 00424 lcmt00 += low - ilow + qnb; 00425 nblks--; 00426 Aoffj += inbloc; 00427 Xoffj += inbloc; 00428 /* 00429 * Operate with the upper triangular part of sub( A ). 00430 */ 00431 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) 00432 SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, 00433 ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, 00434 XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00435 YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00436 Aii = Aoffi + 1; Ajj = Aoffj + 1; 00437 Xii = Xoffi + 1; Xjj = Xoffj + 1; 00438 } 00439 else if( GoEast ) 00440 { 00441 /* 00442 * Go one step east in the LCM table. Adjust the current LCM value as well as 00443 * the local column index in A and XR. 00444 */ 00445 lcmt00 += low - ilow + qnb; nblks--; 00446 Aoffj += inbloc; Xoffj += inbloc; 00447 /* 00448 * While there are blocks remaining that own lower entries, keep going east. 00449 * Adjust the current LCM value as well as the local column index in A and XR. 00450 */ 00451 while( ( nblks > 0 ) && ( lcmt00 < low ) ) 00452 { lcmt00 += qnb; nblks--; Aoffj += Anb; Xoffj += Anb; } 00453 /* 00454 * Operate with the lower triangular part of sub( A ). 00455 */ 00456 tmp1 = MIN( Aoffj, jjmax ) - Ajj + 1; 00457 if( lower && ( tmp1 > 0 ) ) 00458 { 00459 SYM( TYPE, SIDE, ALL, m1, tmp1, K, 0, 00460 ALPHA, Mptr( A, Aii, Ajj, Ald, size ), Ald, 00461 XC+Xii*XCinc, LDXC, XR+Xjj*XRinc, LDXR, 00462 YC+Xii*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); 00463 Ajj += tmp1; Xjj += tmp1; n1 -= tmp1; 00464 } 00465 /* 00466 * Return if no more column in the LCM table. 00467 */ 00468 if( nblks <= 0 ) return; 00469 /* 00470 * lcmt00 >= low. The current block owns either diagonals or upper entries. 00471 * Save the current position in the LCM table. After this row has been 00472 * completely taken care of, re-start from this column and the next row of 00473 * the LCM table. 00474 */ 00475 lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffx = Xoffj; 00476 00477 nbloc = Anb; 00478 while( ( nblkd > 0 ) && ( lcmt <= iupp ) ) 00479 { 00480 /* 00481 * A block owning diagonals lcmt00 >= low && lcmt00 <= iupp has been found. 00482 */ 00483 if( nblkd == 1 ) nbloc = lnbloc; 00484 SYM( TYPE, SIDE, UPLO, imbloc, nbloc, K, lcmt, 00485 ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, 00486 XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR, 00487 YC+Xii*YCinc, LDYC, YR+(joffx+1)*YRinc, LDYR ); 00488 lcmt00 = lcmt; lcmt += qnb; 00489 nblks = nblkd; nblkd--; 00490 Aoffj = joffd; joffd += nbloc; 00491 Xoffj = joffx; joffx += nbloc; 00492 } 00493 /* 00494 * Operate with the upper triangular part of sub( A ). 00495 */ 00496 tmp1 = n1 - joffd + Ajj - 1; 00497 if( upper && ( tmp1 > 0 ) ) 00498 SYM( TYPE, SIDE, ALL, imbloc, tmp1, K, 0, 00499 ALPHA, Mptr( A, Aii, joffd+1, Ald, size ), Ald, 00500 XC+Xii*XCinc, LDXC, XR+(joffx+1)*XRinc, LDXR, 00501 YC+Xii*YCinc, LDYC, YR+(joffx+1)*YRinc, LDYR ); 00502 00503 tmp1 = Aoffj - Ajj + 1; 00504 m1 -= imbloc; 00505 n1 -= tmp1; 00506 lcmt00 -= ( iupp - upp + pmb ); 00507 mblks--; 00508 Aoffi += imbloc; 00509 Xoffi += imbloc; 00510 /* 00511 * Operate with the lower triangular part of sub( A ). 00512 */ 00513 if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) ) 00514 SYM( TYPE, SIDE, ALL, m1, tmp1, K, 0, 00515 ALPHA, Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, 00516 XC+(Xoffi+1)*XCinc, LDXC, XR+Xjj*XRinc, LDXR, 00517 YC+(Xoffi+1)*YCinc, LDYC, YR+Xjj*YRinc, LDYR ); 00518 Aii = Aoffi + 1; Ajj = Aoffj + 1; 00519 Xii = Xoffi + 1; Xjj = Xoffj + 1; 00520 } 00521 /* 00522 * Loop over the remaining columns of the LCM table. 00523 */ 00524 nbloc = Anb; 00525 while( nblks > 0 ) 00526 { 00527 if( nblks == 1 ) nbloc = lnbloc; 00528 /* 00529 * While there are blocks remaining that own upper entries, keep going south. 00530 * Adjust the current LCM value as well as the local row index in A and XC. 00531 */ 00532 while( ( mblks > 0 ) && ( lcmt00 > upp ) ) 00533 { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; } 00534 /* 00535 * Operate with the upper triangular part of sub( A ). 00536 */ 00537 tmp1 = MIN( Aoffi, iimax ) - Aii + 1; 00538 if( upper && ( tmp1 > 0 ) ) 00539 { 00540 SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, 00541 ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, 00542 XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00543 YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00544 Aii += tmp1; 00545 Xii += tmp1; 00546 m1 -= tmp1; 00547 } 00548 /* 00549 * Return if no more row in the LCM table. 00550 */ 00551 if( mblks <= 0 ) return; 00552 /* 00553 * lcmt00 <= upp. The current block owns either diagonals or lower entries. 00554 * Save the current position in the LCM table. After this column has been 00555 * completely taken care of, re-start from this row and the next column of 00556 * the LCM table. 00557 */ 00558 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi; 00559 00560 mbloc = Amb; 00561 while( ( mblkd > 0 ) && ( lcmt >= low ) ) 00562 { 00563 /* 00564 * A block owning diagonals lcmt00 >= low && lcmt00 <= upp has been found. 00565 */ 00566 if( mblkd == 1 ) mbloc = lmbloc; 00567 SYM( TYPE, SIDE, UPLO, mbloc, nbloc, K, lcmt, 00568 ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, 00569 XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00570 YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00571 lcmt00 = lcmt; lcmt -= pmb; 00572 mblks = mblkd; mblkd--; 00573 Aoffi = ioffd; Xoffi = ioffx; 00574 ioffd += mbloc; ioffx += mbloc; 00575 } 00576 /* 00577 * Operate with the lower triangular part of sub( A ). 00578 */ 00579 tmp1 = m1 - ioffd + Aii - 1; 00580 if( lower && ( tmp1 > 0 ) ) 00581 SYM( TYPE, SIDE, ALL, tmp1, nbloc, K, 0, 00582 ALPHA, Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald, 00583 XC+(ioffx+1)*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00584 YC+(ioffx+1)*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00585 00586 tmp1 = MIN( Aoffi, iimax ) - Aii + 1; 00587 m1 -= tmp1; 00588 n1 -= nbloc; 00589 lcmt00 += qnb; 00590 nblks--; 00591 Aoffj += nbloc; 00592 Xoffj += nbloc; 00593 /* 00594 * Operate with the upper triangular part of sub( A ). 00595 */ 00596 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) ) 00597 SYM( TYPE, SIDE, ALL, tmp1, n1, K, 0, 00598 ALPHA, Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, 00599 XC+Xii*XCinc, LDXC, XR+(Xoffj+1)*XRinc, LDXR, 00600 YC+Xii*YCinc, LDYC, YR+(Xoffj+1)*YRinc, LDYR ); 00601 Aii = Aoffi + 1; Ajj = Aoffj + 1; 00602 Xii = Xoffi + 1; Xjj = Xoffj + 1; 00603 } 00604 /* 00605 * End of PB_Cpsym 00606 */ 00607 }