|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 /* --------------------------------------------------------------------- 00002 * 00003 * -- PBLAS auxiliary routine (version 2.0) -- 00004 * University of Tennessee, Knoxville, Oak Ridge National Laboratory, 00005 * and University of California, Berkeley. 00006 * April 1, 1998 00007 * 00008 * --------------------------------------------------------------------- 00009 */ 00010 /* 00011 * Include files 00012 */ 00013 #include "../pblas.h" 00014 #include "../PBpblas.h" 00015 #include "../PBtools.h" 00016 #include "../PBblacs.h" 00017 #include "../PBblas.h" 00018 00019 #ifdef __STDC__ 00020 void PB_Cptran( PBTYP_T * TYPE, char * CONJUG, int M, int N, 00021 char * ALPHA, char * A, int IA, int JA, int * DESCA, 00022 char * BETA, char * C, int IC, int JC, int * DESCC ) 00023 #else 00024 void PB_Cptran( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, BETA, 00025 C, IC, JC, DESCC ) 00026 /* 00027 * .. Scalar Arguments .. 00028 */ 00029 char * CONJUG; 00030 int IA, IC, JA, JC, M, N; 00031 char * ALPHA, * BETA; 00032 PBTYP_T * TYPE; 00033 /* 00034 * .. Array Arguments .. 00035 */ 00036 int * DESCA, * DESCC; 00037 char * A, * C; 00038 #endif 00039 { 00040 /* 00041 * Purpose 00042 * ======= 00043 * 00044 * PB_Cptran transposes a matrix 00045 * 00046 * sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) 00047 * 00048 * where 00049 * 00050 * sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), 00051 * 00052 * sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, 00053 * 00054 * op( X ) = X' or op( X ) = conjg( X )'. 00055 * 00056 * Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an 00057 * n by m submatrix. 00058 * 00059 * Notes 00060 * ===== 00061 * 00062 * A description vector is associated with each 2D block-cyclicly dis- 00063 * tributed matrix. This vector stores the information required to 00064 * establish the mapping between a matrix entry and its corresponding 00065 * process and memory location. 00066 * 00067 * In the following comments, the character _ should be read as 00068 * "of the distributed matrix". Let A be a generic term for any 2D 00069 * block cyclicly distributed matrix. Its description vector is DESC_A: 00070 * 00071 * NOTATION STORED IN EXPLANATION 00072 * ---------------- --------------- ------------------------------------ 00073 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type. 00074 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating 00075 * the NPROW x NPCOL BLACS process grid 00076 * A is distributed over. The context 00077 * itself is global, but the handle 00078 * (the integer value) may vary. 00079 * M_A (global) DESCA[ M_ ] The number of rows in the distribu- 00080 * ted matrix A, M_A >= 0. 00081 * N_A (global) DESCA[ N_ ] The number of columns in the distri- 00082 * buted matrix A, N_A >= 0. 00083 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left 00084 * block of the matrix A, IMB_A > 0. 00085 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper 00086 * left block of the matrix A, 00087 * INB_A > 0. 00088 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri- 00089 * bute the last M_A-IMB_A rows of A, 00090 * MB_A > 0. 00091 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri- 00092 * bute the last N_A-INB_A columns of 00093 * A, NB_A > 0. 00094 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first 00095 * row of the matrix A is distributed, 00096 * NPROW > RSRC_A >= 0. 00097 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the 00098 * first column of A is distributed. 00099 * NPCOL > CSRC_A >= 0. 00100 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local 00101 * array storing the local blocks of 00102 * the distributed matrix A, 00103 * IF( Lc( 1, N_A ) > 0 ) 00104 * LLD_A >= MAX( 1, Lr( 1, M_A ) ) 00105 * ELSE 00106 * LLD_A >= 1. 00107 * 00108 * Let K be the number of rows of a matrix A starting at the global in- 00109 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows 00110 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would 00111 * receive if these K rows were distributed over NPROW processes. If K 00112 * is the number of columns of a matrix A starting at the global index 00113 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co- 00114 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if 00115 * these K columns were distributed over NPCOL processes. 00116 * 00117 * The values of Lr() and Lc() may be determined via a call to the func- 00118 * tion PB_Cnumroc: 00119 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW ) 00120 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL ) 00121 * 00122 * Arguments 00123 * ========= 00124 * 00125 * TYPE (local input) pointer to a PBTYP_T structure 00126 * On entry, TYPE is a pointer to a structure of type PBTYP_T, 00127 * that contains type information (See pblas.h). 00128 * 00129 * CONJUG (global input) pointer to CHAR 00130 * On entry, CONJUG specifies whether conjg( sub( A ) ) or 00131 * sub( A ) should be added to sub( C ) as follows: 00132 * CONJUG = 'N' or 'n': 00133 * sub( C ) := beta*sub( C ) + alpha*sub( A )' 00134 * otherwise 00135 * sub( C ) := beta*sub( C ) + alpha*conjg( sub( A ) )'. 00136 * 00137 * M (global input) INTEGER 00138 * On entry, M specifies the number of rows of the submatrix 00139 * sub( C ) and the number of columns of the submatrix sub( A ). 00140 * M must be at least zero. 00141 * 00142 * N (global input) INTEGER 00143 * On entry, N specifies the number of columns of the submatrix 00144 * sub( C ) and the number of rows of the submatrix sub( A ). N 00145 * must be at least zero. 00146 * 00147 * ALPHA (global input) pointer to CHAR 00148 * On entry, ALPHA specifies the scalar alpha. When ALPHA is 00149 * supplied as zero then the local entries of the array A 00150 * corresponding to the entries of the submatrix sub( A ) need 00151 * not be set on input. 00152 * 00153 * A (local input) pointer to CHAR 00154 * On entry, A is an array of dimension (LLD_A, Ka), where Ka is 00155 * at least Lc( 1, JA+M-1 ). Before entry, this array contains 00156 * the local entries of the matrix A. 00157 * 00158 * IA (global input) INTEGER 00159 * On entry, IA specifies A's global row index, which points to 00160 * the beginning of the submatrix sub( A ). 00161 * 00162 * JA (global input) INTEGER 00163 * On entry, JA specifies A's global column index, which points 00164 * to the beginning of the submatrix sub( A ). 00165 * 00166 * DESCA (global and local input) INTEGER array 00167 * On entry, DESCA is an integer array of dimension DLEN_. This 00168 * is the array descriptor for the matrix A. 00169 * 00170 * BETA (global input) pointer to CHAR 00171 * On entry, BETA specifies the scalar beta. When BETA is 00172 * supplied as zero then the local entries of the array C 00173 * corresponding to the entries of the submatrix sub( C ) need 00174 * not be set on input. 00175 * 00176 * C (local input/local output) pointer to CHAR 00177 * On entry, C is an array of dimension (LLD_C, Kc), where Kc is 00178 * at least Lc( 1, JC+N-1 ). Before entry, this array contains 00179 * the local entries of the matrix C. 00180 * On exit, the entries of this array corresponding to the local 00181 * entries of the submatrix sub( C ) are overwritten by the 00182 * local entries of the m by n updated submatrix. 00183 * 00184 * IC (global input) INTEGER 00185 * On entry, IC specifies C's global row index, which points to 00186 * the beginning of the submatrix sub( C ). 00187 * 00188 * JC (global input) INTEGER 00189 * On entry, JC specifies C's global column index, which points 00190 * to the beginning of the submatrix sub( C ). 00191 * 00192 * DESCC (global and local input) INTEGER array 00193 * On entry, DESCC is an integer array of dimension DLEN_. This 00194 * is the array descriptor for the matrix C. 00195 * 00196 * -- Written on April 1, 1998 by 00197 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA. 00198 * 00199 * --------------------------------------------------------------------- 00200 */ 00201 /* 00202 * .. Local Scalars .. 00203 */ 00204 char Aroc, Croc, * one, * talpha, * tbeta, * zero; 00205 int ACnD, ACnR, Abufld, AcurrocR, Afr, AiD, AiR, AiiD, AiiR, 00206 AinbD, AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AmyprocD, 00207 AmyprocR, AnbD, AnbR, AnpD, AnpR, AnprocsD, AnprocsR, Aoff, 00208 ArocD, ArocR, AsrcR, Cbufld, CcurrocR, Cfr, CiD, CiR, CiiD, 00209 CiiR, CinbD, CinbR, Cinb1D, Cinb1R, CisR, Ckk, Cld, CmyprocD, 00210 CmyprocR, CnbD, CnbR, CnpD, CnpR, CnprocsD, CnprocsR, Coff, 00211 CrocD, CrocR, CsrcR, ctxt, col2row, gcdPQ, k, kb, kbb, l, 00212 lcmPQ, lcmb, maxp, maxq, mycol, myrow, ncpq, npcol, npq, 00213 nprow, nrpq, p, q, size; 00214 PB_VM_T VM; 00215 /* 00216 * .. Local Arrays .. 00217 */ 00218 int DBUFA[DLEN_], DBUFC[DLEN_]; 00219 char * Abuf = NULL, * Cbuf = NULL; 00220 /* .. 00221 * .. Executable Statements .. 00222 * 00223 */ 00224 /* 00225 * Retrieve process grid information 00226 */ 00227 Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol ); 00228 /* 00229 * Loop over the rows of sub( C ) when M <= N, and the columns of sub( C ) 00230 * otherwise. 00231 */ 00232 col2row = ( ( M <= N ) || ( nprow == 1 ) || ( DESCA[RSRC_] == -1 ) ); 00233 00234 if( col2row ) 00235 { 00236 AinbR = DESCA[INB_]; AnbR = DESCA[NB_]; AsrcR = DESCA[CSRC_]; 00237 CinbR = DESCC[IMB_]; CnbR = DESCC[MB_]; CsrcR = DESCC[RSRC_]; 00238 /* 00239 * If sub( A ) only spans one process column and sub( C ) spans only one process 00240 * row, then there is no need to pack the data. 00241 */ 00242 if( !( PB_Cspan( M, JA, AinbR, AnbR, AsrcR, npcol ) ) && 00243 !( PB_Cspan( M, IC, CinbR, CnbR, CsrcR, nprow ) ) ) 00244 { 00245 PB_Cpaxpby( TYPE, CONJUG, N, M, ALPHA, A, IA, JA, DESCA, COLUMN, BETA, 00246 C, IC, JC, DESCC, ROW ); 00247 return; 00248 } 00249 /* 00250 * Compute local information for sub( A ) and sub( C ) 00251 */ 00252 ACnR = M; ACnD = N; 00253 AmyprocD = CmyprocR = myrow; AnprocsD = CnprocsR = nprow; 00254 AmyprocR = CmyprocD = mycol; CnprocsD = AnprocsR = npcol; 00255 AiD = IA; AiR = JA; Aroc = CCOLUMN; 00256 AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_]; 00257 PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR, 00258 &AiiD, &AiiR, &ArocD, &ArocR ); 00259 CiD = JC; CiR = IC; Croc = CROW; 00260 CinbD = DESCC[INB_]; CnbD = DESCC[NB_]; Cld = DESCC[LLD_]; 00261 PB_Cinfog2l( IC, JC, DESCC, CnprocsR, CnprocsD, CmyprocR, CmyprocD, 00262 &CiiR, &CiiD, &CrocR, &CrocD ); 00263 } 00264 else 00265 { 00266 AinbR = DESCA[IMB_]; AnbR = DESCA[MB_]; AsrcR = DESCA[RSRC_]; 00267 CinbR = DESCC[INB_]; CnbR = DESCC[NB_]; CsrcR = DESCC[CSRC_]; 00268 /* 00269 * If sub( A ) only spans one process row and sub( C ) spans only one process 00270 * column, then there is no need to pack the data. 00271 */ 00272 if( !( PB_Cspan( N, IA, AinbR, AnbR, AsrcR, nprow ) ) && 00273 !( PB_Cspan( N, JC, CinbR, CnbR, CsrcR, npcol ) ) ) 00274 { 00275 PB_Cpaxpby( TYPE, CONJUG, N, M, ALPHA, A, IA, JA, DESCA, ROW, BETA, C, 00276 IC, JC, DESCC, COLUMN ); 00277 return; 00278 } 00279 /* 00280 * Compute local information for sub( A ) and sub( C ) 00281 */ 00282 ACnD = M; ACnR = N; 00283 AmyprocR = CmyprocD = myrow; AnprocsR = CnprocsD = nprow; 00284 AmyprocD = CmyprocR = mycol; AnprocsD = CnprocsR = npcol; 00285 00286 AiD = JA; AiR = IA; Aroc = CROW; 00287 AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_]; 00288 PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD, 00289 &AiiR, &AiiD, &ArocR, &ArocD ); 00290 CiD = IC; CiR = JC; Croc = CCOLUMN; 00291 CinbD = DESCC[IMB_]; CnbD = DESCC[MB_]; Cld = DESCC[LLD_]; 00292 PB_Cinfog2l( IC, JC, DESCC, CnprocsD, CnprocsR, CmyprocD, CmyprocR, 00293 &CiiD, &CiiR, &CrocD, &CrocR ); 00294 } 00295 00296 size = TYPE->size; one = TYPE->one; zero = TYPE->zero; 00297 kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) ); 00298 00299 Ainb1D = PB_Cfirstnb( ACnD, AiD, AinbD, AnbD ); 00300 AnpD = PB_Cnumroc( ACnD, 0, Ainb1D, AnbD, AmyprocD, ArocD, AnprocsD ); 00301 Ainb1R = PB_Cfirstnb( ACnR, AiR, AinbR, AnbR ); 00302 AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) ); 00303 00304 Cinb1D = PB_Cfirstnb( ACnD, CiD, CinbD, CnbD ); 00305 CnpD = PB_Cnumroc( ACnD, 0, Cinb1D, CnbD, CmyprocD, CrocD, CnprocsD ); 00306 Cinb1R = PB_Cfirstnb( ACnR, CiR, CinbR, CnbR ); 00307 CisR = ( ( CsrcR < 0 ) || ( CnprocsR == 1 ) ); 00308 00309 lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : CnprocsR ) ) * CnbR, 00310 ( maxq = ( AisR ? 1 : AnprocsR ) ) * AnbR ); 00311 gcdPQ = PB_Cgcd( maxp, maxq ); 00312 lcmPQ = ( maxp / gcdPQ ) * maxq; 00313 /* 00314 * Loop over the processes of the virtual grid 00315 */ 00316 for( k = 0; k < gcdPQ; k++ ) 00317 { 00318 p = 0; q = k; 00319 00320 for( l = 0; l < lcmPQ; l++ ) 00321 { 00322 AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, AnprocsR ) ); 00323 CcurrocR = ( CisR ? -1 : MModAdd( CrocR, p, CnprocsR ) ); 00324 00325 if( ( AisR || ( AmyprocR == AcurrocR ) ) || 00326 ( CisR || ( CmyprocR == CcurrocR ) ) ) 00327 { 00328 Ckk = CiiR; Akk = AiiR; 00329 /* 00330 * Initialize local virtual matrix in process (p,q) 00331 */ 00332 CnpR = PB_Cnumroc( ACnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, 00333 CnprocsR ); 00334 AnpR = PB_Cnumroc( ACnR, 0, Ainb1R, AnbR, AcurrocR, ArocR, 00335 AnprocsR ); 00336 PB_CVMinit( &VM, 0, CnpR, AnpR, Cinb1R, Ainb1R, CnbR, AnbR, p, q, 00337 maxp, maxq, lcmb ); 00338 /* 00339 * Find how many diagonals in this virtual process 00340 */ 00341 npq = PB_CVMnpq( &VM ); 00342 /* 00343 * Re-adjust the number of rows or columns to be (un)packed, in order to 00344 * average the message sizes. 00345 */ 00346 if( npq ) kbb = npq / ( ( npq - 1 ) / kb + 1 ); 00347 00348 if( col2row ) 00349 { 00350 while( npq ) 00351 { 00352 kbb = MIN( kbb, npq ); 00353 /* 00354 * Find out how many columns of sub( A ) and rows of sub( C ) are contiguous 00355 */ 00356 PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff ); 00357 /* 00358 * Compute the descriptor DBUFA for the buffer that will contained the packed 00359 * columns of sub( A ). 00360 */ 00361 if( ( Afr = ( ncpq < kbb ) ) != 0 ) 00362 { 00363 /* 00364 * If columns of sub( A ) are not contiguous, then allocate the buffer and 00365 * pack the kbb columns of sub( A ). 00366 */ 00367 Abufld = MAX( 1, AnpD ); 00368 if( AisR || ( AmyprocR == AcurrocR ) ) 00369 { 00370 Abuf = PB_Cmalloc( AnpD * kbb * size ); 00371 PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, 00372 kbb, AnpD, one, Mptr( A, AiiD, Akk, Ald, 00373 size ), Ald, zero, Abuf, Abufld ); 00374 } 00375 } 00376 else 00377 { 00378 /* 00379 * Otherwise, re-use sub( A ) directly. 00380 */ 00381 Abufld = Ald; 00382 if( AisR || ( AmyprocR == AcurrocR ) ) 00383 Abuf = Mptr( A, AiiD, Akk+Aoff, Ald, size ); 00384 } 00385 PB_Cdescset( DBUFA, ACnD, kbb, Ainb1D, kbb, AnbD, kbb, ArocD, 00386 AcurrocR, ctxt, Abufld ); 00387 /* 00388 * Compute the descriptor DBUFC for the buffer that will contained the packed 00389 * rows of sub( C ). Allocate it. 00390 */ 00391 if( ( Cfr = ( nrpq < kbb ) ) != 0 ) 00392 { 00393 /* 00394 * If rows of sub( C ) are not contiguous, then allocate receiving buffer. 00395 */ 00396 Cbufld = kbb; talpha = one; tbeta = zero; 00397 if( CisR || ( CmyprocR == CcurrocR ) ) 00398 Cbuf = PB_Cmalloc( CnpD * kbb * size ); 00399 } 00400 else 00401 { 00402 /* 00403 * Otherwise, re-use sub( C ) directly. 00404 */ 00405 Cbufld = Cld; talpha = ALPHA; tbeta = BETA; 00406 if( CisR || ( CmyprocR == CcurrocR ) ) 00407 Cbuf = Mptr( C, Ckk+Coff, CiiD, Cld, size ); 00408 } 00409 PB_Cdescset( DBUFC, kbb, ACnD, kbb, Cinb1D, kbb, CnbD, 00410 CcurrocR, CrocD, ctxt, Cbufld ); 00411 /* 00412 * Transpose the one-dimensional buffer Abuf into Cbuf. 00413 */ 00414 PB_Cpaxpby( TYPE, CONJUG, ACnD, kbb, talpha, Abuf, 0, 0, 00415 DBUFA, &Aroc, tbeta, Cbuf, 0, 0, DBUFC, &Croc ); 00416 /* 00417 * Release the buffer containing the packed columns of sub( A ) 00418 */ 00419 if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) ) 00420 if( Abuf ) free( Abuf ); 00421 /* 00422 * Unpack the kbb rows of sub( C ) and release the buffer containing them. 00423 */ 00424 if( Cfr && ( CisR || ( CmyprocR == CcurrocR ) ) ) 00425 { 00426 PB_CVMpack( TYPE, &VM, ROW, &Croc, UNPACKING, NOTRAN, 00427 kbb, CnpD, BETA, Mptr( C, Ckk, CiiD, Cld, 00428 size ), Cld, ALPHA, Cbuf, Cbufld ); 00429 if( Cbuf ) free( Cbuf ); 00430 } 00431 /* 00432 * Update the local column index of sub( A ) and the local row index of sub( C ) 00433 */ 00434 PB_CVMupdate( &VM, kbb, &Ckk, &Akk ); 00435 npq -= kbb; 00436 } 00437 } 00438 else 00439 { 00440 while( npq ) 00441 { 00442 kbb = MIN( kbb, npq ); 00443 /* 00444 * Find out how many rows of sub( A ) and columns of sub( C ) are contiguous 00445 */ 00446 PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff ); 00447 /* 00448 * Compute the descriptor DBUFA for the buffer that will contained the packed 00449 * rows of sub( A ). 00450 */ 00451 if( ( Afr = ( ncpq < kbb ) ) != 0 ) 00452 { 00453 /* 00454 * If rows of sub( A ) are not contiguous, then allocate the buffer and pack 00455 * the kbb rows of sub( A ). 00456 */ 00457 Abufld = kbb; 00458 if( AisR || ( AmyprocR == AcurrocR ) ) 00459 { 00460 Abuf = PB_Cmalloc( AnpD * kbb * size ); 00461 PB_CVMpack( TYPE, &VM, COLUMN, &Aroc, PACKING, NOTRAN, 00462 kbb, AnpD, one, Mptr( A, Akk, AiiD, Ald, 00463 size ), Ald, zero, Abuf, Abufld ); 00464 } 00465 } 00466 else 00467 { 00468 /* 00469 * Otherwise, re-use sub( A ) directly. 00470 */ 00471 Abufld = Ald; 00472 if( AisR || ( AmyprocR == AcurrocR ) ) 00473 Abuf = Mptr( A, Akk+Aoff, AiiD, Ald, size ); 00474 } 00475 PB_Cdescset( DBUFA, kbb, ACnD, kbb, Ainb1D, kbb, AnbD, 00476 AcurrocR, ArocD, ctxt, Abufld ); 00477 /* 00478 * Compute the descriptor DBUFC for the buffer that will contained the packed 00479 * columns of sub( C ). Allocate it. 00480 */ 00481 if( ( Cfr = ( nrpq < kbb ) ) != 0 ) 00482 { 00483 /* 00484 * If columns of sub( C ) are not contiguous, then allocate receiving buffer. 00485 */ 00486 Cbufld = MAX( 1, CnpD ); talpha = one; tbeta = zero; 00487 if( CisR || ( CmyprocR == CcurrocR ) ) 00488 Cbuf = PB_Cmalloc( CnpD * kbb * size ); 00489 } 00490 else 00491 { 00492 Cbufld = Cld; talpha = ALPHA; tbeta = BETA; 00493 if( CisR || ( CmyprocR == CcurrocR ) ) 00494 Cbuf = Mptr( C, CiiD, Ckk+Coff, Cld, size ); 00495 } 00496 PB_Cdescset( DBUFC, ACnD, kbb, Cinb1D, kbb, CnbD, kbb, CrocD, 00497 CcurrocR, ctxt, Cbufld ); 00498 /* 00499 * Transpose the one-dimensional buffer Abuf into Cbuf. 00500 */ 00501 PB_Cpaxpby( TYPE, CONJUG, kbb, ACnD, talpha, Abuf, 0, 0, 00502 DBUFA, &Aroc, tbeta, Cbuf, 0, 0, DBUFC, &Croc ); 00503 /* 00504 * Release the buffer containing the packed rows of sub( A ) 00505 */ 00506 if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) ) 00507 if( Abuf ) free( Abuf ); 00508 /* 00509 * Unpack the kbb columns of sub( C ) and release the buffer containing them. 00510 */ 00511 if( Cfr && ( CisR || ( CmyprocR == CcurrocR ) ) ) 00512 { 00513 PB_CVMpack( TYPE, &VM, ROW, &Croc, UNPACKING, NOTRAN, 00514 kbb, CnpD, BETA, Mptr( C, CiiD, Ckk, Cld, 00515 size ), Cld, ALPHA, Cbuf, Cbufld ); 00516 if( Cbuf ) free( Cbuf ); 00517 } 00518 /* 00519 * Update the local row index of sub( A ) and the local column index of sub( C ) 00520 */ 00521 PB_CVMupdate( &VM, kbb, &Ckk, &Akk ); 00522 npq -= kbb; 00523 } 00524 } 00525 } 00526 p = MModAdd1( p, maxp ); 00527 q = MModAdd1( q, maxq ); 00528 } 00529 } 00530 /* 00531 * End of PB_Cptran 00532 */ 00533 }