|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 #include "redist.h" 00143 #define static2 static 00144 #if defined(Add_) || defined(f77IsF2C) 00145 #define fortran_mr2d pigemr2do_ 00146 #define fortran_mr2dnew pigemr2d_ 00147 #elif defined(UpCase) 00148 #define fortran_mr2dnew PIGEMR2D 00149 #define fortran_mr2d PIGEMR2DO 00150 #define icopy_ ICOPY 00151 #define ilacpy_ ILACPY 00152 #else 00153 #define fortran_mr2d pigemr2do 00154 #define fortran_mr2dnew pigemr2d 00155 #define icopy_ icopy 00156 #define ilacpy_ ilacpy 00157 #endif 00158 #define Clacpy Cigelacpy 00159 void Clacpy(); 00160 typedef struct { 00161 int desctype; 00162 int ctxt; 00163 int m; 00164 int n; 00165 int nbrow; 00166 int nbcol; 00167 int sprow; 00168 int spcol; 00169 int lda; 00170 } MDESC; 00171 #define BLOCK_CYCLIC_2D 1 00172 typedef struct { 00173 int lstart; 00174 int len; 00175 } IDESC; 00176 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) 00177 #define max(A,B) ((A)>(B)?(A):(B)) 00178 #define min(A,B) ((A)>(B)?(B):(A)) 00179 #define DIVUP(a,b) ( ((a)-1) /(b)+1) 00180 #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) 00181 #ifdef MALLOCDEBUG 00182 #define malloc mymalloc 00183 #define free myfree 00184 #define realloc myrealloc 00185 #endif 00186 /* Cblacs */ 00187 extern void Cblacs_pcoord(); 00188 extern int Cblacs_pnum(); 00189 extern void Csetpvmtids(); 00190 extern void Cblacs_get(); 00191 extern void Cblacs_pinfo(); 00192 extern void Cblacs_gridinfo(); 00193 extern void Cblacs_gridinit(); 00194 extern void Cblacs_exit(); 00195 extern void Cblacs_gridexit(); 00196 extern void Cblacs_setup(); 00197 extern void Cigebs2d(); 00198 extern void Cigebr2d(); 00199 extern void Cigesd2d(); 00200 extern void Cigerv2d(); 00201 extern void Cigsum2d(); 00202 extern void Cigamn2d(); 00203 extern void Cigamx2d(); 00204 extern void Cigesd2d(); 00205 extern void Cigerv2d(); 00206 /* lapack */ 00207 void ilacpy_(); 00208 /* aux fonctions */ 00209 extern int localindice(); 00210 extern void *mr2d_malloc(); 00211 extern int ppcm(); 00212 extern int localsize(); 00213 extern int memoryblocksize(); 00214 extern int changeorigin(); 00215 extern void paramcheck(); 00216 /* tools and others function */ 00217 #define scanD0 igescanD0 00218 #define dispmat igedispmat 00219 #define setmemory igesetmemory 00220 #define freememory igefreememory 00221 #define scan_intervals igescan_intervals 00222 extern void scanD0(); 00223 extern void dispmat(); 00224 extern void setmemory(); 00225 extern void freememory(); 00226 extern int scan_intervals(); 00227 extern void Cpigemr2do(); 00228 extern void Cpigemr2d(); 00229 /* some defines for Cpigemr2do */ 00230 #define SENDBUFF 0 00231 #define RECVBUFF 1 00232 #define SIZEBUFF 2 00233 #if 0 00234 #define DEBUG 00235 #endif 00236 #ifndef DEBUG 00237 #define NDEBUG 00238 #endif 00239 #include <stdio.h> 00240 #include <stdlib.h> 00241 #include <assert.h> 00242 #define DESCLEN 9 00243 void 00244 fortran_mr2d(m, n, A, ia, ja, desc_A, 00245 B, ib, jb, desc_B) 00246 int *ia, *ib, *ja, *jb, *m, *n; 00247 int desc_A[DESCLEN], desc_B[DESCLEN]; 00248 int *A, *B; 00249 { 00250 Cpigemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A, 00251 B, *ib, *jb, (MDESC *) desc_B); 00252 return; 00253 } 00254 void 00255 fortran_mr2dnew(m, n, A, ia, ja, desc_A, 00256 B, ib, jb, desc_B, gcontext) 00257 int *ia, *ib, *ja, *jb, *m, *n; 00258 int desc_A[DESCLEN], desc_B[DESCLEN]; 00259 int *A, *B; 00260 int *gcontext; 00261 { 00262 Cpigemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A, 00263 B, *ib, *jb, (MDESC *) desc_B, *gcontext); 00264 return; 00265 } 00266 static2 void init_chenille(); 00267 static2 int inter_len(); 00268 static2 int block2buff(); 00269 static2 void buff2block(); 00270 static2 void gridreshape(); 00271 void 00272 Cpigemr2do(m, n, 00273 ptrmyblock, ia, ja, ma, 00274 ptrmynewblock, ib, jb, mb) 00275 int *ptrmyblock, *ptrmynewblock; 00276 /* pointers to the memory location of the matrix and the redistributed matrix */ 00277 MDESC *ma; 00278 MDESC *mb; 00279 int ia, ja, ib, jb, m, n; 00280 { 00281 int dummy, nprocs; 00282 int gcontext; 00283 /* first we initialize a global grid which serve as a reference to 00284 * communicate from grid a to grid b */ 00285 Cblacs_pinfo(&dummy, &nprocs); 00286 Cblacs_get(0, 0, &gcontext); 00287 Cblacs_gridinit(&gcontext, "R", 1, nprocs); 00288 Cpigemr2d(m, n, ptrmyblock, ia, ja, ma, 00289 ptrmynewblock, ib, jb, mb, gcontext); 00290 Cblacs_gridexit(gcontext); 00291 } 00292 #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis 00293 * idem B puis ia,ja puis ib,jb */ 00294 #define MAGIC_MAX 100000000 00295 void 00296 Cpigemr2d(m, n, 00297 ptrmyblock, ia, ja, ma, 00298 ptrmynewblock, ib, jb, mb, globcontext) 00299 int *ptrmyblock, *ptrmynewblock; 00300 /* pointers to the memory location of the matrix and the redistributed matrix */ 00301 MDESC *ma; 00302 MDESC *mb; 00303 int ia, ja, ib, jb, m, n, globcontext; 00304 { 00305 int *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; 00306 int *recvptr; 00307 MDESC newa, newb; 00308 int *proc0, *proc1, *param; 00309 int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; 00310 int i, j; 00311 int nprow, npcol, gcontext; 00312 int recvsize, sendsize; 00313 IDESC *h_inter; /* to store the horizontal intersections */ 00314 IDESC *v_inter; /* to store the vertical intersections */ 00315 int hinter_nb, vinter_nb; /* number of intrsections in both directions */ 00316 int dummy; 00317 int p0, q0, p1, q1; 00318 int *ra, *ca; 00319 /* end of variables */ 00320 /* To simplify further calcul we change the matrix indexation from 00321 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ 00322 if (m == 0 || n == 0) 00323 return; 00324 ia -= 1; 00325 ja -= 1; 00326 ib -= 1; 00327 jb -= 1; 00328 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); 00329 gcontext = globcontext; 00330 nprocs = nprow * npcol; 00331 /* if the global context that is given to us has not the shape of a line 00332 * (nprow != 1), create a new context. TODO: to be optimal, we should 00333 * avoid this because it is an uncessary synchronisation */ 00334 if (nprow != 1) { 00335 gridreshape(&gcontext); 00336 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); 00337 } 00338 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); 00339 /* compatibility T3D, must check myprow and mypcol are within bounds */ 00340 if (myprow0 >= p0 || mypcol0 >= q0) 00341 myprow0 = mypcol0 = -1; 00342 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); 00343 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); 00344 if (myprow1 >= p1 || mypcol1 >= q1) 00345 myprow1 = mypcol1 = -1; 00346 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); 00347 /* exchange the missing parameters among the processors: shape of grids and 00348 * location of the processors */ 00349 param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); 00350 ra = param + nprocs * 2 + NBPARAM; 00351 ca = param + (nprocs * 2 + NBPARAM) * 2; 00352 for (i = 0; i < nprocs * 2 + NBPARAM; i++) 00353 param[i] = MAGIC_MAX; 00354 proc0 = param + NBPARAM; 00355 proc1 = param + NBPARAM + nprocs; 00356 /* we calulate proc0 and proc1 that will give the number of a proc in 00357 * respectively a or b in the global context */ 00358 if (myprow0 >= 0) { 00359 proc0[myprow0 * q0 + mypcol0] = mypnum; 00360 param[0] = p0; 00361 param[1] = q0; 00362 param[4] = ma->m; 00363 param[5] = ma->n; 00364 param[6] = ma->nbrow; 00365 param[7] = ma->nbcol; 00366 param[8] = ma->sprow; 00367 param[9] = ma->spcol; 00368 param[10] = ia; 00369 param[11] = ja; 00370 } 00371 if (myprow1 >= 0) { 00372 proc1[myprow1 * q1 + mypcol1] = mypnum; 00373 param[2] = p1; 00374 param[3] = q1; 00375 param[12] = mb->m; 00376 param[13] = mb->n; 00377 param[14] = mb->nbrow; 00378 param[15] = mb->nbcol; 00379 param[16] = mb->sprow; 00380 param[17] = mb->spcol; 00381 param[18] = ib; 00382 param[19] = jb; 00383 } 00384 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, 00385 ra, ca, 2 * nprocs + NBPARAM, -1, -1); 00386 newa = *ma; 00387 newb = *mb; 00388 ma = &newa; 00389 mb = &newb; 00390 if (myprow0 == -1) { 00391 p0 = param[0]; 00392 q0 = param[1]; 00393 ma->m = param[4]; 00394 ma->n = param[5]; 00395 ma->nbrow = param[6]; 00396 ma->nbcol = param[7]; 00397 ma->sprow = param[8]; 00398 ma->spcol = param[9]; 00399 ia = param[10]; 00400 ja = param[11]; 00401 } 00402 if (myprow1 == -1) { 00403 p1 = param[2]; 00404 q1 = param[3]; 00405 mb->m = param[12]; 00406 mb->n = param[13]; 00407 mb->nbrow = param[14]; 00408 mb->nbcol = param[15]; 00409 mb->sprow = param[16]; 00410 mb->spcol = param[17]; 00411 ib = param[18]; 00412 jb = param[19]; 00413 } 00414 for (i = 0; i < NBPARAM; i++) { 00415 if (param[i] == MAGIC_MAX) { 00416 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); 00417 exit(1); 00418 } 00419 } 00420 #ifndef NDEBUG 00421 for (i = 0; i < p0 * q0; i++) 00422 assert(proc0[i] >= 0 && proc0[i] < nprocs); 00423 for (i = 0; i < p1 * q1; i++) 00424 assert(proc1[i] >= 0 && proc1[i] < nprocs); 00425 #endif 00426 /* check the validity of the parameters */ 00427 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); 00428 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); 00429 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ 00430 { 00431 int decal; 00432 ia = changeorigin(myprow0, ma->sprow, p0, 00433 ma->nbrow, ia, &decal, &ma->sprow); 00434 ptrmyblock += decal; 00435 ja = changeorigin(mypcol0, ma->spcol, q0, 00436 ma->nbcol, ja, &decal, &ma->spcol); 00437 ptrmyblock += decal * ma->lda; 00438 ma->m = ia + m; 00439 ma->n = ja + n; 00440 ib = changeorigin(myprow1, mb->sprow, p1, 00441 mb->nbrow, ib, &decal, &mb->sprow); 00442 ptrmynewblock += decal; 00443 jb = changeorigin(mypcol1, mb->spcol, q1, 00444 mb->nbcol, jb, &decal, &mb->spcol); 00445 ptrmynewblock += decal * mb->lda; 00446 mb->m = ib + m; 00447 mb->n = jb + n; 00448 if (p0 == 1) 00449 ma->nbrow = ma->m; 00450 if (q0 == 1) 00451 ma->nbcol = ma->n; 00452 if (p1 == 1) 00453 mb->nbrow = mb->m; 00454 if (q1 == 1) 00455 mb->nbcol = mb->n; 00456 #ifndef NDEBUG 00457 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); 00458 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); 00459 #endif 00460 } 00461 /* We compute the size of the memory buffer ( we choose the worst case, 00462 * when the buffer sizes == the memory block sizes). */ 00463 if (myprow0 >= 0 && mypcol0 >= 0) { 00464 /* Initialize pointer variables */ 00465 setmemory(&ptrsendbuff, memoryblocksize(ma)); 00466 }; /* if (mypnum < p0 * q0) */ 00467 if (myprow1 >= 0 && mypcol1 >= 0) { 00468 /* Initialize pointer variables */ 00469 setmemory(&ptrrecvbuff, memoryblocksize(mb)); 00470 }; /* if (mypnum < p1 * q1) */ 00471 /* allocing room for the tabs, alloc for the worst case,local_n or local_m 00472 * intervals, in fact the worst case should be less, perhaps half that,I 00473 * should think of that one day. */ 00474 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * 00475 ma->nbcol * sizeof(IDESC)); 00476 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) 00477 * ma->nbrow * sizeof(IDESC)); 00478 /* We go for the scanning of indices. For each processor including mypnum, 00479 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send 00480 * it. Then for each processor, we compute the size of message to be 00481 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements 00482 * of recvbuff the right place (scanD)(RECVBUFF)) */ 00483 recvptr = ptrrecvbuff; 00484 { 00485 int tot, myrang, step, sens; 00486 int *sender, *recver; 00487 int mesending, merecving; 00488 tot = max(p0 * q0, p1 * q1); 00489 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, 00490 &sender, &recver, &myrang); 00491 if (myrang == -1) 00492 goto after_comm; 00493 mesending = myprow0 >= 0; 00494 assert(sender[myrang] >= 0 || !mesending); 00495 assert(!mesending || proc0[sender[myrang]] == mypnum); 00496 merecving = myprow1 >= 0; 00497 assert(recver[myrang] >= 0 || !merecving); 00498 assert(!merecving || proc1[recver[myrang]] == mypnum); 00499 step = tot - 1 - myrang; 00500 do { 00501 for (sens = 0; sens < 2; sens++) { 00502 /* be careful here, when we communicating with ourselves, we must 00503 * send first (myrang > step == 0) */ 00504 if (mesending && recver[step] >= 0 && 00505 (sens == 0)) { 00506 i = recver[step] / q1; 00507 j = recver[step] % q1; 00508 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, 00509 v_inter); 00510 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, 00511 h_inter); 00512 sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb, 00513 ptrmyblock, ma, ptrsendbuff); 00514 } /* if (mesending...) { */ 00515 if (mesending && recver[step] >= 0 && 00516 (sens == myrang > step)) { 00517 i = recver[step] / q1; 00518 j = recver[step] % q1; 00519 if (sendsize > 0 00520 && (step != myrang || !merecving) 00521 ) { 00522 Cigesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 00523 0, proc1[i * q1 + j]); 00524 } /* sendsize > 0 */ 00525 } /* if (mesending ... */ 00526 if (merecving && sender[step] >= 0 && 00527 (sens == myrang <= step)) { 00528 i = sender[step] / q0; 00529 j = sender[step] % q0; 00530 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, 00531 v_inter); 00532 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, 00533 h_inter); 00534 recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter); 00535 if (recvsize > 0) { 00536 if (step == myrang && mesending) { 00537 Clacpy(recvsize, 1, 00538 ptrsendbuff, recvsize, 00539 ptrrecvbuff, recvsize); 00540 } else { 00541 Cigerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 00542 0, proc0[i * q0 + j]); 00543 } 00544 } /* recvsize > 0 */ 00545 } /* if (merecving ...) */ 00546 if (merecving && sender[step] >= 0 && sens == 1) { 00547 buff2block(v_inter, vinter_nb, h_inter, hinter_nb, 00548 recvptr, ptrmynewblock, mb); 00549 } /* if (merecving...) */ 00550 } /* for (sens = 0) */ 00551 step -= 1; 00552 if (step < 0) 00553 step = tot - 1; 00554 } while (step != tot - 1 - myrang); 00555 after_comm: 00556 free(sender); 00557 } /* { int tot,nr,ns ...} */ 00558 /* don't forget to clean up things! */ 00559 if (myprow1 >= 0 && mypcol1 >= 0) { 00560 freememory((char *) ptrrecvbuff); 00561 }; 00562 if (myprow0 >= 0 && mypcol0 >= 0) { 00563 freememory((char *) ptrsendbuff); 00564 }; 00565 if (nprow != 1) 00566 Cblacs_gridexit(gcontext); 00567 free(v_inter); 00568 free(h_inter); 00569 free(param); 00570 }/* distrib */ 00571 static2 void 00572 init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) 00573 int nprocs, mypnum, n0, n1; 00574 int *proc0, *proc1, **psend, **precv, *myrang; 00575 { 00576 int ns, nr, i, tot; 00577 int *sender, *recver, *g0, *g1; 00578 tot = max(n0, n1); 00579 sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); 00580 recver = sender + tot; 00581 *psend = sender; 00582 *precv = recver; 00583 g0 = recver + tot; 00584 g1 = g0 + nprocs; 00585 for (i = 0; i < nprocs; i++) { 00586 g0[i] = -1; 00587 g1[i] = -1; 00588 } 00589 for (i = 0; i < tot; i++) { 00590 sender[i] = -1; 00591 recver[i] = -1; 00592 } 00593 for (i = 0; i < n0; i++) 00594 g0[proc0[i]] = i; 00595 for (i = 0; i < n1; i++) 00596 g1[proc1[i]] = i; 00597 ns = 0; 00598 nr = 0; 00599 *myrang = -1; 00600 for (i = 0; i < nprocs; i++) 00601 if (g0[i] >= 0 && g1[i] >= 0) { 00602 if (i == mypnum) 00603 *myrang = nr; 00604 sender[ns] = g0[i]; 00605 ns += 1; 00606 recver[nr] = g1[i]; 00607 nr += 1; 00608 assert(ns <= n0 && nr <= n1 && nr == ns); 00609 } 00610 for (i = 0; i < nprocs; i++) 00611 if (g0[i] >= 0 && g1[i] < 0) { 00612 if (i == mypnum) 00613 *myrang = ns; 00614 sender[ns] = g0[i]; 00615 ns += 1; 00616 assert(ns <= n0); 00617 } 00618 for (i = 0; i < nprocs; i++) 00619 if (g1[i] >= 0 && g0[i] < 0) { 00620 if (i == mypnum) 00621 *myrang = nr; 00622 recver[nr] = g1[i]; 00623 nr += 1; 00624 assert(nr <= n1); 00625 } 00626 } 00627 #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \ 00628 { \ 00629 int *_a,*_b; \ 00630 int _m,_n,_lda,_ldb; \ 00631 int _i,_j; \ 00632 _m = (mo);_n = (no); \ 00633 _a = (ao);_b = (bo); \ 00634 _lda = (ldao) - _m; \ 00635 _ldb = (ldbo) - _m; \ 00636 assert(_lda >= 0 && _ldb >= 0); \ 00637 for (_j=0;_j<_n;_j++) { \ 00638 for (_i=0;_i<_m;_i++) \ 00639 *_b++ = *_a++; \ 00640 _b += _ldb; \ 00641 _a += _lda; \ 00642 } \ 00643 } 00644 static2 int 00645 block2buff(vi, vinb, hi, hinb, ptra, ma, buff) 00646 int hinb, vinb; 00647 IDESC *hi, *vi; 00648 MDESC *ma; 00649 int *buff, *ptra; 00650 { 00651 int h, v, sizebuff; 00652 int *ptr2; 00653 sizebuff = 0; 00654 for (h = 0; h < hinb; h++) { 00655 ptr2 = ptra + hi[h].lstart * ma->lda; 00656 for (v = 0; v < vinb; v++) { 00657 Mlacpy(vi[v].len, hi[h].len, 00658 ptr2 + vi[v].lstart, 00659 ma->lda, 00660 buff + sizebuff, vi[v].len); 00661 sizebuff += hi[h].len * vi[v].len; 00662 } 00663 } 00664 return sizebuff; 00665 } 00666 static2 void 00667 buff2block(vi, vinb, hi, hinb, buff, ptrb, mb) 00668 int hinb, vinb; 00669 IDESC *hi, *vi; 00670 MDESC *mb; 00671 int *buff, *ptrb; 00672 { 00673 int h, v, sizebuff; 00674 int *ptr2; 00675 sizebuff = 0; 00676 for (h = 0; h < hinb; h++) { 00677 ptr2 = ptrb + hi[h].lstart * mb->lda; 00678 for (v = 0; v < vinb; v++) { 00679 Mlacpy(vi[v].len, hi[h].len, 00680 buff + sizebuff, vi[v].len, 00681 ptr2 + vi[v].lstart, 00682 mb->lda); 00683 sizebuff += hi[h].len * vi[v].len; 00684 } 00685 } 00686 } 00687 static2 int 00688 inter_len(hinb, hi, vinb, vi) 00689 int hinb, vinb; 00690 IDESC *hi, *vi; 00691 { 00692 int hlen, vlen, h, v; 00693 hlen = 0; 00694 for (h = 0; h < hinb; h++) 00695 hlen += hi[h].len; 00696 vlen = 0; 00697 for (v = 0; v < vinb; v++) 00698 vlen += vi[v].len; 00699 return hlen * vlen; 00700 } 00701 void 00702 Clacpy(m, n, a, lda, b, ldb) 00703 int *a, *b; 00704 int m, n, lda, ldb; 00705 { 00706 int i, j; 00707 lda -= m; 00708 ldb -= m; 00709 assert(lda >= 0 && ldb >= 0); 00710 for (j = 0; j < n; j++) { 00711 for (i = 0; i < m; i++) 00712 *b++ = *a++; 00713 b += ldb; 00714 a += lda; 00715 } 00716 } 00717 static2 void 00718 gridreshape(ctxtp) 00719 int *ctxtp; 00720 { 00721 int ori, final; /* original context, and new context created, with 00722 * line form */ 00723 int nprow, npcol, myrow, mycol; 00724 int *usermap; 00725 int i, j; 00726 ori = *ctxtp; 00727 Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); 00728 usermap = mr2d_malloc(sizeof(int) * nprow * npcol); 00729 for (i = 0; i < nprow; i++) 00730 for (j = 0; j < npcol; j++) { 00731 usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); 00732 } 00733 /* Cblacs_get(0, 0, &final); */ 00734 Cblacs_get(ori, 10, &final); 00735 Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); 00736 *ctxtp = final; 00737 free(usermap); 00738 }