|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 #include "redist.h" 00143 #define static2 static 00144 #if defined(Add_) || defined(f77IsF2C) 00145 #define fortran_mr2d pzgemr2do_ 00146 #define fortran_mr2dnew pzgemr2d_ 00147 #elif defined(UpCase) 00148 #define fortran_mr2dnew PZGEMR2D 00149 #define fortran_mr2d PZGEMR2DO 00150 #define zcopy_ ZCOPY 00151 #define zlacpy_ ZLACPY 00152 #else 00153 #define fortran_mr2d pzgemr2do 00154 #define fortran_mr2dnew pzgemr2d 00155 #define zcopy_ zcopy 00156 #define zlacpy_ zlacpy 00157 #endif 00158 #define Clacpy Czgelacpy 00159 void Clacpy(); 00160 typedef struct { 00161 double r, i; 00162 } dcomplex; 00163 typedef struct { 00164 int desctype; 00165 int ctxt; 00166 int m; 00167 int n; 00168 int nbrow; 00169 int nbcol; 00170 int sprow; 00171 int spcol; 00172 int lda; 00173 } MDESC; 00174 #define BLOCK_CYCLIC_2D 1 00175 typedef struct { 00176 int lstart; 00177 int len; 00178 } IDESC; 00179 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) 00180 #define max(A,B) ((A)>(B)?(A):(B)) 00181 #define min(A,B) ((A)>(B)?(B):(A)) 00182 #define DIVUP(a,b) ( ((a)-1) /(b)+1) 00183 #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) 00184 #ifdef MALLOCDEBUG 00185 #define malloc mymalloc 00186 #define free myfree 00187 #define realloc myrealloc 00188 #endif 00189 /* Cblacs */ 00190 extern void Cblacs_pcoord(); 00191 extern int Cblacs_pnum(); 00192 extern void Csetpvmtids(); 00193 extern void Cblacs_get(); 00194 extern void Cblacs_pinfo(); 00195 extern void Cblacs_gridinfo(); 00196 extern void Cblacs_gridinit(); 00197 extern void Cblacs_exit(); 00198 extern void Cblacs_gridexit(); 00199 extern void Cblacs_setup(); 00200 extern void Cigebs2d(); 00201 extern void Cigebr2d(); 00202 extern void Cigesd2d(); 00203 extern void Cigerv2d(); 00204 extern void Cigsum2d(); 00205 extern void Cigamn2d(); 00206 extern void Cigamx2d(); 00207 extern void Czgesd2d(); 00208 extern void Czgerv2d(); 00209 /* lapack */ 00210 void zlacpy_(); 00211 /* aux fonctions */ 00212 extern int localindice(); 00213 extern void *mr2d_malloc(); 00214 extern int ppcm(); 00215 extern int localsize(); 00216 extern int memoryblocksize(); 00217 extern int changeorigin(); 00218 extern void paramcheck(); 00219 /* tools and others function */ 00220 #define scanD0 zgescanD0 00221 #define dispmat zgedispmat 00222 #define setmemory zgesetmemory 00223 #define freememory zgefreememory 00224 #define scan_intervals zgescan_intervals 00225 extern void scanD0(); 00226 extern void dispmat(); 00227 extern void setmemory(); 00228 extern void freememory(); 00229 extern int scan_intervals(); 00230 extern void Cpzgemr2do(); 00231 extern void Cpzgemr2d(); 00232 /* some defines for Cpzgemr2do */ 00233 #define SENDBUFF 0 00234 #define RECVBUFF 1 00235 #define SIZEBUFF 2 00236 #if 0 00237 #define DEBUG 00238 #endif 00239 #ifndef DEBUG 00240 #define NDEBUG 00241 #endif 00242 #include <stdio.h> 00243 #include <stdlib.h> 00244 #include <assert.h> 00245 #define DESCLEN 9 00246 void 00247 fortran_mr2d(m, n, A, ia, ja, desc_A, 00248 B, ib, jb, desc_B) 00249 int *ia, *ib, *ja, *jb, *m, *n; 00250 int desc_A[DESCLEN], desc_B[DESCLEN]; 00251 dcomplex *A, *B; 00252 { 00253 Cpzgemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A, 00254 B, *ib, *jb, (MDESC *) desc_B); 00255 return; 00256 } 00257 void 00258 fortran_mr2dnew(m, n, A, ia, ja, desc_A, 00259 B, ib, jb, desc_B, gcontext) 00260 int *ia, *ib, *ja, *jb, *m, *n; 00261 int desc_A[DESCLEN], desc_B[DESCLEN]; 00262 dcomplex *A, *B; 00263 int *gcontext; 00264 { 00265 Cpzgemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A, 00266 B, *ib, *jb, (MDESC *) desc_B, *gcontext); 00267 return; 00268 } 00269 static2 void init_chenille(); 00270 static2 int inter_len(); 00271 static2 int block2buff(); 00272 static2 void buff2block(); 00273 static2 void gridreshape(); 00274 void 00275 Cpzgemr2do(m, n, 00276 ptrmyblock, ia, ja, ma, 00277 ptrmynewblock, ib, jb, mb) 00278 dcomplex *ptrmyblock, *ptrmynewblock; 00279 /* pointers to the memory location of the matrix and the redistributed matrix */ 00280 MDESC *ma; 00281 MDESC *mb; 00282 int ia, ja, ib, jb, m, n; 00283 { 00284 int dummy, nprocs; 00285 int gcontext; 00286 /* first we initialize a global grid which serve as a reference to 00287 * communicate from grid a to grid b */ 00288 Cblacs_pinfo(&dummy, &nprocs); 00289 Cblacs_get(0, 0, &gcontext); 00290 Cblacs_gridinit(&gcontext, "R", 1, nprocs); 00291 Cpzgemr2d(m, n, ptrmyblock, ia, ja, ma, 00292 ptrmynewblock, ib, jb, mb, gcontext); 00293 Cblacs_gridexit(gcontext); 00294 } 00295 #define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis 00296 * idem B puis ia,ja puis ib,jb */ 00297 #define MAGIC_MAX 100000000 00298 void 00299 Cpzgemr2d(m, n, 00300 ptrmyblock, ia, ja, ma, 00301 ptrmynewblock, ib, jb, mb, globcontext) 00302 dcomplex *ptrmyblock, *ptrmynewblock; 00303 /* pointers to the memory location of the matrix and the redistributed matrix */ 00304 MDESC *ma; 00305 MDESC *mb; 00306 int ia, ja, ib, jb, m, n, globcontext; 00307 { 00308 dcomplex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0; 00309 dcomplex *recvptr; 00310 MDESC newa, newb; 00311 int *proc0, *proc1, *param; 00312 int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs; 00313 int i, j; 00314 int nprow, npcol, gcontext; 00315 int recvsize, sendsize; 00316 IDESC *h_inter; /* to store the horizontal intersections */ 00317 IDESC *v_inter; /* to store the vertical intersections */ 00318 int hinter_nb, vinter_nb; /* number of intrsections in both directions */ 00319 int dummy; 00320 int p0, q0, p1, q1; 00321 int *ra, *ca; 00322 /* end of variables */ 00323 /* To simplify further calcul we change the matrix indexation from 00324 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */ 00325 if (m == 0 || n == 0) 00326 return; 00327 ia -= 1; 00328 ja -= 1; 00329 ib -= 1; 00330 jb -= 1; 00331 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum); 00332 gcontext = globcontext; 00333 nprocs = nprow * npcol; 00334 /* if the global context that is given to us has not the shape of a line 00335 * (nprow != 1), create a new context. TODO: to be optimal, we should 00336 * avoid this because it is an uncessary synchronisation */ 00337 if (nprow != 1) { 00338 gridreshape(&gcontext); 00339 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum); 00340 } 00341 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0); 00342 /* compatibility T3D, must check myprow and mypcol are within bounds */ 00343 if (myprow0 >= p0 || mypcol0 >= q0) 00344 myprow0 = mypcol0 = -1; 00345 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); 00346 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1); 00347 if (myprow1 >= p1 || mypcol1 >= q1) 00348 myprow1 = mypcol1 = -1; 00349 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); 00350 /* exchange the missing parameters among the processors: shape of grids and 00351 * location of the processors */ 00352 param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int)); 00353 ra = param + nprocs * 2 + NBPARAM; 00354 ca = param + (nprocs * 2 + NBPARAM) * 2; 00355 for (i = 0; i < nprocs * 2 + NBPARAM; i++) 00356 param[i] = MAGIC_MAX; 00357 proc0 = param + NBPARAM; 00358 proc1 = param + NBPARAM + nprocs; 00359 /* we calulate proc0 and proc1 that will give the number of a proc in 00360 * respectively a or b in the global context */ 00361 if (myprow0 >= 0) { 00362 proc0[myprow0 * q0 + mypcol0] = mypnum; 00363 param[0] = p0; 00364 param[1] = q0; 00365 param[4] = ma->m; 00366 param[5] = ma->n; 00367 param[6] = ma->nbrow; 00368 param[7] = ma->nbcol; 00369 param[8] = ma->sprow; 00370 param[9] = ma->spcol; 00371 param[10] = ia; 00372 param[11] = ja; 00373 } 00374 if (myprow1 >= 0) { 00375 proc1[myprow1 * q1 + mypcol1] = mypnum; 00376 param[2] = p1; 00377 param[3] = q1; 00378 param[12] = mb->m; 00379 param[13] = mb->n; 00380 param[14] = mb->nbrow; 00381 param[15] = mb->nbcol; 00382 param[16] = mb->sprow; 00383 param[17] = mb->spcol; 00384 param[18] = ib; 00385 param[19] = jb; 00386 } 00387 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM, 00388 ra, ca, 2 * nprocs + NBPARAM, -1, -1); 00389 newa = *ma; 00390 newb = *mb; 00391 ma = &newa; 00392 mb = &newb; 00393 if (myprow0 == -1) { 00394 p0 = param[0]; 00395 q0 = param[1]; 00396 ma->m = param[4]; 00397 ma->n = param[5]; 00398 ma->nbrow = param[6]; 00399 ma->nbcol = param[7]; 00400 ma->sprow = param[8]; 00401 ma->spcol = param[9]; 00402 ia = param[10]; 00403 ja = param[11]; 00404 } 00405 if (myprow1 == -1) { 00406 p1 = param[2]; 00407 q1 = param[3]; 00408 mb->m = param[12]; 00409 mb->n = param[13]; 00410 mb->nbrow = param[14]; 00411 mb->nbcol = param[15]; 00412 mb->sprow = param[16]; 00413 mb->spcol = param[17]; 00414 ib = param[18]; 00415 jb = param[19]; 00416 } 00417 for (i = 0; i < NBPARAM; i++) { 00418 if (param[i] == MAGIC_MAX) { 00419 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n"); 00420 exit(1); 00421 } 00422 } 00423 #ifndef NDEBUG 00424 for (i = 0; i < p0 * q0; i++) 00425 assert(proc0[i] >= 0 && proc0[i] < nprocs); 00426 for (i = 0; i < p1 * q1; i++) 00427 assert(proc1[i] >= 0 && proc1[i] < nprocs); 00428 #endif 00429 /* check the validity of the parameters */ 00430 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); 00431 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); 00432 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */ 00433 { 00434 int decal; 00435 ia = changeorigin(myprow0, ma->sprow, p0, 00436 ma->nbrow, ia, &decal, &ma->sprow); 00437 ptrmyblock += decal; 00438 ja = changeorigin(mypcol0, ma->spcol, q0, 00439 ma->nbcol, ja, &decal, &ma->spcol); 00440 ptrmyblock += decal * ma->lda; 00441 ma->m = ia + m; 00442 ma->n = ja + n; 00443 ib = changeorigin(myprow1, mb->sprow, p1, 00444 mb->nbrow, ib, &decal, &mb->sprow); 00445 ptrmynewblock += decal; 00446 jb = changeorigin(mypcol1, mb->spcol, q1, 00447 mb->nbcol, jb, &decal, &mb->spcol); 00448 ptrmynewblock += decal * mb->lda; 00449 mb->m = ib + m; 00450 mb->n = jb + n; 00451 if (p0 == 1) 00452 ma->nbrow = ma->m; 00453 if (q0 == 1) 00454 ma->nbcol = ma->n; 00455 if (p1 == 1) 00456 mb->nbrow = mb->m; 00457 if (q1 == 1) 00458 mb->nbcol = mb->n; 00459 #ifndef NDEBUG 00460 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext); 00461 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext); 00462 #endif 00463 } 00464 /* We compute the size of the memory buffer ( we choose the worst case, 00465 * when the buffer sizes == the memory block sizes). */ 00466 if (myprow0 >= 0 && mypcol0 >= 0) { 00467 /* Initialize pointer variables */ 00468 setmemory(&ptrsendbuff, memoryblocksize(ma)); 00469 }; /* if (mypnum < p0 * q0) */ 00470 if (myprow1 >= 0 && mypcol1 >= 0) { 00471 /* Initialize pointer variables */ 00472 setmemory(&ptrrecvbuff, memoryblocksize(mb)); 00473 }; /* if (mypnum < p1 * q1) */ 00474 /* allocing room for the tabs, alloc for the worst case,local_n or local_m 00475 * intervals, in fact the worst case should be less, perhaps half that,I 00476 * should think of that one day. */ 00477 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) * 00478 ma->nbcol * sizeof(IDESC)); 00479 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow) 00480 * ma->nbrow * sizeof(IDESC)); 00481 /* We go for the scanning of indices. For each processor including mypnum, 00482 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send 00483 * it. Then for each processor, we compute the size of message to be 00484 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements 00485 * of recvbuff the right place (scanD)(RECVBUFF)) */ 00486 recvptr = ptrrecvbuff; 00487 { 00488 int tot, myrang, step, sens; 00489 int *sender, *recver; 00490 int mesending, merecving; 00491 tot = max(p0 * q0, p1 * q1); 00492 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1, 00493 &sender, &recver, &myrang); 00494 if (myrang == -1) 00495 goto after_comm; 00496 mesending = myprow0 >= 0; 00497 assert(sender[myrang] >= 0 || !mesending); 00498 assert(!mesending || proc0[sender[myrang]] == mypnum); 00499 merecving = myprow1 >= 0; 00500 assert(recver[myrang] >= 0 || !merecving); 00501 assert(!merecving || proc1[recver[myrang]] == mypnum); 00502 step = tot - 1 - myrang; 00503 do { 00504 for (sens = 0; sens < 2; sens++) { 00505 /* be careful here, when we communicating with ourselves, we must 00506 * send first (myrang > step == 0) */ 00507 if (mesending && recver[step] >= 0 && 00508 (sens == 0)) { 00509 i = recver[step] / q1; 00510 j = recver[step] % q1; 00511 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i, 00512 v_inter); 00513 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j, 00514 h_inter); 00515 sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb, 00516 ptrmyblock, ma, ptrsendbuff); 00517 } /* if (mesending...) { */ 00518 if (mesending && recver[step] >= 0 && 00519 (sens == myrang > step)) { 00520 i = recver[step] / q1; 00521 j = recver[step] % q1; 00522 if (sendsize > 0 00523 && (step != myrang || !merecving) 00524 ) { 00525 Czgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize, 00526 0, proc1[i * q1 + j]); 00527 } /* sendsize > 0 */ 00528 } /* if (mesending ... */ 00529 if (merecving && sender[step] >= 0 && 00530 (sens == myrang <= step)) { 00531 i = sender[step] / q0; 00532 j = sender[step] % q0; 00533 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i, 00534 v_inter); 00535 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j, 00536 h_inter); 00537 recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter); 00538 if (recvsize > 0) { 00539 if (step == myrang && mesending) { 00540 Clacpy(recvsize, 1, 00541 ptrsendbuff, recvsize, 00542 ptrrecvbuff, recvsize); 00543 } else { 00544 Czgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize, 00545 0, proc0[i * q0 + j]); 00546 } 00547 } /* recvsize > 0 */ 00548 } /* if (merecving ...) */ 00549 if (merecving && sender[step] >= 0 && sens == 1) { 00550 buff2block(v_inter, vinter_nb, h_inter, hinter_nb, 00551 recvptr, ptrmynewblock, mb); 00552 } /* if (merecving...) */ 00553 } /* for (sens = 0) */ 00554 step -= 1; 00555 if (step < 0) 00556 step = tot - 1; 00557 } while (step != tot - 1 - myrang); 00558 after_comm: 00559 free(sender); 00560 } /* { int tot,nr,ns ...} */ 00561 /* don't forget to clean up things! */ 00562 if (myprow1 >= 0 && mypcol1 >= 0) { 00563 freememory((char *) ptrrecvbuff); 00564 }; 00565 if (myprow0 >= 0 && mypcol0 >= 0) { 00566 freememory((char *) ptrsendbuff); 00567 }; 00568 if (nprow != 1) 00569 Cblacs_gridexit(gcontext); 00570 free(v_inter); 00571 free(h_inter); 00572 free(param); 00573 }/* distrib */ 00574 static2 void 00575 init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang) 00576 int nprocs, mypnum, n0, n1; 00577 int *proc0, *proc1, **psend, **precv, *myrang; 00578 { 00579 int ns, nr, i, tot; 00580 int *sender, *recver, *g0, *g1; 00581 tot = max(n0, n1); 00582 sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2); 00583 recver = sender + tot; 00584 *psend = sender; 00585 *precv = recver; 00586 g0 = recver + tot; 00587 g1 = g0 + nprocs; 00588 for (i = 0; i < nprocs; i++) { 00589 g0[i] = -1; 00590 g1[i] = -1; 00591 } 00592 for (i = 0; i < tot; i++) { 00593 sender[i] = -1; 00594 recver[i] = -1; 00595 } 00596 for (i = 0; i < n0; i++) 00597 g0[proc0[i]] = i; 00598 for (i = 0; i < n1; i++) 00599 g1[proc1[i]] = i; 00600 ns = 0; 00601 nr = 0; 00602 *myrang = -1; 00603 for (i = 0; i < nprocs; i++) 00604 if (g0[i] >= 0 && g1[i] >= 0) { 00605 if (i == mypnum) 00606 *myrang = nr; 00607 sender[ns] = g0[i]; 00608 ns += 1; 00609 recver[nr] = g1[i]; 00610 nr += 1; 00611 assert(ns <= n0 && nr <= n1 && nr == ns); 00612 } 00613 for (i = 0; i < nprocs; i++) 00614 if (g0[i] >= 0 && g1[i] < 0) { 00615 if (i == mypnum) 00616 *myrang = ns; 00617 sender[ns] = g0[i]; 00618 ns += 1; 00619 assert(ns <= n0); 00620 } 00621 for (i = 0; i < nprocs; i++) 00622 if (g1[i] >= 0 && g0[i] < 0) { 00623 if (i == mypnum) 00624 *myrang = nr; 00625 recver[nr] = g1[i]; 00626 nr += 1; 00627 assert(nr <= n1); 00628 } 00629 } 00630 #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \ 00631 { \ 00632 dcomplex *_a,*_b; \ 00633 int _m,_n,_lda,_ldb; \ 00634 int _i,_j; \ 00635 _m = (mo);_n = (no); \ 00636 _a = (ao);_b = (bo); \ 00637 _lda = (ldao) - _m; \ 00638 _ldb = (ldbo) - _m; \ 00639 assert(_lda >= 0 && _ldb >= 0); \ 00640 for (_j=0;_j<_n;_j++) { \ 00641 for (_i=0;_i<_m;_i++) \ 00642 *_b++ = *_a++; \ 00643 _b += _ldb; \ 00644 _a += _lda; \ 00645 } \ 00646 } 00647 static2 int 00648 block2buff(vi, vinb, hi, hinb, ptra, ma, buff) 00649 int hinb, vinb; 00650 IDESC *hi, *vi; 00651 MDESC *ma; 00652 dcomplex *buff, *ptra; 00653 { 00654 int h, v, sizebuff; 00655 dcomplex *ptr2; 00656 sizebuff = 0; 00657 for (h = 0; h < hinb; h++) { 00658 ptr2 = ptra + hi[h].lstart * ma->lda; 00659 for (v = 0; v < vinb; v++) { 00660 Mlacpy(vi[v].len, hi[h].len, 00661 ptr2 + vi[v].lstart, 00662 ma->lda, 00663 buff + sizebuff, vi[v].len); 00664 sizebuff += hi[h].len * vi[v].len; 00665 } 00666 } 00667 return sizebuff; 00668 } 00669 static2 void 00670 buff2block(vi, vinb, hi, hinb, buff, ptrb, mb) 00671 int hinb, vinb; 00672 IDESC *hi, *vi; 00673 MDESC *mb; 00674 dcomplex *buff, *ptrb; 00675 { 00676 int h, v, sizebuff; 00677 dcomplex *ptr2; 00678 sizebuff = 0; 00679 for (h = 0; h < hinb; h++) { 00680 ptr2 = ptrb + hi[h].lstart * mb->lda; 00681 for (v = 0; v < vinb; v++) { 00682 Mlacpy(vi[v].len, hi[h].len, 00683 buff + sizebuff, vi[v].len, 00684 ptr2 + vi[v].lstart, 00685 mb->lda); 00686 sizebuff += hi[h].len * vi[v].len; 00687 } 00688 } 00689 } 00690 static2 int 00691 inter_len(hinb, hi, vinb, vi) 00692 int hinb, vinb; 00693 IDESC *hi, *vi; 00694 { 00695 int hlen, vlen, h, v; 00696 hlen = 0; 00697 for (h = 0; h < hinb; h++) 00698 hlen += hi[h].len; 00699 vlen = 0; 00700 for (v = 0; v < vinb; v++) 00701 vlen += vi[v].len; 00702 return hlen * vlen; 00703 } 00704 void 00705 Clacpy(m, n, a, lda, b, ldb) 00706 dcomplex *a, *b; 00707 int m, n, lda, ldb; 00708 { 00709 int i, j; 00710 lda -= m; 00711 ldb -= m; 00712 assert(lda >= 0 && ldb >= 0); 00713 for (j = 0; j < n; j++) { 00714 for (i = 0; i < m; i++) 00715 *b++ = *a++; 00716 b += ldb; 00717 a += lda; 00718 } 00719 } 00720 static2 void 00721 gridreshape(ctxtp) 00722 int *ctxtp; 00723 { 00724 int ori, final; /* original context, and new context created, with 00725 * line form */ 00726 int nprow, npcol, myrow, mycol; 00727 int *usermap; 00728 int i, j; 00729 ori = *ctxtp; 00730 Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol); 00731 usermap = mr2d_malloc(sizeof(int) * nprow * npcol); 00732 for (i = 0; i < nprow; i++) 00733 for (j = 0; j < npcol; j++) { 00734 usermap[i + j * nprow] = Cblacs_pnum(ori, i, j); 00735 } 00736 /* Cblacs_get(0, 0, &final); */ 00737 Cblacs_get(ori, 10, &final); 00738 Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol); 00739 *ctxtp = final; 00740 free(usermap); 00741 }