ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pzgemr.c
Go to the documentation of this file.
00001 #include "redist.h"
00143 #define static2 static
00144 #if defined(Add_) || defined(f77IsF2C)
00145 #define fortran_mr2d pzgemr2do_
00146 #define fortran_mr2dnew pzgemr2d_
00147 #elif defined(UpCase)
00148 #define fortran_mr2dnew PZGEMR2D
00149 #define fortran_mr2d PZGEMR2DO
00150 #define zcopy_ ZCOPY
00151 #define zlacpy_ ZLACPY
00152 #else
00153 #define fortran_mr2d pzgemr2do
00154 #define fortran_mr2dnew pzgemr2d
00155 #define zcopy_ zcopy
00156 #define zlacpy_ zlacpy
00157 #endif
00158 #define Clacpy Czgelacpy
00159 void  Clacpy();
00160 typedef struct {
00161   double r, i;
00162 }     dcomplex;
00163 typedef struct {
00164   int   desctype;
00165   int   ctxt;
00166   int   m;
00167   int   n;
00168   int   nbrow;
00169   int   nbcol;
00170   int   sprow;
00171   int   spcol;
00172   int   lda;
00173 }     MDESC;
00174 #define BLOCK_CYCLIC_2D 1
00175 typedef struct {
00176   int   lstart;
00177   int   len;
00178 }     IDESC;
00179 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
00180 #define max(A,B) ((A)>(B)?(A):(B))
00181 #define min(A,B) ((A)>(B)?(B):(A))
00182 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
00183 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
00184 #ifdef MALLOCDEBUG
00185 #define malloc mymalloc
00186 #define free myfree
00187 #define realloc myrealloc
00188 #endif
00189 /* Cblacs */
00190 extern void Cblacs_pcoord();
00191 extern int Cblacs_pnum();
00192 extern void Csetpvmtids();
00193 extern void Cblacs_get();
00194 extern void Cblacs_pinfo();
00195 extern void Cblacs_gridinfo();
00196 extern void Cblacs_gridinit();
00197 extern void Cblacs_exit();
00198 extern void Cblacs_gridexit();
00199 extern void Cblacs_setup();
00200 extern void Cigebs2d();
00201 extern void Cigebr2d();
00202 extern void Cigesd2d();
00203 extern void Cigerv2d();
00204 extern void Cigsum2d();
00205 extern void Cigamn2d();
00206 extern void Cigamx2d();
00207 extern void Czgesd2d();
00208 extern void Czgerv2d();
00209 /* lapack */
00210 void  zlacpy_();
00211 /* aux fonctions */
00212 extern int localindice();
00213 extern void *mr2d_malloc();
00214 extern int ppcm();
00215 extern int localsize();
00216 extern int memoryblocksize();
00217 extern int changeorigin();
00218 extern void paramcheck();
00219 /* tools and others function */
00220 #define scanD0 zgescanD0
00221 #define dispmat zgedispmat
00222 #define setmemory zgesetmemory
00223 #define freememory zgefreememory
00224 #define scan_intervals zgescan_intervals
00225 extern void scanD0();
00226 extern void dispmat();
00227 extern void setmemory();
00228 extern void freememory();
00229 extern int scan_intervals();
00230 extern void Cpzgemr2do();
00231 extern void Cpzgemr2d();
00232 /* some defines for Cpzgemr2do */
00233 #define SENDBUFF 0
00234 #define RECVBUFF 1
00235 #define SIZEBUFF 2
00236 #if 0
00237 #define DEBUG
00238 #endif
00239 #ifndef DEBUG
00240 #define NDEBUG
00241 #endif
00242 #include <stdio.h>
00243 #include <stdlib.h>
00244 #include <assert.h>
00245 #define DESCLEN 9
00246 void 
00247 fortran_mr2d(m, n, A, ia, ja, desc_A,
00248              B, ib, jb, desc_B)
00249   int  *ia, *ib, *ja, *jb, *m, *n;
00250   int   desc_A[DESCLEN], desc_B[DESCLEN];
00251   dcomplex *A, *B;
00252 {
00253   Cpzgemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A,
00254              B, *ib, *jb, (MDESC *) desc_B);
00255   return;
00256 }
00257 void 
00258 fortran_mr2dnew(m, n, A, ia, ja, desc_A,
00259                 B, ib, jb, desc_B, gcontext)
00260   int  *ia, *ib, *ja, *jb, *m, *n;
00261   int   desc_A[DESCLEN], desc_B[DESCLEN];
00262   dcomplex *A, *B;
00263   int  *gcontext;
00264 {
00265   Cpzgemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A,
00266             B, *ib, *jb, (MDESC *) desc_B, *gcontext);
00267   return;
00268 }
00269 static2 void init_chenille();
00270 static2 int inter_len();
00271 static2 int block2buff();
00272 static2 void buff2block();
00273 static2 void gridreshape();
00274 void
00275 Cpzgemr2do(m, n,
00276            ptrmyblock, ia, ja, ma,
00277            ptrmynewblock, ib, jb, mb)
00278   dcomplex *ptrmyblock, *ptrmynewblock;
00279 /* pointers to the memory location of the matrix and the redistributed matrix */
00280   MDESC *ma;
00281   MDESC *mb;
00282   int   ia, ja, ib, jb, m, n;
00283 {
00284   int   dummy, nprocs;
00285   int   gcontext;
00286   /* first we initialize a global grid which serve as a reference to
00287    * communicate from grid a to grid b */
00288   Cblacs_pinfo(&dummy, &nprocs);
00289   Cblacs_get(0, 0, &gcontext);
00290   Cblacs_gridinit(&gcontext, "R", 1, nprocs);
00291   Cpzgemr2d(m, n, ptrmyblock, ia, ja, ma,
00292             ptrmynewblock, ib, jb, mb, gcontext);
00293   Cblacs_gridexit(gcontext);
00294 }
00295 #define NBPARAM 20      /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis
00296                          * idem B puis ia,ja puis ib,jb */
00297 #define MAGIC_MAX 100000000
00298 void
00299 Cpzgemr2d(m, n,
00300           ptrmyblock, ia, ja, ma,
00301           ptrmynewblock, ib, jb, mb, globcontext)
00302   dcomplex *ptrmyblock, *ptrmynewblock;
00303 /* pointers to the memory location of the matrix and the redistributed matrix */
00304   MDESC *ma;
00305   MDESC *mb;
00306   int   ia, ja, ib, jb, m, n, globcontext;
00307 {
00308   dcomplex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
00309   dcomplex *recvptr;
00310   MDESC newa, newb;
00311   int  *proc0, *proc1, *param;
00312   int   mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
00313   int   i, j;
00314   int   nprow, npcol, gcontext;
00315   int   recvsize, sendsize;
00316   IDESC *h_inter;       /* to store the horizontal intersections */
00317   IDESC *v_inter;       /* to store the vertical intersections */
00318   int   hinter_nb, vinter_nb;   /* number of intrsections in both directions */
00319   int   dummy;
00320   int   p0, q0, p1, q1;
00321   int  *ra, *ca;
00322   /* end of variables */
00323   /* To simplify further calcul we change the matrix indexation from
00324    * 1..m,1..n (fortran) to 0..m-1,0..n-1 */
00325   if (m == 0 || n == 0)
00326     return;
00327   ia -= 1;
00328   ja -= 1;
00329   ib -= 1;
00330   jb -= 1;
00331   Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum);
00332   gcontext = globcontext;
00333   nprocs = nprow * npcol;
00334   /* if the global context that is given to us has not the shape of a line
00335    * (nprow != 1), create a new context.  TODO: to be optimal, we should
00336    * avoid this because it is an uncessary synchronisation */
00337   if (nprow != 1) {
00338     gridreshape(&gcontext);
00339     Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum);
00340   }
00341   Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0);
00342   /* compatibility T3D, must check myprow  and mypcol are within bounds */
00343   if (myprow0 >= p0 || mypcol0 >= q0)
00344     myprow0 = mypcol0 = -1;
00345   assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
00346   Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1);
00347   if (myprow1 >= p1 || mypcol1 >= q1)
00348     myprow1 = mypcol1 = -1;
00349   assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
00350   /* exchange the missing parameters among the processors: shape of grids and
00351    * location of the processors */
00352   param = (int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(int));
00353   ra = param + nprocs * 2 + NBPARAM;
00354   ca = param + (nprocs * 2 + NBPARAM) * 2;
00355   for (i = 0; i < nprocs * 2 + NBPARAM; i++)
00356     param[i] = MAGIC_MAX;
00357   proc0 = param + NBPARAM;
00358   proc1 = param + NBPARAM + nprocs;
00359   /* we calulate proc0 and proc1 that will give the number of a proc in
00360    * respectively a or b in the global context */
00361   if (myprow0 >= 0) {
00362     proc0[myprow0 * q0 + mypcol0] = mypnum;
00363     param[0] = p0;
00364     param[1] = q0;
00365     param[4] = ma->m;
00366     param[5] = ma->n;
00367     param[6] = ma->nbrow;
00368     param[7] = ma->nbcol;
00369     param[8] = ma->sprow;
00370     param[9] = ma->spcol;
00371     param[10] = ia;
00372     param[11] = ja;
00373   }
00374   if (myprow1 >= 0) {
00375     proc1[myprow1 * q1 + mypcol1] = mypnum;
00376     param[2] = p1;
00377     param[3] = q1;
00378     param[12] = mb->m;
00379     param[13] = mb->n;
00380     param[14] = mb->nbrow;
00381     param[15] = mb->nbcol;
00382     param[16] = mb->sprow;
00383     param[17] = mb->spcol;
00384     param[18] = ib;
00385     param[19] = jb;
00386   }
00387   Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, 1, param, 2 * nprocs + NBPARAM,
00388            ra, ca, 2 * nprocs + NBPARAM, -1, -1);
00389   newa = *ma;
00390   newb = *mb;
00391   ma = &newa;
00392   mb = &newb;
00393   if (myprow0 == -1) {
00394     p0 = param[0];
00395     q0 = param[1];
00396     ma->m = param[4];
00397     ma->n = param[5];
00398     ma->nbrow = param[6];
00399     ma->nbcol = param[7];
00400     ma->sprow = param[8];
00401     ma->spcol = param[9];
00402     ia = param[10];
00403     ja = param[11];
00404   }
00405   if (myprow1 == -1) {
00406     p1 = param[2];
00407     q1 = param[3];
00408     mb->m = param[12];
00409     mb->n = param[13];
00410     mb->nbrow = param[14];
00411     mb->nbcol = param[15];
00412     mb->sprow = param[16];
00413     mb->spcol = param[17];
00414     ib = param[18];
00415     jb = param[19];
00416   }
00417   for (i = 0; i < NBPARAM; i++) {
00418     if (param[i] == MAGIC_MAX) {
00419       fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
00420       exit(1);
00421     }
00422   }
00423 #ifndef NDEBUG
00424   for (i = 0; i < p0 * q0; i++)
00425     assert(proc0[i] >= 0 && proc0[i] < nprocs);
00426   for (i = 0; i < p1 * q1; i++)
00427     assert(proc1[i] >= 0 && proc1[i] < nprocs);
00428 #endif
00429   /* check the validity of the parameters */
00430   paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
00431   paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
00432   /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */
00433   {
00434     int   decal;
00435     ia = changeorigin(myprow0, ma->sprow, p0,
00436                       ma->nbrow, ia, &decal, &ma->sprow);
00437     ptrmyblock += decal;
00438     ja = changeorigin(mypcol0, ma->spcol, q0,
00439                       ma->nbcol, ja, &decal, &ma->spcol);
00440     ptrmyblock += decal * ma->lda;
00441     ma->m = ia + m;
00442     ma->n = ja + n;
00443     ib = changeorigin(myprow1, mb->sprow, p1,
00444                       mb->nbrow, ib, &decal, &mb->sprow);
00445     ptrmynewblock += decal;
00446     jb = changeorigin(mypcol1, mb->spcol, q1,
00447                       mb->nbcol, jb, &decal, &mb->spcol);
00448     ptrmynewblock += decal * mb->lda;
00449     mb->m = ib + m;
00450     mb->n = jb + n;
00451     if (p0 == 1)
00452       ma->nbrow = ma->m;
00453     if (q0 == 1)
00454       ma->nbcol = ma->n;
00455     if (p1 == 1)
00456       mb->nbrow = mb->m;
00457     if (q1 == 1)
00458       mb->nbcol = mb->n;
00459 #ifndef NDEBUG
00460     paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
00461     paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
00462 #endif
00463   }
00464   /* We compute the size of the memory buffer ( we choose the worst case,
00465    * when the buffer sizes == the memory block sizes). */
00466   if (myprow0 >= 0 && mypcol0 >= 0) {
00467     /* Initialize pointer variables */
00468     setmemory(&ptrsendbuff, memoryblocksize(ma));
00469   };    /* if (mypnum < p0 * q0) */
00470   if (myprow1 >= 0 && mypcol1 >= 0) {
00471     /* Initialize pointer variables */
00472     setmemory(&ptrrecvbuff, memoryblocksize(mb));
00473   };    /* if (mypnum < p1 * q1) */
00474   /* allocing room for the tabs, alloc for the worst case,local_n or local_m
00475    * intervals, in fact the worst case should be less, perhaps half that,I
00476    * should think of that one day. */
00477   h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) *
00478                                   ma->nbcol * sizeof(IDESC));
00479   v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow)
00480                                   * ma->nbrow * sizeof(IDESC));
00481   /* We go for the scanning of indices. For each processor including mypnum,
00482    * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send
00483    * it. Then for each processor, we compute the size of message to be
00484    * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements
00485    * of recvbuff the right place (scanD)(RECVBUFF)) */
00486   recvptr = ptrrecvbuff;
00487   {
00488     int   tot, myrang, step, sens;
00489     int  *sender, *recver;
00490     int   mesending, merecving;
00491     tot = max(p0 * q0, p1 * q1);
00492     init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
00493                   &sender, &recver, &myrang);
00494     if (myrang == -1)
00495       goto after_comm;
00496     mesending = myprow0 >= 0;
00497     assert(sender[myrang] >= 0 || !mesending);
00498     assert(!mesending || proc0[sender[myrang]] == mypnum);
00499     merecving = myprow1 >= 0;
00500     assert(recver[myrang] >= 0 || !merecving);
00501     assert(!merecving || proc1[recver[myrang]] == mypnum);
00502     step = tot - 1 - myrang;
00503     do {
00504       for (sens = 0; sens < 2; sens++) {
00505         /* be careful here, when we communicating with ourselves, we must
00506          * send first (myrang > step == 0) */
00507         if (mesending && recver[step] >= 0 &&
00508             (sens == 0)) {
00509           i = recver[step] / q1;
00510           j = recver[step] % q1;
00511           vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
00512                                      v_inter);
00513           hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
00514                                      h_inter);
00515           sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb,
00516                                 ptrmyblock, ma, ptrsendbuff);
00517         }       /* if (mesending...) { */
00518         if (mesending && recver[step] >= 0 &&
00519             (sens == myrang > step)) {
00520           i = recver[step] / q1;
00521           j = recver[step] % q1;
00522           if (sendsize > 0
00523               && (step != myrang || !merecving)
00524                 ) {
00525             Czgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize,
00526                      0, proc1[i * q1 + j]);
00527           }     /* sendsize > 0 */
00528         }       /* if (mesending ... */
00529         if (merecving && sender[step] >= 0 &&
00530             (sens == myrang <= step)) {
00531           i = sender[step] / q0;
00532           j = sender[step] % q0;
00533           vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
00534                                      v_inter);
00535           hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
00536                                      h_inter);
00537           recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter);
00538           if (recvsize > 0) {
00539             if (step == myrang && mesending) {
00540               Clacpy(recvsize, 1,
00541                      ptrsendbuff, recvsize,
00542                      ptrrecvbuff, recvsize);
00543             } else {
00544               Czgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize,
00545                        0, proc0[i * q0 + j]);
00546             }
00547           }     /* recvsize > 0 */
00548         }       /* if (merecving ...) */
00549         if (merecving && sender[step] >= 0 && sens == 1) {
00550           buff2block(v_inter, vinter_nb, h_inter, hinter_nb,
00551                      recvptr, ptrmynewblock, mb);
00552         }       /* if (merecving...)  */
00553       } /* for (sens = 0) */
00554       step -= 1;
00555       if (step < 0)
00556         step = tot - 1;
00557     } while (step != tot - 1 - myrang);
00558 after_comm:
00559     free(sender);
00560   }     /* { int tot,nr,ns ...} */
00561   /* don't forget to clean up things! */
00562   if (myprow1 >= 0 && mypcol1 >= 0) {
00563     freememory((char *) ptrrecvbuff);
00564   };
00565   if (myprow0 >= 0 && mypcol0 >= 0) {
00566     freememory((char *) ptrsendbuff);
00567   };
00568   if (nprow != 1)
00569     Cblacs_gridexit(gcontext);
00570   free(v_inter);
00571   free(h_inter);
00572   free(param);
00573 }/* distrib */
00574 static2 void 
00575 init_chenille(mypnum, nprocs, n0, proc0, n1, proc1, psend, precv, myrang)
00576   int   nprocs, mypnum, n0, n1;
00577   int  *proc0, *proc1, **psend, **precv, *myrang;
00578 {
00579   int   ns, nr, i, tot;
00580   int  *sender, *recver, *g0, *g1;
00581   tot = max(n0, n1);
00582   sender = (int *) mr2d_malloc((nprocs + tot) * sizeof(int) * 2);
00583   recver = sender + tot;
00584   *psend = sender;
00585   *precv = recver;
00586   g0 = recver + tot;
00587   g1 = g0 + nprocs;
00588   for (i = 0; i < nprocs; i++) {
00589     g0[i] = -1;
00590     g1[i] = -1;
00591   }
00592   for (i = 0; i < tot; i++) {
00593     sender[i] = -1;
00594     recver[i] = -1;
00595   }
00596   for (i = 0; i < n0; i++)
00597     g0[proc0[i]] = i;
00598   for (i = 0; i < n1; i++)
00599     g1[proc1[i]] = i;
00600   ns = 0;
00601   nr = 0;
00602   *myrang = -1;
00603   for (i = 0; i < nprocs; i++)
00604     if (g0[i] >= 0 && g1[i] >= 0) {
00605       if (i == mypnum)
00606         *myrang = nr;
00607       sender[ns] = g0[i];
00608       ns += 1;
00609       recver[nr] = g1[i];
00610       nr += 1;
00611       assert(ns <= n0 && nr <= n1 && nr == ns);
00612     }
00613   for (i = 0; i < nprocs; i++)
00614     if (g0[i] >= 0 && g1[i] < 0) {
00615       if (i == mypnum)
00616         *myrang = ns;
00617       sender[ns] = g0[i];
00618       ns += 1;
00619       assert(ns <= n0);
00620     }
00621   for (i = 0; i < nprocs; i++)
00622     if (g1[i] >= 0 && g0[i] < 0) {
00623       if (i == mypnum)
00624         *myrang = nr;
00625       recver[nr] = g1[i];
00626       nr += 1;
00627       assert(nr <= n1);
00628     }
00629 }
00630 #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \
00631 { \
00632 dcomplex *_a,*_b; \
00633 int _m,_n,_lda,_ldb; \
00634     int _i,_j; \
00635     _m = (mo);_n = (no); \
00636     _a = (ao);_b = (bo); \
00637     _lda = (ldao) - _m; \
00638     _ldb = (ldbo) - _m; \
00639     assert(_lda >= 0 && _ldb >= 0); \
00640     for (_j=0;_j<_n;_j++) { \
00641       for (_i=0;_i<_m;_i++) \
00642         *_b++ = *_a++; \
00643       _b += _ldb; \
00644       _a += _lda; \
00645     } \
00646 }
00647 static2 int 
00648 block2buff(vi, vinb, hi, hinb, ptra, ma, buff)
00649   int   hinb, vinb;
00650   IDESC *hi, *vi;
00651   MDESC *ma;
00652   dcomplex *buff, *ptra;
00653 {
00654   int   h, v, sizebuff;
00655   dcomplex *ptr2;
00656   sizebuff = 0;
00657   for (h = 0; h < hinb; h++) {
00658     ptr2 = ptra + hi[h].lstart * ma->lda;
00659     for (v = 0; v < vinb; v++) {
00660       Mlacpy(vi[v].len, hi[h].len,
00661              ptr2 + vi[v].lstart,
00662              ma->lda,
00663              buff + sizebuff, vi[v].len);
00664       sizebuff += hi[h].len * vi[v].len;
00665     }
00666   }
00667   return sizebuff;
00668 }
00669 static2 void 
00670 buff2block(vi, vinb, hi, hinb, buff, ptrb, mb)
00671   int   hinb, vinb;
00672   IDESC *hi, *vi;
00673   MDESC *mb;
00674   dcomplex *buff, *ptrb;
00675 {
00676   int   h, v, sizebuff;
00677   dcomplex *ptr2;
00678   sizebuff = 0;
00679   for (h = 0; h < hinb; h++) {
00680     ptr2 = ptrb + hi[h].lstart * mb->lda;
00681     for (v = 0; v < vinb; v++) {
00682       Mlacpy(vi[v].len, hi[h].len,
00683              buff + sizebuff, vi[v].len,
00684              ptr2 + vi[v].lstart,
00685              mb->lda);
00686       sizebuff += hi[h].len * vi[v].len;
00687     }
00688   }
00689 }
00690 static2 int 
00691 inter_len(hinb, hi, vinb, vi)
00692   int   hinb, vinb;
00693   IDESC *hi, *vi;
00694 {
00695   int   hlen, vlen, h, v;
00696   hlen = 0;
00697   for (h = 0; h < hinb; h++)
00698     hlen += hi[h].len;
00699   vlen = 0;
00700   for (v = 0; v < vinb; v++)
00701     vlen += vi[v].len;
00702   return hlen * vlen;
00703 }
00704 void 
00705 Clacpy(m, n, a, lda, b, ldb)
00706   dcomplex *a, *b;
00707   int   m, n, lda, ldb;
00708 {
00709   int   i, j;
00710   lda -= m;
00711   ldb -= m;
00712   assert(lda >= 0 && ldb >= 0);
00713   for (j = 0; j < n; j++) {
00714     for (i = 0; i < m; i++)
00715       *b++ = *a++;
00716     b += ldb;
00717     a += lda;
00718   }
00719 }
00720 static2 void 
00721 gridreshape(ctxtp)
00722   int  *ctxtp;
00723 {
00724   int   ori, final;     /* original context, and new context created, with
00725                          * line form */
00726   int   nprow, npcol, myrow, mycol;
00727   int  *usermap;
00728   int   i, j;
00729   ori = *ctxtp;
00730   Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol);
00731   usermap = mr2d_malloc(sizeof(int) * nprow * npcol);
00732   for (i = 0; i < nprow; i++)
00733     for (j = 0; j < npcol; j++) {
00734       usermap[i + j * nprow] = Cblacs_pnum(ori, i, j);
00735     }
00736   /* Cblacs_get(0, 0, &final); */
00737   Cblacs_get(ori, 10, &final);
00738   Cblacs_gridmap(&final, usermap, 1, 1, nprow * npcol);
00739   *ctxtp = final;
00740   free(usermap);
00741 }