#include "redist.h" /* $Id: pgemraux.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ * * some functions used by the pigemr2d routine see file pigemr.c for more * documentation. * * Created March 1993 by B. Tourancheau (See sccs for modifications). */ #define static2 static #if defined(Add_) || defined(f77IsF2C) #define fortran_mr2d pigemr2do_ #define fortran_mr2dnew pigemr2d_ #elif defined(UpCase) #define fortran_mr2dnew PIGEMR2D #define fortran_mr2d PIGEMR2DO #define icopy_ ICOPY #define ilacpy_ ILACPY #else #define fortran_mr2d pigemr2do #define fortran_mr2dnew pigemr2d #define icopy_ icopy #define ilacpy_ ilacpy #endif #define Clacpy Cigelacpy void Clacpy(); typedef struct { int desctype; int ctxt; int m; int n; int nbrow; int nbcol; int sprow; int spcol; int lda; } MDESC; #define BLOCK_CYCLIC_2D 1 typedef struct { int lstart; int len; } IDESC; #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) #define max(A,B) ((A)>(B)?(A):(B)) #define min(A,B) ((A)>(B)?(B):(A)) #define DIVUP(a,b) ( ((a)-1) /(b)+1) #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) #ifdef MALLOCDEBUG #define malloc mymalloc #define free myfree #define realloc myrealloc #endif /* Cblacs */ extern void Cblacs_pcoord(); extern int Cblacs_pnum(); extern void Csetpvmtids(); extern void Cblacs_get(); extern void Cblacs_pinfo(); extern void Cblacs_gridinfo(); extern void Cblacs_gridinit(); extern void Cblacs_exit(); extern void Cblacs_gridexit(); extern void Cblacs_setup(); extern void Cigebs2d(); extern void Cigebr2d(); extern void Cigesd2d(); extern void Cigerv2d(); extern void Cigsum2d(); extern void Cigamn2d(); extern void Cigamx2d(); extern void Cigesd2d(); extern void Cigerv2d(); /* lapack */ void ilacpy_(); /* aux fonctions */ extern int localindice(); extern void *mr2d_malloc(); extern int ppcm(); extern int localsize(); extern int memoryblocksize(); extern int changeorigin(); extern void paramcheck(); /* tools and others function */ #define scanD0 igescanD0 #define dispmat igedispmat #define setmemory igesetmemory #define freememory igefreememory #define scan_intervals igescan_intervals extern void scanD0(); extern void dispmat(); extern void setmemory(); extern void freememory(); extern int scan_intervals(); extern void Cpigemr2do(); extern void Cpigemr2d(); /* some defines for Cpigemr2do */ #define SENDBUFF 0 #define RECVBUFF 1 #define SIZEBUFF 2 #if 0 #define DEBUG #endif #ifndef DEBUG #define NDEBUG #endif #include #include #include void * mr2d_malloc(n) int n; { void *ptr; assert(n > 0); ptr = (void *) malloc(n); if (ptr == NULL) { fprintf(stderr, "xxmr2d:out of memory\n"); exit(2); } return ptr; } int pgcd(a, b) int a, b; { int aux; if (a < b) return pgcd(b, a); else { aux = a % b; if (aux == 0) return b; else return pgcd(b, aux); } } int ppcm(a, b) int a, b; { int pg; pg = pgcd(a, b); return a * (b / pg); } /* localsize:return the number of rows on the local processor given by its * row number myprow, of a distributed matrix with m rows distributed of on a * grid of processors with p rows with blocksize nbrow : this procedure can * also be used to compute the number of cols by replacing rows by cols */ int localsize(myprow, p, nbrow, m) int myprow, p, nbrow, m; { int templateheight, blockheight; templateheight = p * nbrow; if (m % templateheight != 0) { /* not an exact boundary */ if ((m % templateheight) > (nbrow * myprow)) { /* processor * (myprow,mypcol) has * some elements in that * incomplete template */ if ((m % templateheight) >= (nbrow * (myprow + 1))) { /* processor * (myprow,mypcol)'s * part is complete */ blockheight = (m / templateheight) * nbrow + nbrow; } else { /* processor (myprow,mypcol)'s part is not complete */ blockheight = (m / templateheight) * nbrow + (m % nbrow); }; /* if ((m%templateheight) > (nbrow*(myprow+1))) */ } else { /* processor (myprow,mypcol) has no element in that * incomplete template */ blockheight = (m / templateheight) * nbrow; }; /* if ((m%templateheight) > (nbrow*myprow)) */ } else { /* exact boundary */ blockheight = m / p; /* (m/templateheight) * nbrow */ }; /* if (m%templateheight !=0) */ return blockheight; } /****************************************************************/ /* Returns the exact memory block size corresponding to the parameters */ int memoryblocksize(a) MDESC *a; { int myprow, mypcol, p, q; /* Compute the (myprow,mypcol) indices of processor mypnum in P0xQ0 We * assume the row-major ordering of the BLACS */ Cblacs_gridinfo(a->ctxt, &p, &q, &myprow, &mypcol); myprow = SHIFT(myprow, a->sprow, p); mypcol = SHIFT(mypcol, a->spcol, q); assert(myprow >= 0 && mypcol >= 0); return localsize(myprow, p, a->nbrow, a->m) * localsize(mypcol, q, a->nbcol, a->n); } void checkequal(ctxt, a) int a, ctxt; { int np, dummy, nbrow, myp, b; Cblacs_gridinfo(ctxt, &nbrow, &np, &dummy, &myp); assert(nbrow == 1); if (np == 1) return; if (myp == 0) { Cigesd2d(ctxt, 1, 1, &a, 1, 0, 1); Cigerv2d(ctxt, 1, 1, &b, 1, 0, np - 1); assert(a == b); } else { Cigerv2d(ctxt, 1, 1, &b, 1, 0, myp - 1); assert(a == b); Cigesd2d(ctxt, 1, 1, &a, 1, 0, (myp + 1) % np); } } void paramcheck(a, i, j, m, n, p, q, gcontext) MDESC *a; int i, j, m, n, p, q; { int p2, q2, myprow, mypcol; #ifndef NDEBUG checkequal(gcontext, p); checkequal(gcontext, q); checkequal(gcontext, a->sprow); checkequal(gcontext, a->spcol); checkequal(gcontext, a->m); checkequal(gcontext, a->n); checkequal(gcontext, i); checkequal(gcontext, j); checkequal(gcontext, a->nbrow); checkequal(gcontext, a->nbcol); #endif Cblacs_gridinfo(a->ctxt, &p2, &q2, &myprow, &mypcol); /* compatibility T3D, must check myprow and mypcol are within bounds */ if (myprow >= p2 || mypcol >= q2) myprow = mypcol = -1; if ((myprow >= 0 || mypcol >= 0) && (p2 != p && q2 != q)) { fprintf(stderr, "??MR2D:incoherent p,q parameters\n"); exit(1); } assert(myprow < p && mypcol < q); if (a->sprow < 0 || a->sprow >= p || a->spcol < 0 || a->spcol >= q) { fprintf(stderr, "??MR2D:Bad first processor coordinates\n"); exit(1); } if (i < 0 || j < 0 || i + m > a->m || j + n > a->n) { fprintf(stderr, "??MR2D:Bad submatrix:i=%d,j=%d,\ m=%d,n=%d,M=%d,N=%d\n", i, j, m, n, a->m, a->n); exit(1); } if ((myprow >= 0 || mypcol >= 0) && localsize(SHIFT(myprow, a->sprow, p), p, a->nbrow, a->m) > a->lda) { fprintf(stderr, "??MR2D:bad lda arg:row=%d,m=%d,p=%d,\ nbrow=%d,lda=%d,sprow=%d\n", myprow, a->m, p, a->nbrow, a->lda, a->sprow); exit(1); } } /* to change from the submatrix beginning at line i to one beginning at line * i' with i'< blocksize return the line number on the local process where * the new matrix begin, the new process number, and i' */ int changeorigin(myp, sp, p, bs, i, decal, newsp) int myp, sp, p, bs, i; int *decal, *newsp; { int tempheight, firstblock, firsttemp; /* we begin by changing the parameters so that ia < templatewidth,... */ tempheight = bs * p; firsttemp = i / tempheight; firstblock = (i / bs) % p; *newsp = (sp + firstblock) % p; if (myp >= 0) *decal = firsttemp * bs + (SHIFT(myp, sp, p) < firstblock ? bs : 0); else *decal = 0; return i % bs; } /******************************************************************/ /* Return the indice in local memory of element of indice a in the matrix */ int localindice(ig, jg, templateheight, templatewidth, a) int templateheight, templatewidth, ig, jg; MDESC *a; /* Return the indice in local memory (scattered distribution) of the element * of indice a in global matrix */ { int vtemp, htemp, vsubtemp, hsubtemp, il, jl; assert(ig >= 0 && ig < a->m && jg >= 0 && jg < a->n); /* coordinates in global matrix with the tests in intersect, ig MUST BE in * [0..m] and jg in [0..n] */ /* coordinates of the template that "owns" the element */ vtemp = ig / templateheight; htemp = jg / templatewidth; /* coordinates of the element in the subblock of the (vtemp, htemp) * template */ vsubtemp = ig % a->nbrow; hsubtemp = jg % a->nbcol; /* coordinates of the element in the local block of the processor */ il = a->nbrow * vtemp + vsubtemp; jl = a->nbcol * htemp + hsubtemp; assert(il < a->lda); #ifndef NDEBUG { int pr, pc, p, q, lp, lq; Cblacs_gridinfo(a->ctxt, &p, &q, &pr, &pc); p = templateheight / a->nbrow; q = templatewidth / a->nbcol; lp = ig % templateheight / a->nbrow; lq = jg % templatewidth / a->nbcol; assert(lp == SHIFT(pr, a->sprow, p)); assert(lq == SHIFT(pc, a->spcol, q)); } #endif return (jl * a->lda + il); }