ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pgemraux.c
Go to the documentation of this file.
00001 #include "redist.h"
00002 /* $Id: pgemraux.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
00003  * 
00004  * some functions used by the pigemr2d routine see file pigemr.c for more
00005  * documentation.
00006  * 
00007  * Created March 1993 by B. Tourancheau (See sccs for modifications). */
00008 #define static2 static
00009 #if defined(Add_) || defined(f77IsF2C)
00010 #define fortran_mr2d pigemr2do_
00011 #define fortran_mr2dnew pigemr2d_
00012 #elif defined(UpCase)
00013 #define fortran_mr2dnew PIGEMR2D
00014 #define fortran_mr2d PIGEMR2DO
00015 #define icopy_ ICOPY
00016 #define ilacpy_ ILACPY
00017 #else
00018 #define fortran_mr2d pigemr2do
00019 #define fortran_mr2dnew pigemr2d
00020 #define icopy_ icopy
00021 #define ilacpy_ ilacpy
00022 #endif
00023 #define Clacpy Cigelacpy
00024 void  Clacpy();
00025 typedef struct {
00026   int   desctype;
00027   int   ctxt;
00028   int   m;
00029   int   n;
00030   int   nbrow;
00031   int   nbcol;
00032   int   sprow;
00033   int   spcol;
00034   int   lda;
00035 }     MDESC;
00036 #define BLOCK_CYCLIC_2D 1
00037 typedef struct {
00038   int   lstart;
00039   int   len;
00040 }     IDESC;
00041 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
00042 #define max(A,B) ((A)>(B)?(A):(B))
00043 #define min(A,B) ((A)>(B)?(B):(A))
00044 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
00045 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
00046 #ifdef MALLOCDEBUG
00047 #define malloc mymalloc
00048 #define free myfree
00049 #define realloc myrealloc
00050 #endif
00051 /* Cblacs */
00052 extern void Cblacs_pcoord();
00053 extern int Cblacs_pnum();
00054 extern void Csetpvmtids();
00055 extern void Cblacs_get();
00056 extern void Cblacs_pinfo();
00057 extern void Cblacs_gridinfo();
00058 extern void Cblacs_gridinit();
00059 extern void Cblacs_exit();
00060 extern void Cblacs_gridexit();
00061 extern void Cblacs_setup();
00062 extern void Cigebs2d();
00063 extern void Cigebr2d();
00064 extern void Cigesd2d();
00065 extern void Cigerv2d();
00066 extern void Cigsum2d();
00067 extern void Cigamn2d();
00068 extern void Cigamx2d();
00069 extern void Cigesd2d();
00070 extern void Cigerv2d();
00071 /* lapack */
00072 void  ilacpy_();
00073 /* aux fonctions */
00074 extern int localindice();
00075 extern void *mr2d_malloc();
00076 extern int ppcm();
00077 extern int localsize();
00078 extern int memoryblocksize();
00079 extern int changeorigin();
00080 extern void paramcheck();
00081 /* tools and others function */
00082 #define scanD0 igescanD0
00083 #define dispmat igedispmat
00084 #define setmemory igesetmemory
00085 #define freememory igefreememory
00086 #define scan_intervals igescan_intervals
00087 extern void scanD0();
00088 extern void dispmat();
00089 extern void setmemory();
00090 extern void freememory();
00091 extern int scan_intervals();
00092 extern void Cpigemr2do();
00093 extern void Cpigemr2d();
00094 /* some defines for Cpigemr2do */
00095 #define SENDBUFF 0
00096 #define RECVBUFF 1
00097 #define SIZEBUFF 2
00098 #if 0
00099 #define DEBUG
00100 #endif
00101 #ifndef DEBUG
00102 #define NDEBUG
00103 #endif
00104 #include <stdio.h>
00105 #include <stdlib.h>
00106 #include <assert.h>
00107 void *
00108 mr2d_malloc(n)
00109   int   n;
00110 {
00111   void *ptr;
00112   assert(n > 0);
00113   ptr = (void *) malloc(n);
00114   if (ptr == NULL) {
00115     fprintf(stderr, "xxmr2d:out of memory\n");
00116     exit(2);
00117   }
00118   return ptr;
00119 }
00120 int 
00121 pgcd(a, b)
00122   int   a, b;
00123 {
00124   int   aux;
00125   if (a < b)
00126     return pgcd(b, a);
00127   else {
00128     aux = a % b;
00129     if (aux == 0)
00130       return b;
00131     else
00132       return pgcd(b, aux);
00133   }
00134 }
00135 int 
00136 ppcm(a, b)
00137   int   a, b;
00138 {
00139   int   pg;
00140   pg = pgcd(a, b);
00141   return a * (b / pg);
00142 }
00143 /* localsize:return the number of rows on the local processor given by its
00144  * row number myprow, of a distributed matrix with m rows distributed of on a
00145  * grid of processors with p rows with blocksize nbrow : this procedure can
00146  * also be used to compute the number of cols by replacing rows by cols */
00147 int 
00148 localsize(myprow, p, nbrow, m)
00149   int   myprow, p, nbrow, m;
00150 {
00151   int   templateheight, blockheight;
00152   templateheight = p * nbrow;
00153   if (m % templateheight != 0) {        /* not an exact boundary */
00154     if ((m % templateheight) > (nbrow * myprow)) {      /* processor
00155                                                          * (myprow,mypcol) has
00156                                                          * some elements in that
00157                                                          * incomplete template */
00158       if ((m % templateheight) >= (nbrow * (myprow + 1))) {     /* processor
00159                                                                  * (myprow,mypcol)'s
00160                                                                  * part is complete */
00161         blockheight = (m / templateheight) * nbrow + nbrow;
00162       } else {  /* processor (myprow,mypcol)'s part is not complete */
00163         blockheight = (m / templateheight) * nbrow + (m % nbrow);
00164       };        /* if ((m%templateheight) > (nbrow*(myprow+1))) */
00165     } else {    /* processor (myprow,mypcol) has no element in that
00166                  * incomplete template */
00167       blockheight = (m / templateheight) * nbrow;
00168     };  /* if ((m%templateheight) > (nbrow*myprow)) */
00169   } else {      /* exact boundary */
00170     blockheight = m / p;        /* (m/templateheight) * nbrow */
00171   };    /* if (m%templateheight !=0) */
00172   return blockheight;
00173 }
00174 /****************************************************************/
00175 /* Returns the exact memory block size corresponding to the parameters */
00176 int
00177 memoryblocksize(a)
00178   MDESC *a;
00179 {
00180   int   myprow, mypcol, p, q;
00181   /* Compute the (myprow,mypcol) indices of processor mypnum in P0xQ0 We
00182    * assume the row-major ordering of the BLACS */
00183   Cblacs_gridinfo(a->ctxt, &p, &q, &myprow, &mypcol);
00184   myprow = SHIFT(myprow, a->sprow, p);
00185   mypcol = SHIFT(mypcol, a->spcol, q);
00186   assert(myprow >= 0 && mypcol >= 0);
00187   return localsize(myprow, p, a->nbrow, a->m) *
00188         localsize(mypcol, q, a->nbcol, a->n);
00189 }
00190 void 
00191 checkequal(ctxt, a)
00192   int   a, ctxt;
00193 {
00194   int   np, dummy, nbrow, myp, b;
00195   Cblacs_gridinfo(ctxt, &nbrow, &np, &dummy, &myp);
00196   assert(nbrow == 1);
00197   if (np == 1)
00198     return;
00199   if (myp == 0) {
00200     Cigesd2d(ctxt, 1, 1, &a, 1, 0, 1);
00201     Cigerv2d(ctxt, 1, 1, &b, 1, 0, np - 1);
00202     assert(a == b);
00203   } else {
00204     Cigerv2d(ctxt, 1, 1, &b, 1, 0, myp - 1);
00205     assert(a == b);
00206     Cigesd2d(ctxt, 1, 1, &a, 1, 0, (myp + 1) % np);
00207   }
00208 }
00209 void 
00210 paramcheck(a, i, j, m, n, p, q, gcontext)
00211   MDESC *a;
00212   int   i, j, m, n, p, q;
00213 {
00214   int   p2, q2, myprow, mypcol;
00215 #ifndef NDEBUG
00216   checkequal(gcontext, p);
00217   checkequal(gcontext, q);
00218   checkequal(gcontext, a->sprow);
00219   checkequal(gcontext, a->spcol);
00220   checkequal(gcontext, a->m);
00221   checkequal(gcontext, a->n);
00222   checkequal(gcontext, i);
00223   checkequal(gcontext, j);
00224   checkequal(gcontext, a->nbrow);
00225   checkequal(gcontext, a->nbcol);
00226 #endif
00227   Cblacs_gridinfo(a->ctxt, &p2, &q2, &myprow, &mypcol);
00228   /* compatibility T3D, must check myprow  and mypcol are within bounds */
00229   if (myprow >= p2 || mypcol >= q2)
00230     myprow = mypcol = -1;
00231   if ((myprow >= 0 || mypcol >= 0) && (p2 != p && q2 != q)) {
00232     fprintf(stderr, "??MR2D:incoherent p,q parameters\n");
00233     exit(1);
00234   }
00235   assert(myprow < p && mypcol < q);
00236   if (a->sprow < 0 || a->sprow >= p || a->spcol < 0 || a->spcol >= q) {
00237     fprintf(stderr, "??MR2D:Bad first processor coordinates\n");
00238     exit(1);
00239   }
00240   if (i < 0 || j < 0 || i + m > a->m || j + n > a->n) {
00241     fprintf(stderr, "??MR2D:Bad submatrix:i=%d,j=%d,\
00242 m=%d,n=%d,M=%d,N=%d\n",
00243             i, j, m, n, a->m, a->n);
00244     exit(1);
00245   }
00246   if ((myprow >= 0 || mypcol >= 0) &&
00247       localsize(SHIFT(myprow, a->sprow, p), p, a->nbrow, a->m) > a->lda) {
00248     fprintf(stderr, "??MR2D:bad lda arg:row=%d,m=%d,p=%d,\
00249 nbrow=%d,lda=%d,sprow=%d\n",
00250             myprow, a->m, p, a->nbrow, a->lda, a->sprow);
00251     exit(1);
00252   }
00253 }
00254 /* to change from the submatrix beginning at line i to one beginning at line
00255  * i' with i'< blocksize return the line number on the local process where
00256  * the new matrix begin, the new process number, and i' */
00257 int 
00258 changeorigin(myp, sp, p, bs, i, decal, newsp)
00259   int   myp, sp, p, bs, i;
00260   int  *decal, *newsp;
00261 {
00262   int   tempheight, firstblock, firsttemp;
00263   /* we begin by changing the parameters so that ia < templatewidth,... */
00264   tempheight = bs * p;
00265   firsttemp = i / tempheight;
00266   firstblock = (i / bs) % p;
00267   *newsp = (sp + firstblock) % p;
00268   if (myp >= 0)
00269     *decal = firsttemp * bs + (SHIFT(myp, sp, p) < firstblock ? bs : 0);
00270   else
00271     *decal = 0;
00272   return i % bs;
00273 }
00274 /******************************************************************/
00275 /* Return the indice in local memory of element of indice a in the matrix */
00276 int
00277 localindice(ig, jg, templateheight, templatewidth, a)
00278   int   templateheight, templatewidth, ig, jg;
00279   MDESC *a;
00280 /* Return the indice in local memory (scattered distribution) of the element
00281  * of indice a in global matrix */
00282 {
00283   int   vtemp, htemp, vsubtemp, hsubtemp, il, jl;
00284   assert(ig >= 0 && ig < a->m && jg >= 0 && jg < a->n);
00285   /* coordinates in global matrix with the tests in intersect, ig MUST BE in
00286    * [0..m] and jg in [0..n] */
00287   /* coordinates of the template that "owns" the element */
00288   vtemp = ig / templateheight;
00289   htemp = jg / templatewidth;
00290   /* coordinates of the element in the subblock of the (vtemp, htemp)
00291    * template */
00292   vsubtemp = ig % a->nbrow;
00293   hsubtemp = jg % a->nbcol;
00294   /* coordinates of the element in the local block of the processor */
00295   il = a->nbrow * vtemp + vsubtemp;
00296   jl = a->nbcol * htemp + hsubtemp;
00297   assert(il < a->lda);
00298 #ifndef NDEBUG
00299   {
00300     int   pr, pc, p, q, lp, lq;
00301     Cblacs_gridinfo(a->ctxt, &p, &q, &pr, &pc);
00302     p = templateheight / a->nbrow;
00303     q = templatewidth / a->nbcol;
00304     lp = ig % templateheight / a->nbrow;
00305     lq = jg % templatewidth / a->nbcol;
00306     assert(lp == SHIFT(pr, a->sprow, p));
00307     assert(lq == SHIFT(pc, a->spcol, q));
00308   }
00309 #endif
00310   return (jl * a->lda + il);
00311 }