ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
psgemrdrv.c
Go to the documentation of this file.
00001 #include "redist.h"
00002 /* $Id: psgemrdrv.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
00003  * 
00004  * psgemrdrv.c :
00005  * 
00006  * 
00007  * PURPOSE:
00008  * 
00009  * this driver is testing the PSGEMR2D routine. It calls it to obtain a new
00010  * scattered block data decomposition of a distributed REAL (block scattered)
00011  * matrix. Then it calls PSGEMR2D for the inverse redistribution and checks
00012  * the results with the initial data.
00013  * 
00014  * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the
00015  * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the
00016  * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition
00017  * on the processor grid p0 x q0.
00018  * 
00019  * See psgemr.c file for detailed info on the PSGEMR2D function.
00020  * 
00021  * 
00022  * The testing parameters are read from the file GEMR2D.dat, see the file in the
00023  * distribution to have an example.
00024  * 
00025  * created by Bernard Tourancheau in April 1994.
00026  * 
00027  * modifications : see sccs history
00028  * 
00029  * ===================================
00030  * 
00031  * 
00032  * NOTE :
00033  * 
00034  * - the matrix elements are REAL
00035  * 
00036  * - memory requirements : this procedure requires approximately 3 times the
00037  * memory space of the initial data block in grid 0 (initial block, copy for
00038  * test and second redistribution result) and 1 time the memory space of the
00039  * result data block in grid 1. with  the element size = sizeof(float) bytes,
00040  * 
00041  * 
00042  * - use the procedures of the files:
00043  * 
00044  * psgemr.o psgemr2.o psgemraux.o
00045  * 
00046  * 
00047  * ======================================
00048  * 
00049  * WARNING ASSUMPTIONS :
00050  * 
00051  * 
00052  * ========================================
00053  * 
00054  * 
00055  * Planned changes:
00056  * 
00057  * 
00058  * 
00059  * ========================================= */
00060 #define static2 static
00061 #if defined(Add_) || defined(f77IsF2C)
00062 #define fortran_mr2d psgemr2do_
00063 #define fortran_mr2dnew psgemr2d_
00064 #elif defined(UpCase)
00065 #define fortran_mr2dnew PSGEMR2D
00066 #define fortran_mr2d PSGEMR2DO
00067 #define scopy_ SCOPY
00068 #define slacpy_ SLACPY
00069 #else
00070 #define fortran_mr2d psgemr2do
00071 #define fortran_mr2dnew psgemr2d
00072 #define scopy_ scopy
00073 #define slacpy_ slacpy
00074 #endif
00075 #define Clacpy Csgelacpy
00076 void  Clacpy();
00077 typedef struct {
00078   int   desctype;
00079   int   ctxt;
00080   int   m;
00081   int   n;
00082   int   nbrow;
00083   int   nbcol;
00084   int   sprow;
00085   int   spcol;
00086   int   lda;
00087 }     MDESC;
00088 #define BLOCK_CYCLIC_2D 1
00089 typedef struct {
00090   int   lstart;
00091   int   len;
00092 }     IDESC;
00093 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
00094 #define max(A,B) ((A)>(B)?(A):(B))
00095 #define min(A,B) ((A)>(B)?(B):(A))
00096 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
00097 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
00098 #ifdef MALLOCDEBUG
00099 #define malloc mymalloc
00100 #define free myfree
00101 #define realloc myrealloc
00102 #endif
00103 /* Cblacs */
00104 extern void Cblacs_pcoord();
00105 extern int Cblacs_pnum();
00106 extern void Csetpvmtids();
00107 extern void Cblacs_get();
00108 extern void Cblacs_pinfo();
00109 extern void Cblacs_gridinfo();
00110 extern void Cblacs_gridinit();
00111 extern void Cblacs_exit();
00112 extern void Cblacs_gridexit();
00113 extern void Cblacs_setup();
00114 extern void Cigebs2d();
00115 extern void Cigebr2d();
00116 extern void Cigesd2d();
00117 extern void Cigerv2d();
00118 extern void Cigsum2d();
00119 extern void Cigamn2d();
00120 extern void Cigamx2d();
00121 extern void Csgesd2d();
00122 extern void Csgerv2d();
00123 /* lapack */
00124 void  slacpy_();
00125 /* aux fonctions */
00126 extern int localindice();
00127 extern void *mr2d_malloc();
00128 extern int ppcm();
00129 extern int localsize();
00130 extern int memoryblocksize();
00131 extern int changeorigin();
00132 extern void paramcheck();
00133 /* tools and others function */
00134 #define scanD0 sgescanD0
00135 #define dispmat sgedispmat
00136 #define setmemory sgesetmemory
00137 #define freememory sgefreememory
00138 #define scan_intervals sgescan_intervals
00139 extern void scanD0();
00140 extern void dispmat();
00141 extern void setmemory();
00142 extern void freememory();
00143 extern int scan_intervals();
00144 extern void Cpsgemr2do();
00145 extern void Cpsgemr2d();
00146 /* some defines for Cpsgemr2do */
00147 #define SENDBUFF 0
00148 #define RECVBUFF 1
00149 #define SIZEBUFF 2
00150 #if 0
00151 #define DEBUG
00152 #endif
00153 #ifndef DEBUG
00154 #define NDEBUG
00155 #endif
00156 #include <stdio.h>
00157 #include <stdlib.h>
00158 #include <string.h>
00159 #include <ctype.h>
00160 #include <assert.h>
00161 /* initblock: intialize the local part of a matrix with random data (well,
00162  * not very random) */
00163 static2 void
00164 initblock(block, m, n)
00165   float *block;
00166   int   m, n;
00167 {
00168   float *pdata;
00169   int   i;
00170   pdata = block;
00171   for (i = 0; i < m * n; i++, pdata++) {
00172     (*pdata) = i;
00173   };
00174 }
00175 /* getparam:read from a file a list of integer parameters, the end of the
00176  * parameters to read is given by a NULL at the end of the args list */
00177 #ifdef __STDC__
00178 #include <stdarg.h>
00179 static void 
00180 getparam(FILE * f,...)
00181 {
00182 #else
00183 #include <varargs.h>
00184 static void 
00185 getparam(va_alist)
00186 va_dcl
00187 {
00188   FILE *f;
00189 #endif
00190   va_list ap;
00191   int   i;
00192   static int nbline;
00193   char *ptr, *next;
00194   int  *var;
00195   static char buffer[200];
00196 #ifdef __STDC__
00197   va_start(ap, f);
00198 #else
00199   va_start(ap);
00200   f = va_arg(ap, FILE *);
00201 #endif
00202   do {
00203     next = fgets(buffer, 200, f);
00204     if (next == NULL) {
00205       fprintf(stderr, "bad configuration driver file:after line %d\n", nbline);
00206       exit(1);
00207     }
00208     nbline += 1;
00209   } while (buffer[0] == '#');
00210   ptr = buffer;
00211   var = va_arg(ap, int *);
00212   while (var != NULL) {
00213     *var = strtol(ptr, &next, 10);
00214     if (ptr == next) {
00215       fprintf(stderr, "bad configuration driver file:error line %d\n", nbline);
00216       exit(1);
00217     }
00218     ptr = next;
00219     var = va_arg(ap, int *);
00220   }
00221   va_end(ap);
00222 }
00223 void 
00224 initforpvm(argc, argv)
00225   int   argc;
00226   char *argv[];
00227 {
00228   int   pnum, nproc;
00229   Cblacs_pinfo(&pnum, &nproc);
00230   if (nproc < 1) {      /* we are with PVM */
00231     if (pnum == 0) {
00232       if (argc < 2) {
00233         fprintf(stderr, "usage with PVM:xsgemr nbproc\n\
00234 \t where nbproc is the number of nodes to initialize\n");
00235         exit(1);
00236       }
00237       nproc = atoi(argv[1]);
00238     }
00239     Cblacs_setup(&pnum, &nproc);
00240   }
00241 }
00242 int
00243 main(argc, argv)
00244   int   argc;
00245   char *argv[];
00246 {
00247   /* We initialize the data-block on the current processor, then redistribute
00248    * it, and perform the inverse redistribution  to compare the local memory
00249    * with the initial one. */
00250   /* Data file */
00251   FILE *fp;
00252   int   nbre, nbremax;
00253   /* Data distribution 0 parameters */
00254   int   p0,     /* # of rows in the processor grid */
00255         q0;     /* # of columns in the processor grid */
00256   /* Data distribution 1 parameters */
00257   int   p1, q1;
00258   /* # of parameter to be read on the keyboard */
00259 #define nbparameter 24
00260   /* General variables */
00261   int   blocksize0;
00262   int   mypnum, nprocs;
00263   int   parameters[nbparameter], nberrors;
00264   int   i;
00265   int   ia, ja, ib, jb, m, n;
00266   int   gcontext, context0, context1;
00267   int   myprow1, myprow0, mypcol0, mypcol1;
00268   int   dummy;
00269   MDESC ma, mb;
00270   float *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide;
00271 #ifdef UsingMpiBlacs
00272    MPI_Init(&argc, &argv);
00273 #endif
00274   setvbuf(stdout, NULL, _IOLBF, 0);
00275   setvbuf(stderr, NULL, _IOLBF, 0);
00276 #ifdef T3D
00277   free(malloc(14000000));
00278 #endif
00279   initforpvm(argc, argv);
00280   /* Read physical parameters */
00281   Cblacs_pinfo(&mypnum, &nprocs);
00282   /* initialize BLACS for the parameter communication */
00283   Cblacs_get(0, 0, &gcontext);
00284   Cblacs_gridinit(&gcontext, "R", nprocs, 1);
00285   Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy);
00286   if (mypnum == 0) {
00287     if ((fp = fopen("GEMR2D.dat", "r")) == NULL) {
00288       fprintf(stderr, "Can't open GEMR2D.dat\n");
00289       exit(1);
00290     };
00291     printf("\n// SGEMR2D TESTER for REAL //\n");
00292     getparam(fp, &nbre, NULL);
00293     printf("////////// %d tests \n\n", nbre);
00294     parameters[0] = nbre;
00295     Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1);
00296   } else {
00297     Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0);
00298     nbre = parameters[0];
00299   };
00300   if (mypnum == 0) {
00301     printf("\n  m   n  m0  n0  sr0 sc0 i0  j0  p0  q0 nbr0 nbc0 \
00302 m1  n1  sr1 sc1 i1  j1  p1  q1 nbr1 nbc1\n\n");
00303   };
00304   /****** TEST LOOP *****/
00305   /* Here we are in grip 1xnprocs */
00306   nbremax = nbre;
00307 #ifdef DEBUG
00308   fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum);
00309 #endif
00310   while (nbre-- != 0) { /* Loop on the serie of tests */
00311     /* All the processors read the parameters so we have to be in a 1xnprocs
00312      * grid at each iteration */
00313     /* Read processors grid and matrices parameters */
00314     if (mypnum == 0) {
00315       int   u, d;
00316       getparam(fp,
00317                &m, &n,
00318                &ma.m, &ma.n, &ma.sprow, &ma.spcol,
00319                &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol,
00320                &mb.m, &mb.n, &mb.sprow, &mb.spcol,
00321                &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol,
00322                NULL);
00323       printf("\t\t************* TEST # %d **********\n",
00324              nbremax - nbre);
00325       printf(" %3d %3d %3d %3d %3d %3d %3d %3d \
00326 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d",
00327              m, n,
00328              ma.m, ma.n, ma.sprow, ma.spcol,
00329              ia, ja, p0, q0, ma.nbrow, ma.nbcol,
00330              mb.m, mb.n, mb.sprow, mb.spcol,
00331              ib, jb, p1, q1, mb.nbrow, mb.nbcol);
00332       printf("\n");
00333       if (p0 * q0 > nprocs || p1 * q1 > nprocs) {
00334         fprintf(stderr, "not enough nodes:%d processors required\n",
00335                 max(p0 * q0, p1 * q1));
00336         exit(1);
00337       }
00338       parameters[0] = p0;
00339       parameters[1] = q0;
00340       parameters[2] = ma.nbrow;
00341       parameters[3] = ma.nbcol;
00342       parameters[4] = p1;
00343       parameters[5] = q1;
00344       parameters[6] = mb.nbrow;
00345       parameters[7] = mb.nbcol;
00346       parameters[8] = ma.m;
00347       parameters[9] = ma.n;
00348       parameters[10] = ma.sprow;
00349       parameters[11] = ma.spcol;
00350       parameters[12] = mb.sprow;
00351       parameters[13] = mb.spcol;
00352       parameters[14] = ia;
00353       parameters[15] = ja;
00354       parameters[16] = ib;
00355       parameters[17] = jb;
00356       parameters[18] = m;
00357       parameters[19] = n;
00358       parameters[20] = mb.m;
00359       parameters[21] = mb.n;
00360       Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1);
00361     } else {
00362       Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0);
00363       p0 = parameters[0];
00364       q0 = parameters[1];
00365       ma.nbrow = parameters[2];
00366       ma.nbcol = parameters[3];
00367       p1 = parameters[4];
00368       q1 = parameters[5];
00369       mb.nbrow = parameters[6];
00370       mb.nbcol = parameters[7];
00371       ma.m = parameters[8];
00372       ma.n = parameters[9];
00373       ma.sprow = parameters[10];
00374       ma.spcol = parameters[11];
00375       mb.sprow = parameters[12];
00376       mb.spcol = parameters[13];
00377       ia = parameters[14];
00378       ja = parameters[15];
00379       ib = parameters[16];
00380       jb = parameters[17];
00381       m = parameters[18];
00382       n = parameters[19];
00383       mb.m = parameters[20];
00384       mb.n = parameters[21];
00385       ma.desctype = BLOCK_CYCLIC_2D;
00386       mb.desctype = BLOCK_CYCLIC_2D;
00387     };
00388     Cblacs_get(0, 0, &context0);
00389     Cblacs_gridinit(&context0, "R", p0, q0);
00390     Cblacs_get(0, 0, &context1);
00391     Cblacs_gridinit(&context1, "R", p1, q1);
00392     Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0);
00393     if (myprow0 >= p0 || mypcol0 >= q0)
00394       myprow0 = mypcol0 = -1;
00395     Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1);
00396     if (myprow1 >= p1 || mypcol1 >= q1)
00397       myprow1 = mypcol1 = -1;
00398     assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
00399     assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
00400     ma.ctxt = context0;
00401     mb.ctxt = context1;
00402     /* From here, we are not assuming that only the processors working in the
00403      * redistribution are calling  xxMR2D, but the ones not concerned will do
00404      * nothing. */
00405     /* We compute the exact size of the local memory block for the memory
00406      * allocations */
00407     if (myprow0 >= 0 && mypcol0 >= 0) {
00408       blocksize0 = memoryblocksize(&ma);
00409       ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m);
00410       setmemory(&ptrmyblock, blocksize0);
00411       initblock(ptrmyblock, 1, blocksize0);
00412       setmemory(&ptrmyblockcopy, blocksize0);
00413       memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock,
00414              blocksize0 * sizeof(float));
00415       setmemory(&ptrmyblockvide, blocksize0);
00416       for (i = 0; i < blocksize0; i++)
00417         ptrmyblockvide[i] = -1;
00418     };  /* if (mypnum < p0 * q0) */
00419     if (myprow1 >= 0 && mypcol1 >= 0) {
00420       setmemory(&ptrsavemyblock, memoryblocksize(&mb));
00421       mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m);
00422     };  /* if (mypnum < p1 * q1)  */
00423     /* Redistribute the matrix from grid 0 to grid 1 (memory location
00424      * ptrmyblock to ptrsavemyblock) */
00425     Cpsgemr2d(m, n,
00426               ptrmyblock, ia, ja, &ma,
00427               ptrsavemyblock, ib, jb, &mb, gcontext);
00428     /* Perform the inverse redistribution of the matrix from grid 1 to grid 0
00429      * (memory location ptrsavemyblock to ptrmyblockvide) */
00430     Cpsgemr2d(m, n,
00431               ptrsavemyblock, ib, jb, &mb,
00432               ptrmyblockvide, ia, ja, &ma, gcontext);
00433     /* Check the differences */
00434     nberrors = 0;
00435     if (myprow0 >= 0 && mypcol0 >= 0) {
00436       /* only for the processors that do have data at the begining */
00437       for (i = 0; i < blocksize0; i++) {
00438         int   li, lj, gi, gj;
00439         int   in;
00440         in = 1;
00441         li = i % ma.lda;
00442         lj = i / ma.lda;
00443         gi = (li / ma.nbrow) * p0 * ma.nbrow +
00444               SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow;
00445         gj = (lj / ma.nbcol) * q0 * ma.nbcol +
00446               SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol;
00447         assert(gi < ma.m && gj < ma.n);
00448         gi -= (ia - 1);
00449         gj -= (ja - 1);
00450         if (gi < 0 || gj < 0 || gi >= m || gj >= n)
00451           in = 0;
00452         if (!in) {
00453           ptrmyblockcopy[i] = -1;
00454         }
00455         if (ptrmyblockvide[i] != ptrmyblockcopy[i]) {
00456           nberrors++;
00457         };
00458       };
00459       if (nberrors > 0) {
00460         printf("Processor %d, has tested  %d REAL elements,\
00461 Number of redistribution errors = %d \n",
00462                mypnum, blocksize0, nberrors);
00463       }
00464     }
00465     /* Look at the errors on all the processors at this point. */
00466     Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0);
00467     if (mypnum == 0)
00468       if (nberrors)
00469         printf("  => Total number of redistribution errors = %d \n",
00470                nberrors);
00471       else
00472         printf("TEST PASSED OK\n");
00473     /* release memory for the next iteration */
00474     if (myprow0 >= 0 && mypcol0 >= 0) {
00475       freememory((char *) ptrmyblock);
00476       freememory((char *) ptrmyblockvide);
00477       freememory((char *) ptrmyblockcopy);
00478     };  /* if (mypnum < p0 * q0) */
00479     /* release memory for the next iteration */
00480     if (myprow1 >= 0 && mypcol1 >= 0) {
00481       freememory((char *) ptrsavemyblock);
00482     };
00483     if (myprow0 >= 0)
00484       Cblacs_gridexit(context0);
00485     if (myprow1 >= 0)
00486       Cblacs_gridexit(context1);
00487   };    /* while nbre != 0 */
00488   if (mypnum == 0) {
00489     fclose(fp);
00490   };
00491   Cblacs_exit(0);
00492   return 0;
00493 }/* main */