ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pztrmrdrv.c
Go to the documentation of this file.
00001 #include "redist.h"
00002 /* $Id: pztrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $
00003  * 
00004  * pztrmrdrv.c :
00005  * 
00006  * 
00007  * PURPOSE:
00008  * 
00009  * this driver is testing the PZTRMR2D routine. It calls it to obtain a new
00010  * scattered block data decomposition of a distributed COMPLEX*16 (block
00011  * scattered) matrix. Then it calls PZTRMR2D for the inverse redistribution
00012  * and checks the results with the initial data.
00013  * 
00014  * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the
00015  * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the
00016  * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition
00017  * on the processor grid p0 x q0.
00018  * 
00019  * See pztrmr.c file for detailed info on the PZTRMR2D function.
00020  * 
00021  * 
00022  * The testing parameters are read from the file TRMR2D.dat, see the file in the
00023  * distribution to have an example.
00024  * 
00025  * created by Bernard Tourancheau in April 1994.
00026  * 
00027  * modifications : see sccs history
00028  * 
00029  * ===================================
00030  * 
00031  * 
00032  * NOTE :
00033  * 
00034  * - the matrix elements are COMPLEX*16
00035  * 
00036  * - memory requirements : this procedure requires approximately 3 times the
00037  * memory space of the initial data block in grid 0 (initial block, copy for
00038  * test and second redistribution result) and 1 time the memory space of the
00039  * result data block in grid 1. with  the element size = sizeof(dcomplex)
00040  * bytes,
00041  * 
00042  * 
00043  * - use the procedures of the files:
00044  * 
00045  * pztrmr.o pztrmr2.o pztrmraux.o
00046  * 
00047  * 
00048  * ======================================
00049  * 
00050  * WARNING ASSUMPTIONS :
00051  * 
00052  * 
00053  * ========================================
00054  * 
00055  * 
00056  * Planned changes:
00057  * 
00058  * 
00059  * 
00060  * ========================================= */
00061 #define static2 static
00062 #if defined(Add_) || defined(f77IsF2C)
00063 #define fortran_mr2d pztrmr2do_
00064 #define fortran_mr2dnew pztrmr2d_
00065 #elif defined(UpCase)
00066 #define fortran_mr2dnew PZTRMR2D
00067 #define fortran_mr2d PZTRMR2DO
00068 #define zcopy_ ZCOPY
00069 #define zlacpy_ ZLACPY
00070 #else
00071 #define fortran_mr2d pztrmr2do
00072 #define fortran_mr2dnew pztrmr2d
00073 #define zcopy_ zcopy
00074 #define zlacpy_ zlacpy
00075 #endif
00076 #define Clacpy Cztrlacpy
00077 void  Clacpy();
00078 typedef struct {
00079   double r, i;
00080 }     dcomplex;
00081 typedef struct {
00082   int   desctype;
00083   int   ctxt;
00084   int   m;
00085   int   n;
00086   int   nbrow;
00087   int   nbcol;
00088   int   sprow;
00089   int   spcol;
00090   int   lda;
00091 }     MDESC;
00092 #define BLOCK_CYCLIC_2D 1
00093 typedef struct {
00094   int   gstart;
00095   int   len;
00096 }     IDESC;
00097 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
00098 #define max(A,B) ((A)>(B)?(A):(B))
00099 #define min(A,B) ((A)>(B)?(B):(A))
00100 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
00101 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
00102 #ifdef MALLOCDEBUG
00103 #define malloc mymalloc
00104 #define free myfree
00105 #define realloc myrealloc
00106 #endif
00107 /* Cblacs */
00108 extern void Cblacs_pcoord();
00109 extern int Cblacs_pnum();
00110 extern void Csetpvmtids();
00111 extern void Cblacs_get();
00112 extern void Cblacs_pinfo();
00113 extern void Cblacs_gridinfo();
00114 extern void Cblacs_gridinit();
00115 extern void Cblacs_exit();
00116 extern void Cblacs_gridexit();
00117 extern void Cblacs_setup();
00118 extern void Cigebs2d();
00119 extern void Cigebr2d();
00120 extern void Cigesd2d();
00121 extern void Cigerv2d();
00122 extern void Cigsum2d();
00123 extern void Cigamn2d();
00124 extern void Cigamx2d();
00125 extern void Czgesd2d();
00126 extern void Czgerv2d();
00127 /* lapack */
00128 void  zlacpy_();
00129 /* aux fonctions */
00130 extern int localindice();
00131 extern void *mr2d_malloc();
00132 extern int ppcm();
00133 extern int localsize();
00134 extern int memoryblocksize();
00135 extern int changeorigin();
00136 extern void paramcheck();
00137 /* tools and others function */
00138 #define scanD0 ztrscanD0
00139 #define dispmat ztrdispmat
00140 #define setmemory ztrsetmemory
00141 #define freememory ztrfreememory
00142 #define scan_intervals ztrscan_intervals
00143 extern void scanD0();
00144 extern void dispmat();
00145 extern void setmemory();
00146 extern void freememory();
00147 extern int scan_intervals();
00148 extern void Cpztrmr2do();
00149 extern void Cpztrmr2d();
00150 /* some defines for Cpztrmr2do */
00151 #define SENDBUFF 0
00152 #define RECVBUFF 1
00153 #define SIZEBUFF 2
00154 #if 0
00155 #define DEBUG
00156 #endif
00157 #ifndef DEBUG
00158 #define NDEBUG
00159 #endif
00160 #include <stdio.h>
00161 #include <stdlib.h>
00162 #include <string.h>
00163 #include <ctype.h>
00164 #include <assert.h>
00165 /* initblock: intialize the local part of a matrix with random data (well,
00166  * not very random) */
00167 static2 void
00168 initblock(block, m, n)
00169   dcomplex *block;
00170   int   m, n;
00171 {
00172   dcomplex *pdata;
00173   int   i;
00174   pdata = block;
00175   for (i = 0; i < m * n; i++, pdata++) {
00176     (*pdata).r = i;
00177   };
00178 }
00179 /* getparam:read from a file a list of integer parameters, the end of the
00180  * parameters to read is given by a NULL at the end of the args list */
00181 #ifdef __STDC__
00182 #include <stdarg.h>
00183 static void 
00184 getparam(FILE * f,...)
00185 {
00186 #else
00187 #include <varargs.h>
00188 static void 
00189 getparam(va_alist)
00190 va_dcl
00191 {
00192   FILE *f;
00193 #endif
00194   va_list ap;
00195   int   i;
00196   static int nbline;
00197   char *ptr, *next;
00198   int  *var;
00199   static char buffer[200];
00200 #ifdef __STDC__
00201   va_start(ap, f);
00202 #else
00203   va_start(ap);
00204   f = va_arg(ap, FILE *);
00205 #endif
00206   do {
00207     next = fgets(buffer, 200, f);
00208     if (next == NULL) {
00209       fprintf(stderr, "bad configuration driver file:after line %d\n", nbline);
00210       exit(1);
00211     }
00212     nbline += 1;
00213   } while (buffer[0] == '#');
00214   ptr = buffer;
00215   var = va_arg(ap, int *);
00216   while (var != NULL) {
00217     *var = strtol(ptr, &next, 10);
00218     if (ptr == next) {
00219       fprintf(stderr, "bad configuration driver file:error line %d\n", nbline);
00220       exit(1);
00221     }
00222     ptr = next;
00223     var = va_arg(ap, int *);
00224   }
00225   va_end(ap);
00226 }
00227 void 
00228 initforpvm(argc, argv)
00229   int   argc;
00230   char *argv[];
00231 {
00232   int   pnum, nproc;
00233   Cblacs_pinfo(&pnum, &nproc);
00234   if (nproc < 1) {      /* we are with PVM */
00235     if (pnum == 0) {
00236       if (argc < 2) {
00237         fprintf(stderr, "usage with PVM:xztrmr nbproc\n\
00238 \t where nbproc is the number of nodes to initialize\n");
00239         exit(1);
00240       }
00241       nproc = atoi(argv[1]);
00242     }
00243     Cblacs_setup(&pnum, &nproc);
00244   }
00245 }
00246 int
00247 main(argc, argv)
00248   int   argc;
00249   char *argv[];
00250 {
00251   /* We initialize the data-block on the current processor, then redistribute
00252    * it, and perform the inverse redistribution  to compare the local memory
00253    * with the initial one. */
00254   /* Data file */
00255   FILE *fp;
00256   int   nbre, nbremax;
00257   /* Data distribution 0 parameters */
00258   int   p0,     /* # of rows in the processor grid */
00259         q0;     /* # of columns in the processor grid */
00260   /* Data distribution 1 parameters */
00261   int   p1, q1;
00262   /* # of parameter to be read on the keyboard */
00263 #define nbparameter 24
00264   /* General variables */
00265   int   blocksize0;
00266   int   mypnum, nprocs;
00267   int   parameters[nbparameter], nberrors;
00268   int   i;
00269   int   ia, ja, ib, jb, m, n;
00270   int   gcontext, context0, context1;
00271   int   myprow1, myprow0, mypcol0, mypcol1;
00272   int   dummy;
00273   MDESC ma, mb;
00274   char *uplo, *diag;
00275   dcomplex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide;
00276 #ifdef UsingMpiBlacs
00277    MPI_Init(&argc, &argv);
00278 #endif
00279   setvbuf(stdout, NULL, _IOLBF, 0);
00280   setvbuf(stderr, NULL, _IOLBF, 0);
00281 #ifdef T3D
00282   free(malloc(14000000));
00283 #endif
00284   initforpvm(argc, argv);
00285   /* Read physical parameters */
00286   Cblacs_pinfo(&mypnum, &nprocs);
00287   /* initialize BLACS for the parameter communication */
00288   Cblacs_get(0, 0, &gcontext);
00289   Cblacs_gridinit(&gcontext, "R", nprocs, 1);
00290   Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy);
00291   if (mypnum == 0) {
00292     if ((fp = fopen("TRMR2D.dat", "r")) == NULL) {
00293       fprintf(stderr, "Can't open TRMR2D.dat\n");
00294       exit(1);
00295     };
00296     printf("\n// ZTRMR2D TESTER for COMPLEX*16 //\n");
00297     getparam(fp, &nbre, NULL);
00298     printf("////////// %d tests \n\n", nbre);
00299     parameters[0] = nbre;
00300     Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1);
00301   } else {
00302     Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0);
00303     nbre = parameters[0];
00304   };
00305   if (mypnum == 0) {
00306     printf("\n  m   n  m0  n0  sr0 sc0 i0  j0  p0  q0 nbr0 nbc0 \
00307 m1  n1  sr1 sc1 i1  j1  p1  q1 nbr1 nbc1\n\n");
00308   };
00309   /****** TEST LOOP *****/
00310   /* Here we are in grip 1xnprocs */
00311   nbremax = nbre;
00312 #ifdef DEBUG
00313   fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum);
00314 #endif
00315   while (nbre-- != 0) { /* Loop on the serie of tests */
00316     /* All the processors read the parameters so we have to be in a 1xnprocs
00317      * grid at each iteration */
00318     /* Read processors grid and matrices parameters */
00319     if (mypnum == 0) {
00320       int   u, d;
00321       getparam(fp,
00322                &m, &n,
00323                &ma.m, &ma.n, &ma.sprow, &ma.spcol,
00324                &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol,
00325                &mb.m, &mb.n, &mb.sprow, &mb.spcol,
00326                &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol,
00327                &u, &d,
00328                NULL);
00329       uplo = u ? "UPPER" : "LOWER";
00330       diag = d ? "UNIT" : "NONUNIT";
00331       printf("\t\t************* TEST # %d **********\n",
00332              nbremax - nbre);
00333       printf(" %3d %3d %3d %3d %3d %3d %3d %3d \
00334 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d",
00335              m, n,
00336              ma.m, ma.n, ma.sprow, ma.spcol,
00337              ia, ja, p0, q0, ma.nbrow, ma.nbcol,
00338              mb.m, mb.n, mb.sprow, mb.spcol,
00339              ib, jb, p1, q1, mb.nbrow, mb.nbcol);
00340       printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low",
00341              toupper(*diag) == 'U' ? "unit" : "nonunit");
00342       printf("\n");
00343       if (p0 * q0 > nprocs || p1 * q1 > nprocs) {
00344         fprintf(stderr, "not enough nodes:%d processors required\n",
00345                 max(p0 * q0, p1 * q1));
00346         exit(1);
00347       }
00348       parameters[0] = p0;
00349       parameters[1] = q0;
00350       parameters[2] = ma.nbrow;
00351       parameters[3] = ma.nbcol;
00352       parameters[4] = p1;
00353       parameters[5] = q1;
00354       parameters[6] = mb.nbrow;
00355       parameters[7] = mb.nbcol;
00356       parameters[8] = ma.m;
00357       parameters[9] = ma.n;
00358       parameters[10] = ma.sprow;
00359       parameters[11] = ma.spcol;
00360       parameters[12] = mb.sprow;
00361       parameters[13] = mb.spcol;
00362       parameters[14] = ia;
00363       parameters[15] = ja;
00364       parameters[16] = ib;
00365       parameters[17] = jb;
00366       parameters[18] = m;
00367       parameters[19] = n;
00368       parameters[20] = mb.m;
00369       parameters[21] = mb.n;
00370       parameters[22] = *uplo == 'U';
00371       parameters[23] = *diag == 'U';
00372       Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1);
00373     } else {
00374       Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0);
00375       p0 = parameters[0];
00376       q0 = parameters[1];
00377       ma.nbrow = parameters[2];
00378       ma.nbcol = parameters[3];
00379       p1 = parameters[4];
00380       q1 = parameters[5];
00381       mb.nbrow = parameters[6];
00382       mb.nbcol = parameters[7];
00383       ma.m = parameters[8];
00384       ma.n = parameters[9];
00385       ma.sprow = parameters[10];
00386       ma.spcol = parameters[11];
00387       mb.sprow = parameters[12];
00388       mb.spcol = parameters[13];
00389       ia = parameters[14];
00390       ja = parameters[15];
00391       ib = parameters[16];
00392       jb = parameters[17];
00393       m = parameters[18];
00394       n = parameters[19];
00395       mb.m = parameters[20];
00396       mb.n = parameters[21];
00397       ma.desctype = BLOCK_CYCLIC_2D;
00398       mb.desctype = BLOCK_CYCLIC_2D;
00399       uplo = parameters[22] ? "UPPER" : "LOWER";
00400       diag = parameters[23] ? "UNIT" : "NONUNIT";
00401     };
00402     Cblacs_get(0, 0, &context0);
00403     Cblacs_gridinit(&context0, "R", p0, q0);
00404     Cblacs_get(0, 0, &context1);
00405     Cblacs_gridinit(&context1, "R", p1, q1);
00406     Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0);
00407     if (myprow0 >= p0 || mypcol0 >= q0)
00408       myprow0 = mypcol0 = -1;
00409     Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1);
00410     if (myprow1 >= p1 || mypcol1 >= q1)
00411       myprow1 = mypcol1 = -1;
00412     assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
00413     assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
00414     ma.ctxt = context0;
00415     mb.ctxt = context1;
00416     /* From here, we are not assuming that only the processors working in the
00417      * redistribution are calling  xxMR2D, but the ones not concerned will do
00418      * nothing. */
00419     /* We compute the exact size of the local memory block for the memory
00420      * allocations */
00421     if (myprow0 >= 0 && mypcol0 >= 0) {
00422       blocksize0 = memoryblocksize(&ma);
00423       ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m);
00424       setmemory(&ptrmyblock, blocksize0);
00425       initblock(ptrmyblock, 1, blocksize0);
00426       setmemory(&ptrmyblockcopy, blocksize0);
00427       memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock,
00428              blocksize0 * sizeof(dcomplex));
00429       setmemory(&ptrmyblockvide, blocksize0);
00430       for (i = 0; i < blocksize0; i++)
00431         ptrmyblockvide[i].r = -1;
00432     };  /* if (mypnum < p0 * q0) */
00433     if (myprow1 >= 0 && mypcol1 >= 0) {
00434       setmemory(&ptrsavemyblock, memoryblocksize(&mb));
00435       mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m);
00436     };  /* if (mypnum < p1 * q1)  */
00437     /* Redistribute the matrix from grid 0 to grid 1 (memory location
00438      * ptrmyblock to ptrsavemyblock) */
00439     Cpztrmr2d(uplo, diag, m, n,
00440               ptrmyblock, ia, ja, &ma,
00441               ptrsavemyblock, ib, jb, &mb, gcontext);
00442     /* Perform the inverse redistribution of the matrix from grid 1 to grid 0
00443      * (memory location ptrsavemyblock to ptrmyblockvide) */
00444     Cpztrmr2d(uplo, diag, m, n,
00445               ptrsavemyblock, ib, jb, &mb,
00446               ptrmyblockvide, ia, ja, &ma, gcontext);
00447     /* Check the differences */
00448     nberrors = 0;
00449     if (myprow0 >= 0 && mypcol0 >= 0) {
00450       /* only for the processors that do have data at the begining */
00451       for (i = 0; i < blocksize0; i++) {
00452         int   li, lj, gi, gj;
00453         int   in;
00454         in = 1;
00455         li = i % ma.lda;
00456         lj = i / ma.lda;
00457         gi = (li / ma.nbrow) * p0 * ma.nbrow +
00458               SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow;
00459         gj = (lj / ma.nbcol) * q0 * ma.nbcol +
00460               SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol;
00461         assert(gi < ma.m && gj < ma.n);
00462         gi -= (ia - 1);
00463         gj -= (ja - 1);
00464         if (gi < 0 || gj < 0 || gi >= m || gj >= n)
00465           in = 0;
00466         else if (toupper(*uplo) == 'U')
00467           in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U'));
00468         else
00469           in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U'));
00470         if (!in) {
00471           ptrmyblockcopy[i].r = -1;
00472         }
00473         if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) {
00474           nberrors++;
00475           printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n"
00476                  ,mypnum, i,
00477                  ptrmyblockvide[i].r, ptrmyblockcopy[i].r);
00478         };
00479       };
00480       if (nberrors > 0) {
00481         printf("Processor %d, has tested  %d COMPLEX*16 elements,\
00482 Number of redistribution errors = %d \n",
00483                mypnum, blocksize0, nberrors);
00484       }
00485     }
00486     /* Look at the errors on all the processors at this point. */
00487     Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0);
00488     if (mypnum == 0)
00489       if (nberrors)
00490         printf("  => Total number of redistribution errors = %d \n",
00491                nberrors);
00492       else
00493         printf("TEST PASSED OK\n");
00494     /* release memory for the next iteration */
00495     if (myprow0 >= 0 && mypcol0 >= 0) {
00496       freememory((char *) ptrmyblock);
00497       freememory((char *) ptrmyblockvide);
00498       freememory((char *) ptrmyblockcopy);
00499     };  /* if (mypnum < p0 * q0) */
00500     /* release memory for the next iteration */
00501     if (myprow1 >= 0 && mypcol1 >= 0) {
00502       freememory((char *) ptrsavemyblock);
00503     };
00504     if (myprow0 >= 0)
00505       Cblacs_gridexit(context0);
00506     if (myprow1 >= 0)
00507       Cblacs_gridexit(context1);
00508   };    /* while nbre != 0 */
00509   if (mypnum == 0) {
00510     fclose(fp);
00511   };
00512   Cblacs_exit(0);
00513   return 0;
00514 }/* main */