|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 #include "redist.h" 00002 /* $Id: pstrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ 00003 * 00004 * pstrmrdrv.c : 00005 * 00006 * 00007 * PURPOSE: 00008 * 00009 * this driver is testing the PSTRMR2D routine. It calls it to obtain a new 00010 * scattered block data decomposition of a distributed REAL (block scattered) 00011 * matrix. Then it calls PSTRMR2D for the inverse redistribution and checks 00012 * the results with the initial data. 00013 * 00014 * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the 00015 * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the 00016 * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition 00017 * on the processor grid p0 x q0. 00018 * 00019 * See pstrmr.c file for detailed info on the PSTRMR2D function. 00020 * 00021 * 00022 * The testing parameters are read from the file TRMR2D.dat, see the file in the 00023 * distribution to have an example. 00024 * 00025 * created by Bernard Tourancheau in April 1994. 00026 * 00027 * modifications : see sccs history 00028 * 00029 * =================================== 00030 * 00031 * 00032 * NOTE : 00033 * 00034 * - the matrix elements are REAL 00035 * 00036 * - memory requirements : this procedure requires approximately 3 times the 00037 * memory space of the initial data block in grid 0 (initial block, copy for 00038 * test and second redistribution result) and 1 time the memory space of the 00039 * result data block in grid 1. with the element size = sizeof(float) bytes, 00040 * 00041 * 00042 * - use the procedures of the files: 00043 * 00044 * pstrmr.o pstrmr2.o pstrmraux.o 00045 * 00046 * 00047 * ====================================== 00048 * 00049 * WARNING ASSUMPTIONS : 00050 * 00051 * 00052 * ======================================== 00053 * 00054 * 00055 * Planned changes: 00056 * 00057 * 00058 * 00059 * ========================================= */ 00060 #define static2 static 00061 #if defined(Add_) || defined(f77IsF2C) 00062 #define fortran_mr2d pstrmr2do_ 00063 #define fortran_mr2dnew pstrmr2d_ 00064 #elif defined(UpCase) 00065 #define fortran_mr2dnew PSTRMR2D 00066 #define fortran_mr2d PSTRMR2DO 00067 #define scopy_ SCOPY 00068 #define slacpy_ SLACPY 00069 #else 00070 #define fortran_mr2d pstrmr2do 00071 #define fortran_mr2dnew pstrmr2d 00072 #define scopy_ scopy 00073 #define slacpy_ slacpy 00074 #endif 00075 #define Clacpy Cstrlacpy 00076 void Clacpy(); 00077 typedef struct { 00078 int desctype; 00079 int ctxt; 00080 int m; 00081 int n; 00082 int nbrow; 00083 int nbcol; 00084 int sprow; 00085 int spcol; 00086 int lda; 00087 } MDESC; 00088 #define BLOCK_CYCLIC_2D 1 00089 typedef struct { 00090 int gstart; 00091 int len; 00092 } IDESC; 00093 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) 00094 #define max(A,B) ((A)>(B)?(A):(B)) 00095 #define min(A,B) ((A)>(B)?(B):(A)) 00096 #define DIVUP(a,b) ( ((a)-1) /(b)+1) 00097 #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) 00098 #ifdef MALLOCDEBUG 00099 #define malloc mymalloc 00100 #define free myfree 00101 #define realloc myrealloc 00102 #endif 00103 /* Cblacs */ 00104 extern void Cblacs_pcoord(); 00105 extern int Cblacs_pnum(); 00106 extern void Csetpvmtids(); 00107 extern void Cblacs_get(); 00108 extern void Cblacs_pinfo(); 00109 extern void Cblacs_gridinfo(); 00110 extern void Cblacs_gridinit(); 00111 extern void Cblacs_exit(); 00112 extern void Cblacs_gridexit(); 00113 extern void Cblacs_setup(); 00114 extern void Cigebs2d(); 00115 extern void Cigebr2d(); 00116 extern void Cigesd2d(); 00117 extern void Cigerv2d(); 00118 extern void Cigsum2d(); 00119 extern void Cigamn2d(); 00120 extern void Cigamx2d(); 00121 extern void Csgesd2d(); 00122 extern void Csgerv2d(); 00123 /* lapack */ 00124 void slacpy_(); 00125 /* aux fonctions */ 00126 extern int localindice(); 00127 extern void *mr2d_malloc(); 00128 extern int ppcm(); 00129 extern int localsize(); 00130 extern int memoryblocksize(); 00131 extern int changeorigin(); 00132 extern void paramcheck(); 00133 /* tools and others function */ 00134 #define scanD0 strscanD0 00135 #define dispmat strdispmat 00136 #define setmemory strsetmemory 00137 #define freememory strfreememory 00138 #define scan_intervals strscan_intervals 00139 extern void scanD0(); 00140 extern void dispmat(); 00141 extern void setmemory(); 00142 extern void freememory(); 00143 extern int scan_intervals(); 00144 extern void Cpstrmr2do(); 00145 extern void Cpstrmr2d(); 00146 /* some defines for Cpstrmr2do */ 00147 #define SENDBUFF 0 00148 #define RECVBUFF 1 00149 #define SIZEBUFF 2 00150 #if 0 00151 #define DEBUG 00152 #endif 00153 #ifndef DEBUG 00154 #define NDEBUG 00155 #endif 00156 #include <stdio.h> 00157 #include <stdlib.h> 00158 #include <string.h> 00159 #include <ctype.h> 00160 #include <assert.h> 00161 /* initblock: intialize the local part of a matrix with random data (well, 00162 * not very random) */ 00163 static2 void 00164 initblock(block, m, n) 00165 float *block; 00166 int m, n; 00167 { 00168 float *pdata; 00169 int i; 00170 pdata = block; 00171 for (i = 0; i < m * n; i++, pdata++) { 00172 (*pdata) = i; 00173 }; 00174 } 00175 /* getparam:read from a file a list of integer parameters, the end of the 00176 * parameters to read is given by a NULL at the end of the args list */ 00177 #ifdef __STDC__ 00178 #include <stdarg.h> 00179 static void 00180 getparam(FILE * f,...) 00181 { 00182 #else 00183 #include <varargs.h> 00184 static void 00185 getparam(va_alist) 00186 va_dcl 00187 { 00188 FILE *f; 00189 #endif 00190 va_list ap; 00191 int i; 00192 static int nbline; 00193 char *ptr, *next; 00194 int *var; 00195 static char buffer[200]; 00196 #ifdef __STDC__ 00197 va_start(ap, f); 00198 #else 00199 va_start(ap); 00200 f = va_arg(ap, FILE *); 00201 #endif 00202 do { 00203 next = fgets(buffer, 200, f); 00204 if (next == NULL) { 00205 fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); 00206 exit(1); 00207 } 00208 nbline += 1; 00209 } while (buffer[0] == '#'); 00210 ptr = buffer; 00211 var = va_arg(ap, int *); 00212 while (var != NULL) { 00213 *var = strtol(ptr, &next, 10); 00214 if (ptr == next) { 00215 fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); 00216 exit(1); 00217 } 00218 ptr = next; 00219 var = va_arg(ap, int *); 00220 } 00221 va_end(ap); 00222 } 00223 void 00224 initforpvm(argc, argv) 00225 int argc; 00226 char *argv[]; 00227 { 00228 int pnum, nproc; 00229 Cblacs_pinfo(&pnum, &nproc); 00230 if (nproc < 1) { /* we are with PVM */ 00231 if (pnum == 0) { 00232 if (argc < 2) { 00233 fprintf(stderr, "usage with PVM:xstrmr nbproc\n\ 00234 \t where nbproc is the number of nodes to initialize\n"); 00235 exit(1); 00236 } 00237 nproc = atoi(argv[1]); 00238 } 00239 Cblacs_setup(&pnum, &nproc); 00240 } 00241 } 00242 int 00243 main(argc, argv) 00244 int argc; 00245 char *argv[]; 00246 { 00247 /* We initialize the data-block on the current processor, then redistribute 00248 * it, and perform the inverse redistribution to compare the local memory 00249 * with the initial one. */ 00250 /* Data file */ 00251 FILE *fp; 00252 int nbre, nbremax; 00253 /* Data distribution 0 parameters */ 00254 int p0, /* # of rows in the processor grid */ 00255 q0; /* # of columns in the processor grid */ 00256 /* Data distribution 1 parameters */ 00257 int p1, q1; 00258 /* # of parameter to be read on the keyboard */ 00259 #define nbparameter 24 00260 /* General variables */ 00261 int blocksize0; 00262 int mypnum, nprocs; 00263 int parameters[nbparameter], nberrors; 00264 int i; 00265 int ia, ja, ib, jb, m, n; 00266 int gcontext, context0, context1; 00267 int myprow1, myprow0, mypcol0, mypcol1; 00268 int dummy; 00269 MDESC ma, mb; 00270 char *uplo, *diag; 00271 float *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; 00272 #ifdef UsingMpiBlacs 00273 MPI_Init(&argc, &argv); 00274 #endif 00275 setvbuf(stdout, NULL, _IOLBF, 0); 00276 setvbuf(stderr, NULL, _IOLBF, 0); 00277 #ifdef T3D 00278 free(malloc(14000000)); 00279 #endif 00280 initforpvm(argc, argv); 00281 /* Read physical parameters */ 00282 Cblacs_pinfo(&mypnum, &nprocs); 00283 /* initialize BLACS for the parameter communication */ 00284 Cblacs_get(0, 0, &gcontext); 00285 Cblacs_gridinit(&gcontext, "R", nprocs, 1); 00286 Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); 00287 if (mypnum == 0) { 00288 if ((fp = fopen("TRMR2D.dat", "r")) == NULL) { 00289 fprintf(stderr, "Can't open TRMR2D.dat\n"); 00290 exit(1); 00291 }; 00292 printf("\n// STRMR2D TESTER for REAL //\n"); 00293 getparam(fp, &nbre, NULL); 00294 printf("////////// %d tests \n\n", nbre); 00295 parameters[0] = nbre; 00296 Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); 00297 } else { 00298 Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); 00299 nbre = parameters[0]; 00300 }; 00301 if (mypnum == 0) { 00302 printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ 00303 m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); 00304 }; 00305 /****** TEST LOOP *****/ 00306 /* Here we are in grip 1xnprocs */ 00307 nbremax = nbre; 00308 #ifdef DEBUG 00309 fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); 00310 #endif 00311 while (nbre-- != 0) { /* Loop on the serie of tests */ 00312 /* All the processors read the parameters so we have to be in a 1xnprocs 00313 * grid at each iteration */ 00314 /* Read processors grid and matrices parameters */ 00315 if (mypnum == 0) { 00316 int u, d; 00317 getparam(fp, 00318 &m, &n, 00319 &ma.m, &ma.n, &ma.sprow, &ma.spcol, 00320 &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, 00321 &mb.m, &mb.n, &mb.sprow, &mb.spcol, 00322 &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, 00323 &u, &d, 00324 NULL); 00325 uplo = u ? "UPPER" : "LOWER"; 00326 diag = d ? "UNIT" : "NONUNIT"; 00327 printf("\t\t************* TEST # %d **********\n", 00328 nbremax - nbre); 00329 printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ 00330 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", 00331 m, n, 00332 ma.m, ma.n, ma.sprow, ma.spcol, 00333 ia, ja, p0, q0, ma.nbrow, ma.nbcol, 00334 mb.m, mb.n, mb.sprow, mb.spcol, 00335 ib, jb, p1, q1, mb.nbrow, mb.nbcol); 00336 printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low", 00337 toupper(*diag) == 'U' ? "unit" : "nonunit"); 00338 printf("\n"); 00339 if (p0 * q0 > nprocs || p1 * q1 > nprocs) { 00340 fprintf(stderr, "not enough nodes:%d processors required\n", 00341 max(p0 * q0, p1 * q1)); 00342 exit(1); 00343 } 00344 parameters[0] = p0; 00345 parameters[1] = q0; 00346 parameters[2] = ma.nbrow; 00347 parameters[3] = ma.nbcol; 00348 parameters[4] = p1; 00349 parameters[5] = q1; 00350 parameters[6] = mb.nbrow; 00351 parameters[7] = mb.nbcol; 00352 parameters[8] = ma.m; 00353 parameters[9] = ma.n; 00354 parameters[10] = ma.sprow; 00355 parameters[11] = ma.spcol; 00356 parameters[12] = mb.sprow; 00357 parameters[13] = mb.spcol; 00358 parameters[14] = ia; 00359 parameters[15] = ja; 00360 parameters[16] = ib; 00361 parameters[17] = jb; 00362 parameters[18] = m; 00363 parameters[19] = n; 00364 parameters[20] = mb.m; 00365 parameters[21] = mb.n; 00366 parameters[22] = *uplo == 'U'; 00367 parameters[23] = *diag == 'U'; 00368 Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); 00369 } else { 00370 Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); 00371 p0 = parameters[0]; 00372 q0 = parameters[1]; 00373 ma.nbrow = parameters[2]; 00374 ma.nbcol = parameters[3]; 00375 p1 = parameters[4]; 00376 q1 = parameters[5]; 00377 mb.nbrow = parameters[6]; 00378 mb.nbcol = parameters[7]; 00379 ma.m = parameters[8]; 00380 ma.n = parameters[9]; 00381 ma.sprow = parameters[10]; 00382 ma.spcol = parameters[11]; 00383 mb.sprow = parameters[12]; 00384 mb.spcol = parameters[13]; 00385 ia = parameters[14]; 00386 ja = parameters[15]; 00387 ib = parameters[16]; 00388 jb = parameters[17]; 00389 m = parameters[18]; 00390 n = parameters[19]; 00391 mb.m = parameters[20]; 00392 mb.n = parameters[21]; 00393 ma.desctype = BLOCK_CYCLIC_2D; 00394 mb.desctype = BLOCK_CYCLIC_2D; 00395 uplo = parameters[22] ? "UPPER" : "LOWER"; 00396 diag = parameters[23] ? "UNIT" : "NONUNIT"; 00397 }; 00398 Cblacs_get(0, 0, &context0); 00399 Cblacs_gridinit(&context0, "R", p0, q0); 00400 Cblacs_get(0, 0, &context1); 00401 Cblacs_gridinit(&context1, "R", p1, q1); 00402 Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); 00403 if (myprow0 >= p0 || mypcol0 >= q0) 00404 myprow0 = mypcol0 = -1; 00405 Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); 00406 if (myprow1 >= p1 || mypcol1 >= q1) 00407 myprow1 = mypcol1 = -1; 00408 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); 00409 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); 00410 ma.ctxt = context0; 00411 mb.ctxt = context1; 00412 /* From here, we are not assuming that only the processors working in the 00413 * redistribution are calling xxMR2D, but the ones not concerned will do 00414 * nothing. */ 00415 /* We compute the exact size of the local memory block for the memory 00416 * allocations */ 00417 if (myprow0 >= 0 && mypcol0 >= 0) { 00418 blocksize0 = memoryblocksize(&ma); 00419 ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); 00420 setmemory(&ptrmyblock, blocksize0); 00421 initblock(ptrmyblock, 1, blocksize0); 00422 setmemory(&ptrmyblockcopy, blocksize0); 00423 memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, 00424 blocksize0 * sizeof(float)); 00425 setmemory(&ptrmyblockvide, blocksize0); 00426 for (i = 0; i < blocksize0; i++) 00427 ptrmyblockvide[i] = -1; 00428 }; /* if (mypnum < p0 * q0) */ 00429 if (myprow1 >= 0 && mypcol1 >= 0) { 00430 setmemory(&ptrsavemyblock, memoryblocksize(&mb)); 00431 mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); 00432 }; /* if (mypnum < p1 * q1) */ 00433 /* Redistribute the matrix from grid 0 to grid 1 (memory location 00434 * ptrmyblock to ptrsavemyblock) */ 00435 Cpstrmr2d(uplo, diag, m, n, 00436 ptrmyblock, ia, ja, &ma, 00437 ptrsavemyblock, ib, jb, &mb, gcontext); 00438 /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 00439 * (memory location ptrsavemyblock to ptrmyblockvide) */ 00440 Cpstrmr2d(uplo, diag, m, n, 00441 ptrsavemyblock, ib, jb, &mb, 00442 ptrmyblockvide, ia, ja, &ma, gcontext); 00443 /* Check the differences */ 00444 nberrors = 0; 00445 if (myprow0 >= 0 && mypcol0 >= 0) { 00446 /* only for the processors that do have data at the begining */ 00447 for (i = 0; i < blocksize0; i++) { 00448 int li, lj, gi, gj; 00449 int in; 00450 in = 1; 00451 li = i % ma.lda; 00452 lj = i / ma.lda; 00453 gi = (li / ma.nbrow) * p0 * ma.nbrow + 00454 SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; 00455 gj = (lj / ma.nbcol) * q0 * ma.nbcol + 00456 SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; 00457 assert(gi < ma.m && gj < ma.n); 00458 gi -= (ia - 1); 00459 gj -= (ja - 1); 00460 if (gi < 0 || gj < 0 || gi >= m || gj >= n) 00461 in = 0; 00462 else if (toupper(*uplo) == 'U') 00463 in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U')); 00464 else 00465 in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U')); 00466 if (!in) { 00467 ptrmyblockcopy[i] = -1; 00468 } 00469 if (ptrmyblockvide[i] != ptrmyblockcopy[i]) { 00470 nberrors++; 00471 }; 00472 }; 00473 if (nberrors > 0) { 00474 printf("Processor %d, has tested %d REAL elements,\ 00475 Number of redistribution errors = %d \n", 00476 mypnum, blocksize0, nberrors); 00477 } 00478 } 00479 /* Look at the errors on all the processors at this point. */ 00480 Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); 00481 if (mypnum == 0) 00482 if (nberrors) 00483 printf(" => Total number of redistribution errors = %d \n", 00484 nberrors); 00485 else 00486 printf("TEST PASSED OK\n"); 00487 /* release memory for the next iteration */ 00488 if (myprow0 >= 0 && mypcol0 >= 0) { 00489 freememory((char *) ptrmyblock); 00490 freememory((char *) ptrmyblockvide); 00491 freememory((char *) ptrmyblockcopy); 00492 }; /* if (mypnum < p0 * q0) */ 00493 /* release memory for the next iteration */ 00494 if (myprow1 >= 0 && mypcol1 >= 0) { 00495 freememory((char *) ptrsavemyblock); 00496 }; 00497 if (myprow0 >= 0) 00498 Cblacs_gridexit(context0); 00499 if (myprow1 >= 0) 00500 Cblacs_gridexit(context1); 00501 }; /* while nbre != 0 */ 00502 if (mypnum == 0) { 00503 fclose(fp); 00504 }; 00505 Cblacs_exit(0); 00506 return 0; 00507 }/* main */