|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 #include "redist.h" 00002 /* $Id: pztrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $ 00003 * 00004 * pztrmrdrv.c : 00005 * 00006 * 00007 * PURPOSE: 00008 * 00009 * this driver is testing the PZTRMR2D routine. It calls it to obtain a new 00010 * scattered block data decomposition of a distributed COMPLEX*16 (block 00011 * scattered) matrix. Then it calls PZTRMR2D for the inverse redistribution 00012 * and checks the results with the initial data. 00013 * 00014 * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the 00015 * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the 00016 * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition 00017 * on the processor grid p0 x q0. 00018 * 00019 * See pztrmr.c file for detailed info on the PZTRMR2D function. 00020 * 00021 * 00022 * The testing parameters are read from the file TRMR2D.dat, see the file in the 00023 * distribution to have an example. 00024 * 00025 * created by Bernard Tourancheau in April 1994. 00026 * 00027 * modifications : see sccs history 00028 * 00029 * =================================== 00030 * 00031 * 00032 * NOTE : 00033 * 00034 * - the matrix elements are COMPLEX*16 00035 * 00036 * - memory requirements : this procedure requires approximately 3 times the 00037 * memory space of the initial data block in grid 0 (initial block, copy for 00038 * test and second redistribution result) and 1 time the memory space of the 00039 * result data block in grid 1. with the element size = sizeof(dcomplex) 00040 * bytes, 00041 * 00042 * 00043 * - use the procedures of the files: 00044 * 00045 * pztrmr.o pztrmr2.o pztrmraux.o 00046 * 00047 * 00048 * ====================================== 00049 * 00050 * WARNING ASSUMPTIONS : 00051 * 00052 * 00053 * ======================================== 00054 * 00055 * 00056 * Planned changes: 00057 * 00058 * 00059 * 00060 * ========================================= */ 00061 #define static2 static 00062 #if defined(Add_) || defined(f77IsF2C) 00063 #define fortran_mr2d pztrmr2do_ 00064 #define fortran_mr2dnew pztrmr2d_ 00065 #elif defined(UpCase) 00066 #define fortran_mr2dnew PZTRMR2D 00067 #define fortran_mr2d PZTRMR2DO 00068 #define zcopy_ ZCOPY 00069 #define zlacpy_ ZLACPY 00070 #else 00071 #define fortran_mr2d pztrmr2do 00072 #define fortran_mr2dnew pztrmr2d 00073 #define zcopy_ zcopy 00074 #define zlacpy_ zlacpy 00075 #endif 00076 #define Clacpy Cztrlacpy 00077 void Clacpy(); 00078 typedef struct { 00079 double r, i; 00080 } dcomplex; 00081 typedef struct { 00082 int desctype; 00083 int ctxt; 00084 int m; 00085 int n; 00086 int nbrow; 00087 int nbcol; 00088 int sprow; 00089 int spcol; 00090 int lda; 00091 } MDESC; 00092 #define BLOCK_CYCLIC_2D 1 00093 typedef struct { 00094 int gstart; 00095 int len; 00096 } IDESC; 00097 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) 00098 #define max(A,B) ((A)>(B)?(A):(B)) 00099 #define min(A,B) ((A)>(B)?(B):(A)) 00100 #define DIVUP(a,b) ( ((a)-1) /(b)+1) 00101 #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) 00102 #ifdef MALLOCDEBUG 00103 #define malloc mymalloc 00104 #define free myfree 00105 #define realloc myrealloc 00106 #endif 00107 /* Cblacs */ 00108 extern void Cblacs_pcoord(); 00109 extern int Cblacs_pnum(); 00110 extern void Csetpvmtids(); 00111 extern void Cblacs_get(); 00112 extern void Cblacs_pinfo(); 00113 extern void Cblacs_gridinfo(); 00114 extern void Cblacs_gridinit(); 00115 extern void Cblacs_exit(); 00116 extern void Cblacs_gridexit(); 00117 extern void Cblacs_setup(); 00118 extern void Cigebs2d(); 00119 extern void Cigebr2d(); 00120 extern void Cigesd2d(); 00121 extern void Cigerv2d(); 00122 extern void Cigsum2d(); 00123 extern void Cigamn2d(); 00124 extern void Cigamx2d(); 00125 extern void Czgesd2d(); 00126 extern void Czgerv2d(); 00127 /* lapack */ 00128 void zlacpy_(); 00129 /* aux fonctions */ 00130 extern int localindice(); 00131 extern void *mr2d_malloc(); 00132 extern int ppcm(); 00133 extern int localsize(); 00134 extern int memoryblocksize(); 00135 extern int changeorigin(); 00136 extern void paramcheck(); 00137 /* tools and others function */ 00138 #define scanD0 ztrscanD0 00139 #define dispmat ztrdispmat 00140 #define setmemory ztrsetmemory 00141 #define freememory ztrfreememory 00142 #define scan_intervals ztrscan_intervals 00143 extern void scanD0(); 00144 extern void dispmat(); 00145 extern void setmemory(); 00146 extern void freememory(); 00147 extern int scan_intervals(); 00148 extern void Cpztrmr2do(); 00149 extern void Cpztrmr2d(); 00150 /* some defines for Cpztrmr2do */ 00151 #define SENDBUFF 0 00152 #define RECVBUFF 1 00153 #define SIZEBUFF 2 00154 #if 0 00155 #define DEBUG 00156 #endif 00157 #ifndef DEBUG 00158 #define NDEBUG 00159 #endif 00160 #include <stdio.h> 00161 #include <stdlib.h> 00162 #include <string.h> 00163 #include <ctype.h> 00164 #include <assert.h> 00165 /* initblock: intialize the local part of a matrix with random data (well, 00166 * not very random) */ 00167 static2 void 00168 initblock(block, m, n) 00169 dcomplex *block; 00170 int m, n; 00171 { 00172 dcomplex *pdata; 00173 int i; 00174 pdata = block; 00175 for (i = 0; i < m * n; i++, pdata++) { 00176 (*pdata).r = i; 00177 }; 00178 } 00179 /* getparam:read from a file a list of integer parameters, the end of the 00180 * parameters to read is given by a NULL at the end of the args list */ 00181 #ifdef __STDC__ 00182 #include <stdarg.h> 00183 static void 00184 getparam(FILE * f,...) 00185 { 00186 #else 00187 #include <varargs.h> 00188 static void 00189 getparam(va_alist) 00190 va_dcl 00191 { 00192 FILE *f; 00193 #endif 00194 va_list ap; 00195 int i; 00196 static int nbline; 00197 char *ptr, *next; 00198 int *var; 00199 static char buffer[200]; 00200 #ifdef __STDC__ 00201 va_start(ap, f); 00202 #else 00203 va_start(ap); 00204 f = va_arg(ap, FILE *); 00205 #endif 00206 do { 00207 next = fgets(buffer, 200, f); 00208 if (next == NULL) { 00209 fprintf(stderr, "bad configuration driver file:after line %d\n", nbline); 00210 exit(1); 00211 } 00212 nbline += 1; 00213 } while (buffer[0] == '#'); 00214 ptr = buffer; 00215 var = va_arg(ap, int *); 00216 while (var != NULL) { 00217 *var = strtol(ptr, &next, 10); 00218 if (ptr == next) { 00219 fprintf(stderr, "bad configuration driver file:error line %d\n", nbline); 00220 exit(1); 00221 } 00222 ptr = next; 00223 var = va_arg(ap, int *); 00224 } 00225 va_end(ap); 00226 } 00227 void 00228 initforpvm(argc, argv) 00229 int argc; 00230 char *argv[]; 00231 { 00232 int pnum, nproc; 00233 Cblacs_pinfo(&pnum, &nproc); 00234 if (nproc < 1) { /* we are with PVM */ 00235 if (pnum == 0) { 00236 if (argc < 2) { 00237 fprintf(stderr, "usage with PVM:xztrmr nbproc\n\ 00238 \t where nbproc is the number of nodes to initialize\n"); 00239 exit(1); 00240 } 00241 nproc = atoi(argv[1]); 00242 } 00243 Cblacs_setup(&pnum, &nproc); 00244 } 00245 } 00246 int 00247 main(argc, argv) 00248 int argc; 00249 char *argv[]; 00250 { 00251 /* We initialize the data-block on the current processor, then redistribute 00252 * it, and perform the inverse redistribution to compare the local memory 00253 * with the initial one. */ 00254 /* Data file */ 00255 FILE *fp; 00256 int nbre, nbremax; 00257 /* Data distribution 0 parameters */ 00258 int p0, /* # of rows in the processor grid */ 00259 q0; /* # of columns in the processor grid */ 00260 /* Data distribution 1 parameters */ 00261 int p1, q1; 00262 /* # of parameter to be read on the keyboard */ 00263 #define nbparameter 24 00264 /* General variables */ 00265 int blocksize0; 00266 int mypnum, nprocs; 00267 int parameters[nbparameter], nberrors; 00268 int i; 00269 int ia, ja, ib, jb, m, n; 00270 int gcontext, context0, context1; 00271 int myprow1, myprow0, mypcol0, mypcol1; 00272 int dummy; 00273 MDESC ma, mb; 00274 char *uplo, *diag; 00275 dcomplex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide; 00276 #ifdef UsingMpiBlacs 00277 MPI_Init(&argc, &argv); 00278 #endif 00279 setvbuf(stdout, NULL, _IOLBF, 0); 00280 setvbuf(stderr, NULL, _IOLBF, 0); 00281 #ifdef T3D 00282 free(malloc(14000000)); 00283 #endif 00284 initforpvm(argc, argv); 00285 /* Read physical parameters */ 00286 Cblacs_pinfo(&mypnum, &nprocs); 00287 /* initialize BLACS for the parameter communication */ 00288 Cblacs_get(0, 0, &gcontext); 00289 Cblacs_gridinit(&gcontext, "R", nprocs, 1); 00290 Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy); 00291 if (mypnum == 0) { 00292 if ((fp = fopen("TRMR2D.dat", "r")) == NULL) { 00293 fprintf(stderr, "Can't open TRMR2D.dat\n"); 00294 exit(1); 00295 }; 00296 printf("\n// ZTRMR2D TESTER for COMPLEX*16 //\n"); 00297 getparam(fp, &nbre, NULL); 00298 printf("////////// %d tests \n\n", nbre); 00299 parameters[0] = nbre; 00300 Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1); 00301 } else { 00302 Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0); 00303 nbre = parameters[0]; 00304 }; 00305 if (mypnum == 0) { 00306 printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \ 00307 m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n"); 00308 }; 00309 /****** TEST LOOP *****/ 00310 /* Here we are in grip 1xnprocs */ 00311 nbremax = nbre; 00312 #ifdef DEBUG 00313 fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum); 00314 #endif 00315 while (nbre-- != 0) { /* Loop on the serie of tests */ 00316 /* All the processors read the parameters so we have to be in a 1xnprocs 00317 * grid at each iteration */ 00318 /* Read processors grid and matrices parameters */ 00319 if (mypnum == 0) { 00320 int u, d; 00321 getparam(fp, 00322 &m, &n, 00323 &ma.m, &ma.n, &ma.sprow, &ma.spcol, 00324 &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol, 00325 &mb.m, &mb.n, &mb.sprow, &mb.spcol, 00326 &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol, 00327 &u, &d, 00328 NULL); 00329 uplo = u ? "UPPER" : "LOWER"; 00330 diag = d ? "UNIT" : "NONUNIT"; 00331 printf("\t\t************* TEST # %d **********\n", 00332 nbremax - nbre); 00333 printf(" %3d %3d %3d %3d %3d %3d %3d %3d \ 00334 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d", 00335 m, n, 00336 ma.m, ma.n, ma.sprow, ma.spcol, 00337 ia, ja, p0, q0, ma.nbrow, ma.nbcol, 00338 mb.m, mb.n, mb.sprow, mb.spcol, 00339 ib, jb, p1, q1, mb.nbrow, mb.nbcol); 00340 printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low", 00341 toupper(*diag) == 'U' ? "unit" : "nonunit"); 00342 printf("\n"); 00343 if (p0 * q0 > nprocs || p1 * q1 > nprocs) { 00344 fprintf(stderr, "not enough nodes:%d processors required\n", 00345 max(p0 * q0, p1 * q1)); 00346 exit(1); 00347 } 00348 parameters[0] = p0; 00349 parameters[1] = q0; 00350 parameters[2] = ma.nbrow; 00351 parameters[3] = ma.nbcol; 00352 parameters[4] = p1; 00353 parameters[5] = q1; 00354 parameters[6] = mb.nbrow; 00355 parameters[7] = mb.nbcol; 00356 parameters[8] = ma.m; 00357 parameters[9] = ma.n; 00358 parameters[10] = ma.sprow; 00359 parameters[11] = ma.spcol; 00360 parameters[12] = mb.sprow; 00361 parameters[13] = mb.spcol; 00362 parameters[14] = ia; 00363 parameters[15] = ja; 00364 parameters[16] = ib; 00365 parameters[17] = jb; 00366 parameters[18] = m; 00367 parameters[19] = n; 00368 parameters[20] = mb.m; 00369 parameters[21] = mb.n; 00370 parameters[22] = *uplo == 'U'; 00371 parameters[23] = *diag == 'U'; 00372 Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1); 00373 } else { 00374 Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0); 00375 p0 = parameters[0]; 00376 q0 = parameters[1]; 00377 ma.nbrow = parameters[2]; 00378 ma.nbcol = parameters[3]; 00379 p1 = parameters[4]; 00380 q1 = parameters[5]; 00381 mb.nbrow = parameters[6]; 00382 mb.nbcol = parameters[7]; 00383 ma.m = parameters[8]; 00384 ma.n = parameters[9]; 00385 ma.sprow = parameters[10]; 00386 ma.spcol = parameters[11]; 00387 mb.sprow = parameters[12]; 00388 mb.spcol = parameters[13]; 00389 ia = parameters[14]; 00390 ja = parameters[15]; 00391 ib = parameters[16]; 00392 jb = parameters[17]; 00393 m = parameters[18]; 00394 n = parameters[19]; 00395 mb.m = parameters[20]; 00396 mb.n = parameters[21]; 00397 ma.desctype = BLOCK_CYCLIC_2D; 00398 mb.desctype = BLOCK_CYCLIC_2D; 00399 uplo = parameters[22] ? "UPPER" : "LOWER"; 00400 diag = parameters[23] ? "UNIT" : "NONUNIT"; 00401 }; 00402 Cblacs_get(0, 0, &context0); 00403 Cblacs_gridinit(&context0, "R", p0, q0); 00404 Cblacs_get(0, 0, &context1); 00405 Cblacs_gridinit(&context1, "R", p1, q1); 00406 Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0); 00407 if (myprow0 >= p0 || mypcol0 >= q0) 00408 myprow0 = mypcol0 = -1; 00409 Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1); 00410 if (myprow1 >= p1 || mypcol1 >= q1) 00411 myprow1 = mypcol1 = -1; 00412 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1)); 00413 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1)); 00414 ma.ctxt = context0; 00415 mb.ctxt = context1; 00416 /* From here, we are not assuming that only the processors working in the 00417 * redistribution are calling xxMR2D, but the ones not concerned will do 00418 * nothing. */ 00419 /* We compute the exact size of the local memory block for the memory 00420 * allocations */ 00421 if (myprow0 >= 0 && mypcol0 >= 0) { 00422 blocksize0 = memoryblocksize(&ma); 00423 ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m); 00424 setmemory(&ptrmyblock, blocksize0); 00425 initblock(ptrmyblock, 1, blocksize0); 00426 setmemory(&ptrmyblockcopy, blocksize0); 00427 memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock, 00428 blocksize0 * sizeof(dcomplex)); 00429 setmemory(&ptrmyblockvide, blocksize0); 00430 for (i = 0; i < blocksize0; i++) 00431 ptrmyblockvide[i].r = -1; 00432 }; /* if (mypnum < p0 * q0) */ 00433 if (myprow1 >= 0 && mypcol1 >= 0) { 00434 setmemory(&ptrsavemyblock, memoryblocksize(&mb)); 00435 mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m); 00436 }; /* if (mypnum < p1 * q1) */ 00437 /* Redistribute the matrix from grid 0 to grid 1 (memory location 00438 * ptrmyblock to ptrsavemyblock) */ 00439 Cpztrmr2d(uplo, diag, m, n, 00440 ptrmyblock, ia, ja, &ma, 00441 ptrsavemyblock, ib, jb, &mb, gcontext); 00442 /* Perform the inverse redistribution of the matrix from grid 1 to grid 0 00443 * (memory location ptrsavemyblock to ptrmyblockvide) */ 00444 Cpztrmr2d(uplo, diag, m, n, 00445 ptrsavemyblock, ib, jb, &mb, 00446 ptrmyblockvide, ia, ja, &ma, gcontext); 00447 /* Check the differences */ 00448 nberrors = 0; 00449 if (myprow0 >= 0 && mypcol0 >= 0) { 00450 /* only for the processors that do have data at the begining */ 00451 for (i = 0; i < blocksize0; i++) { 00452 int li, lj, gi, gj; 00453 int in; 00454 in = 1; 00455 li = i % ma.lda; 00456 lj = i / ma.lda; 00457 gi = (li / ma.nbrow) * p0 * ma.nbrow + 00458 SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow; 00459 gj = (lj / ma.nbcol) * q0 * ma.nbcol + 00460 SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol; 00461 assert(gi < ma.m && gj < ma.n); 00462 gi -= (ia - 1); 00463 gj -= (ja - 1); 00464 if (gi < 0 || gj < 0 || gi >= m || gj >= n) 00465 in = 0; 00466 else if (toupper(*uplo) == 'U') 00467 in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U')); 00468 else 00469 in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U')); 00470 if (!in) { 00471 ptrmyblockcopy[i].r = -1; 00472 } 00473 if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) { 00474 nberrors++; 00475 printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n" 00476 ,mypnum, i, 00477 ptrmyblockvide[i].r, ptrmyblockcopy[i].r); 00478 }; 00479 }; 00480 if (nberrors > 0) { 00481 printf("Processor %d, has tested %d COMPLEX*16 elements,\ 00482 Number of redistribution errors = %d \n", 00483 mypnum, blocksize0, nberrors); 00484 } 00485 } 00486 /* Look at the errors on all the processors at this point. */ 00487 Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0); 00488 if (mypnum == 0) 00489 if (nberrors) 00490 printf(" => Total number of redistribution errors = %d \n", 00491 nberrors); 00492 else 00493 printf("TEST PASSED OK\n"); 00494 /* release memory for the next iteration */ 00495 if (myprow0 >= 0 && mypcol0 >= 0) { 00496 freememory((char *) ptrmyblock); 00497 freememory((char *) ptrmyblockvide); 00498 freememory((char *) ptrmyblockcopy); 00499 }; /* if (mypnum < p0 * q0) */ 00500 /* release memory for the next iteration */ 00501 if (myprow1 >= 0 && mypcol1 >= 0) { 00502 freememory((char *) ptrsavemyblock); 00503 }; 00504 if (myprow0 >= 0) 00505 Cblacs_gridexit(context0); 00506 if (myprow1 >= 0) 00507 Cblacs_gridexit(context1); 00508 }; /* while nbre != 0 */ 00509 if (mypnum == 0) { 00510 fclose(fp); 00511 }; 00512 Cblacs_exit(0); 00513 return 0; 00514 }/* main */