ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pstrmrdrv.c
Go to the documentation of this file.
1 #include "redist.h"
2 /* $Id: pstrmrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $
3  *
4  * pstrmrdrv.c :
5  *
6  *
7  * PURPOSE:
8  *
9  * this driver is testing the PSTRMR2D routine. It calls it to obtain a new
10  * scattered block data decomposition of a distributed REAL (block scattered)
11  * matrix. Then it calls PSTRMR2D for the inverse redistribution and checks
12  * the results with the initial data.
13  *
14  * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the
15  * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the
16  * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition
17  * on the processor grid p0 x q0.
18  *
19  * See pstrmr.c file for detailed info on the PSTRMR2D function.
20  *
21  *
22  * The testing parameters are read from the file TRMR2D.dat, see the file in the
23  * distribution to have an example.
24  *
25  * created by Bernard Tourancheau in April 1994.
26  *
27  * modifications : see sccs history
28  *
29  * ===================================
30  *
31  *
32  * NOTE :
33  *
34  * - the matrix elements are REAL
35  *
36  * - memory requirements : this procedure requires approximately 3 times the
37  * memory space of the initial data block in grid 0 (initial block, copy for
38  * test and second redistribution result) and 1 time the memory space of the
39  * result data block in grid 1. with the element size = sizeof(float) bytes,
40  *
41  *
42  * - use the procedures of the files:
43  *
44  * pstrmr.o pstrmr2.o pstrmraux.o
45  *
46  *
47  * ======================================
48  *
49  * WARNING ASSUMPTIONS :
50  *
51  *
52  * ========================================
53  *
54  *
55  * Planned changes:
56  *
57  *
58  *
59  * ========================================= */
60 #define static2 static
61 #if defined(Add_) || defined(f77IsF2C)
62 #define fortran_mr2d pstrmr2do_
63 #define fortran_mr2dnew pstrmr2d_
64 #elif defined(UpCase)
65 #define fortran_mr2dnew PSTRMR2D
66 #define fortran_mr2d PSTRMR2DO
67 #define scopy_ SCOPY
68 #define slacpy_ SLACPY
69 #else
70 #define fortran_mr2d pstrmr2do
71 #define fortran_mr2dnew pstrmr2d
72 #define scopy_ scopy
73 #define slacpy_ slacpy
74 #endif
75 #define Clacpy Cstrlacpy
76 void Clacpy();
77 typedef struct {
78  int desctype;
79  int ctxt;
80  int m;
81  int n;
82  int nbrow;
83  int nbcol;
84  int sprow;
85  int spcol;
86  int lda;
87 } MDESC;
88 #define BLOCK_CYCLIC_2D 1
89 typedef struct {
90  int gstart;
91  int len;
92 } IDESC;
93 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
94 #define max(A,B) ((A)>(B)?(A):(B))
95 #define min(A,B) ((A)>(B)?(B):(A))
96 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
97 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
98 #ifdef MALLOCDEBUG
99 #define malloc mymalloc
100 #define free myfree
101 #define realloc myrealloc
102 #endif
103 /* Cblacs */
104 extern void Cblacs_pcoord();
105 extern int Cblacs_pnum();
106 extern void Csetpvmtids();
107 extern void Cblacs_get();
108 extern void Cblacs_pinfo();
109 extern void Cblacs_gridinfo();
110 extern void Cblacs_gridinit();
111 extern void Cblacs_exit();
112 extern void Cblacs_gridexit();
113 extern void Cblacs_setup();
114 extern void Cigebs2d();
115 extern void Cigebr2d();
116 extern void Cigesd2d();
117 extern void Cigerv2d();
118 extern void Cigsum2d();
119 extern void Cigamn2d();
120 extern void Cigamx2d();
121 extern void Csgesd2d();
122 extern void Csgerv2d();
123 /* lapack */
124 void slacpy_();
125 /* aux fonctions */
126 extern int localindice();
127 extern void *mr2d_malloc();
128 extern int ppcm();
129 extern int localsize();
130 extern int memoryblocksize();
131 extern int changeorigin();
132 extern void paramcheck();
133 /* tools and others function */
134 #define scanD0 strscanD0
135 #define dispmat strdispmat
136 #define setmemory strsetmemory
137 #define freememory strfreememory
138 #define scan_intervals strscan_intervals
139 extern void scanD0();
140 extern void dispmat();
141 extern void setmemory();
142 extern void freememory();
143 extern int scan_intervals();
144 extern void Cpstrmr2do();
145 extern void Cpstrmr2d();
146 /* some defines for Cpstrmr2do */
147 #define SENDBUFF 0
148 #define RECVBUFF 1
149 #define SIZEBUFF 2
150 #if 0
151 #define DEBUG
152 #endif
153 #ifndef DEBUG
154 #define NDEBUG
155 #endif
156 #include <stdio.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <ctype.h>
160 #include <assert.h>
161 /* initblock: intialize the local part of a matrix with random data (well,
162  * not very random) */
163 static2 void
164 initblock(block, m, n)
165  float *block;
166  int m, n;
167 {
168  float *pdata;
169  int i;
170  pdata = block;
171  for (i = 0; i < m * n; i++, pdata++) {
172  (*pdata) = i;
173  };
174 }
175 /* getparam:read from a file a list of integer parameters, the end of the
176  * parameters to read is given by a NULL at the end of the args list */
177 #ifdef __STDC__
178 #include <stdarg.h>
179 static void
180 getparam(FILE * f,...)
181 {
182 #else
183 #include <varargs.h>
184 static void
185 getparam(va_alist)
186 va_dcl
187 {
188  FILE *f;
189 #endif
190  va_list ap;
191  int i;
192  static int nbline;
193  char *ptr, *next;
194  int *var;
195  static char buffer[200];
196 #ifdef __STDC__
197  va_start(ap, f);
198 #else
199  va_start(ap);
200  f = va_arg(ap, FILE *);
201 #endif
202  do {
203  next = fgets(buffer, 200, f);
204  if (next == NULL) {
205  fprintf(stderr, "bad configuration driver file:after line %d\n", nbline);
206  exit(1);
207  }
208  nbline += 1;
209  } while (buffer[0] == '#');
210  ptr = buffer;
211  var = va_arg(ap, int *);
212  while (var != NULL) {
213  *var = strtol(ptr, &next, 10);
214  if (ptr == next) {
215  fprintf(stderr, "bad configuration driver file:error line %d\n", nbline);
216  exit(1);
217  }
218  ptr = next;
219  var = va_arg(ap, int *);
220  }
221  va_end(ap);
222 }
223 void
224 initforpvm(argc, argv)
225  int argc;
226  char *argv[];
227 {
228  int pnum, nproc;
229  Cblacs_pinfo(&pnum, &nproc);
230  if (nproc < 1) { /* we are with PVM */
231  if (pnum == 0) {
232  if (argc < 2) {
233  fprintf(stderr, "usage with PVM:xstrmr nbproc\n\
234 \t where nbproc is the number of nodes to initialize\n");
235  exit(1);
236  }
237  nproc = atoi(argv[1]);
238  }
239  Cblacs_setup(&pnum, &nproc);
240  }
241 }
242 int
243 main(argc, argv)
244  int argc;
245  char *argv[];
246 {
247  /* We initialize the data-block on the current processor, then redistribute
248  * it, and perform the inverse redistribution to compare the local memory
249  * with the initial one. */
250  /* Data file */
251  FILE *fp;
252  int nbre, nbremax;
253  /* Data distribution 0 parameters */
254  int p0, /* # of rows in the processor grid */
255  q0; /* # of columns in the processor grid */
256  /* Data distribution 1 parameters */
257  int p1, q1;
258  /* # of parameter to be read on the keyboard */
259 #define nbparameter 24
260  /* General variables */
261  int blocksize0;
262  int mypnum, nprocs;
263  int parameters[nbparameter], nberrors;
264  int i;
265  int ia, ja, ib, jb, m, n;
266  int gcontext, context0, context1;
267  int myprow1, myprow0, mypcol0, mypcol1;
268  int dummy;
269  MDESC ma, mb;
270  char *uplo, *diag;
271  float *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide;
272 #ifdef UsingMpiBlacs
273  MPI_Init(&argc, &argv);
274 #endif
275  setvbuf(stdout, NULL, _IOLBF, 0);
276  setvbuf(stderr, NULL, _IOLBF, 0);
277 #ifdef T3D
278  free(malloc(14000000));
279 #endif
280  initforpvm(argc, argv);
281  /* Read physical parameters */
282  Cblacs_pinfo(&mypnum, &nprocs);
283  /* initialize BLACS for the parameter communication */
284  Cblacs_get(0, 0, &gcontext);
285  Cblacs_gridinit(&gcontext, "R", nprocs, 1);
286  Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy);
287  if (mypnum == 0) {
288  if ((fp = fopen("TRMR2D.dat", "r")) == NULL) {
289  fprintf(stderr, "Can't open TRMR2D.dat\n");
290  exit(1);
291  };
292  printf("\n// STRMR2D TESTER for REAL //\n");
293  getparam(fp, &nbre, NULL);
294  printf("////////// %d tests \n\n", nbre);
295  parameters[0] = nbre;
296  Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1);
297  } else {
298  Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0);
299  nbre = parameters[0];
300  };
301  if (mypnum == 0) {
302  printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \
303 m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n");
304  };
305  /****** TEST LOOP *****/
306  /* Here we are in grip 1xnprocs */
307  nbremax = nbre;
308 #ifdef DEBUG
309  fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum);
310 #endif
311  while (nbre-- != 0) { /* Loop on the serie of tests */
312  /* All the processors read the parameters so we have to be in a 1xnprocs
313  * grid at each iteration */
314  /* Read processors grid and matrices parameters */
315  if (mypnum == 0) {
316  int u, d;
317  getparam(fp,
318  &m, &n,
319  &ma.m, &ma.n, &ma.sprow, &ma.spcol,
320  &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol,
321  &mb.m, &mb.n, &mb.sprow, &mb.spcol,
322  &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol,
323  &u, &d,
324  NULL);
325  uplo = u ? "UPPER" : "LOWER";
326  diag = d ? "UNIT" : "NONUNIT";
327  printf("\t\t************* TEST # %d **********\n",
328  nbremax - nbre);
329  printf(" %3d %3d %3d %3d %3d %3d %3d %3d \
330 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d",
331  m, n,
332  ma.m, ma.n, ma.sprow, ma.spcol,
333  ia, ja, p0, q0, ma.nbrow, ma.nbcol,
334  mb.m, mb.n, mb.sprow, mb.spcol,
335  ib, jb, p1, q1, mb.nbrow, mb.nbcol);
336  printf(" %s %s", toupper(*uplo) == 'U' ? "up" : "low",
337  toupper(*diag) == 'U' ? "unit" : "nonunit");
338  printf("\n");
339  if (p0 * q0 > nprocs || p1 * q1 > nprocs) {
340  fprintf(stderr, "not enough nodes:%d processors required\n",
341  max(p0 * q0, p1 * q1));
342  exit(1);
343  }
344  parameters[0] = p0;
345  parameters[1] = q0;
346  parameters[2] = ma.nbrow;
347  parameters[3] = ma.nbcol;
348  parameters[4] = p1;
349  parameters[5] = q1;
350  parameters[6] = mb.nbrow;
351  parameters[7] = mb.nbcol;
352  parameters[8] = ma.m;
353  parameters[9] = ma.n;
354  parameters[10] = ma.sprow;
355  parameters[11] = ma.spcol;
356  parameters[12] = mb.sprow;
357  parameters[13] = mb.spcol;
358  parameters[14] = ia;
359  parameters[15] = ja;
360  parameters[16] = ib;
361  parameters[17] = jb;
362  parameters[18] = m;
363  parameters[19] = n;
364  parameters[20] = mb.m;
365  parameters[21] = mb.n;
366  parameters[22] = *uplo == 'U';
367  parameters[23] = *diag == 'U';
368  Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1);
369  } else {
370  Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0);
371  p0 = parameters[0];
372  q0 = parameters[1];
373  ma.nbrow = parameters[2];
374  ma.nbcol = parameters[3];
375  p1 = parameters[4];
376  q1 = parameters[5];
377  mb.nbrow = parameters[6];
378  mb.nbcol = parameters[7];
379  ma.m = parameters[8];
380  ma.n = parameters[9];
381  ma.sprow = parameters[10];
382  ma.spcol = parameters[11];
383  mb.sprow = parameters[12];
384  mb.spcol = parameters[13];
385  ia = parameters[14];
386  ja = parameters[15];
387  ib = parameters[16];
388  jb = parameters[17];
389  m = parameters[18];
390  n = parameters[19];
391  mb.m = parameters[20];
392  mb.n = parameters[21];
395  uplo = parameters[22] ? "UPPER" : "LOWER";
396  diag = parameters[23] ? "UNIT" : "NONUNIT";
397  };
398  Cblacs_get(0, 0, &context0);
399  Cblacs_gridinit(&context0, "R", p0, q0);
400  Cblacs_get(0, 0, &context1);
401  Cblacs_gridinit(&context1, "R", p1, q1);
402  Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0);
403  if (myprow0 >= p0 || mypcol0 >= q0)
404  myprow0 = mypcol0 = -1;
405  Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1);
406  if (myprow1 >= p1 || mypcol1 >= q1)
407  myprow1 = mypcol1 = -1;
408  assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
409  assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
410  ma.ctxt = context0;
411  mb.ctxt = context1;
412  /* From here, we are not assuming that only the processors working in the
413  * redistribution are calling xxMR2D, but the ones not concerned will do
414  * nothing. */
415  /* We compute the exact size of the local memory block for the memory
416  * allocations */
417  if (myprow0 >= 0 && mypcol0 >= 0) {
418  blocksize0 = memoryblocksize(&ma);
419  ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m);
420  setmemory(&ptrmyblock, blocksize0);
421  initblock(ptrmyblock, 1, blocksize0);
422  setmemory(&ptrmyblockcopy, blocksize0);
423  memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock,
424  blocksize0 * sizeof(float));
425  setmemory(&ptrmyblockvide, blocksize0);
426  for (i = 0; i < blocksize0; i++)
427  ptrmyblockvide[i] = -1;
428  }; /* if (mypnum < p0 * q0) */
429  if (myprow1 >= 0 && mypcol1 >= 0) {
430  setmemory(&ptrsavemyblock, memoryblocksize(&mb));
431  mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m);
432  }; /* if (mypnum < p1 * q1) */
433  /* Redistribute the matrix from grid 0 to grid 1 (memory location
434  * ptrmyblock to ptrsavemyblock) */
435  Cpstrmr2d(uplo, diag, m, n,
436  ptrmyblock, ia, ja, &ma,
437  ptrsavemyblock, ib, jb, &mb, gcontext);
438  /* Perform the inverse redistribution of the matrix from grid 1 to grid 0
439  * (memory location ptrsavemyblock to ptrmyblockvide) */
440  Cpstrmr2d(uplo, diag, m, n,
441  ptrsavemyblock, ib, jb, &mb,
442  ptrmyblockvide, ia, ja, &ma, gcontext);
443  /* Check the differences */
444  nberrors = 0;
445  if (myprow0 >= 0 && mypcol0 >= 0) {
446  /* only for the processors that do have data at the begining */
447  for (i = 0; i < blocksize0; i++) {
448  int li, lj, gi, gj;
449  int in;
450  in = 1;
451  li = i % ma.lda;
452  lj = i / ma.lda;
453  gi = (li / ma.nbrow) * p0 * ma.nbrow +
454  SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow;
455  gj = (lj / ma.nbcol) * q0 * ma.nbcol +
456  SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol;
457  assert(gi < ma.m && gj < ma.n);
458  gi -= (ia - 1);
459  gj -= (ja - 1);
460  if (gi < 0 || gj < 0 || gi >= m || gj >= n)
461  in = 0;
462  else if (toupper(*uplo) == 'U')
463  in = (gi <= gj + max(0, m - n) - (toupper(*diag) == 'U'));
464  else
465  in = (gi >= gj - max(0, n - m) + (toupper(*diag) == 'U'));
466  if (!in) {
467  ptrmyblockcopy[i] = -1;
468  }
469  if (ptrmyblockvide[i] != ptrmyblockcopy[i]) {
470  nberrors++;
471  };
472  };
473  if (nberrors > 0) {
474  printf("Processor %d, has tested %d REAL elements,\
475 Number of redistribution errors = %d \n",
476  mypnum, blocksize0, nberrors);
477  }
478  }
479  /* Look at the errors on all the processors at this point. */
480  Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0);
481  if (mypnum == 0)
482  if (nberrors)
483  printf(" => Total number of redistribution errors = %d \n",
484  nberrors);
485  else
486  printf("TEST PASSED OK\n");
487  /* release memory for the next iteration */
488  if (myprow0 >= 0 && mypcol0 >= 0) {
489  freememory((char *) ptrmyblock);
490  freememory((char *) ptrmyblockvide);
491  freememory((char *) ptrmyblockcopy);
492  }; /* if (mypnum < p0 * q0) */
493  /* release memory for the next iteration */
494  if (myprow1 >= 0 && mypcol1 >= 0) {
495  freememory((char *) ptrsavemyblock);
496  };
497  if (myprow0 >= 0)
498  Cblacs_gridexit(context0);
499  if (myprow1 >= 0)
500  Cblacs_gridexit(context1);
501  }; /* while nbre != 0 */
502  if (mypnum == 0) {
503  fclose(fp);
504  };
505  Cblacs_exit(0);
506  return 0;
507 }/* main */
Cigesd2d
void Cigesd2d()
Cblacs_pnum
int Cblacs_pnum()
setmemory
#define setmemory
Definition: pstrmrdrv.c:136
Cblacs_setup
void Cblacs_setup()
max
#define max(A, B)
Definition: pstrmrdrv.c:94
Cigsum2d
void Cigsum2d()
Cigebs2d
void Cigebs2d()
MDESC::ctxt
int ctxt
Definition: pcgemr.c:165
Clacpy
#define Clacpy
Definition: pstrmrdrv.c:75
mr2d_malloc
void * mr2d_malloc()
Csetpvmtids
void Csetpvmtids()
Cigamx2d
void Cigamx2d()
Cblacs_pcoord
void Cblacs_pcoord()
MDESC::sprow
int sprow
Definition: pcgemr.c:170
nbparameter
#define nbparameter
Cpstrmr2d
void Cpstrmr2d()
Cigamn2d
void Cigamn2d()
MDESC::n
int n
Definition: pcgemr.c:167
MDESC::nbcol
int nbcol
Definition: pcgemr.c:169
static2
#define static2
Definition: pstrmrdrv.c:60
Cblacs_exit
void Cblacs_exit()
memoryblocksize
int memoryblocksize()
MDESC
Definition: pcgemr.c:163
localindice
int localindice()
BLOCK_CYCLIC_2D
#define BLOCK_CYCLIC_2D
Definition: pstrmrdrv.c:88
SHIFT
#define SHIFT(row, sprow, nbrow)
Definition: pstrmrdrv.c:93
Cblacs_gridinit
void Cblacs_gridinit()
scanD0
#define scanD0
Definition: pstrmrdrv.c:134
Csgesd2d
void Csgesd2d()
localsize
int localsize()
redist.h
MDESC::desctype
int desctype
Definition: pcgemr.c:164
IDESC
Definition: pcgemr.c:175
initblock
static2 void initblock(float *block, int m, int n)
Definition: pstrmrdrv.c:164
Cpstrmr2do
void Cpstrmr2do()
paramcheck
void paramcheck()
MDESC::spcol
int spcol
Definition: pcgemr.c:171
main
int main(int argc, argv)
Definition: pstrmrdrv.c:243
changeorigin
int changeorigin()
Cblacs_gridexit
void Cblacs_gridexit()
slacpy_
#define slacpy_
Definition: pstrmrdrv.c:73
Cigerv2d
void Cigerv2d()
ppcm
int ppcm()
Cigebr2d
void Cigebr2d()
Csgerv2d
void Csgerv2d()
Cblacs_pinfo
void Cblacs_pinfo()
MDESC::lda
int lda
Definition: pcgemr.c:172
freememory
#define freememory
Definition: pstrmrdrv.c:137
scan_intervals
#define scan_intervals
Definition: pstrmrdrv.c:138
MDESC::nbrow
int nbrow
Definition: pcgemr.c:168
initforpvm
void initforpvm(int argc, argv)
Definition: pstrmrdrv.c:224
dispmat
#define dispmat
Definition: pstrmrdrv.c:135
MDESC::m
int m
Definition: pcgemr.c:166
Cblacs_get
void Cblacs_get()
Cblacs_gridinfo
void Cblacs_gridinfo()