ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pgemraux.c
Go to the documentation of this file.
1 #include "redist.h"
2 /* \$Id: pgemraux.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp \$
3  *
4  * some functions used by the pigemr2d routine see file pigemr.c for more
5  * documentation.
6  *
7  * Created March 1993 by B. Tourancheau (See sccs for modifications). */
8 #define static2 static
9 #if defined(Add_) || defined(f77IsF2C)
10 #define fortran_mr2d pigemr2do_
11 #define fortran_mr2dnew pigemr2d_
12 #elif defined(UpCase)
13 #define fortran_mr2dnew PIGEMR2D
14 #define fortran_mr2d PIGEMR2DO
15 #define icopy_ ICOPY
16 #define ilacpy_ ILACPY
17 #else
18 #define fortran_mr2d pigemr2do
19 #define fortran_mr2dnew pigemr2d
20 #define icopy_ icopy
21 #define ilacpy_ ilacpy
22 #endif
23 #define Clacpy Cigelacpy
24 void Clacpy();
25 typedef struct {
26  int desctype;
27  int ctxt;
28  int m;
29  int n;
30  int nbrow;
31  int nbcol;
32  int sprow;
33  int spcol;
34  int lda;
35 } MDESC;
36 #define BLOCK_CYCLIC_2D 1
37 typedef struct {
38  int lstart;
39  int len;
40 } IDESC;
41 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
42 #define max(A,B) ((A)>(B)?(A):(B))
43 #define min(A,B) ((A)>(B)?(B):(A))
44 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
45 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
46 #ifdef MALLOCDEBUG
47 #define malloc mymalloc
48 #define free myfree
49 #define realloc myrealloc
50 #endif
51 /* Cblacs */
52 extern void Cblacs_pcoord();
53 extern int Cblacs_pnum();
54 extern void Csetpvmtids();
55 extern void Cblacs_get();
56 extern void Cblacs_pinfo();
57 extern void Cblacs_gridinfo();
58 extern void Cblacs_gridinit();
59 extern void Cblacs_exit();
60 extern void Cblacs_gridexit();
61 extern void Cblacs_setup();
62 extern void Cigebs2d();
63 extern void Cigebr2d();
64 extern void Cigesd2d();
65 extern void Cigerv2d();
66 extern void Cigsum2d();
67 extern void Cigamn2d();
68 extern void Cigamx2d();
69 extern void Cigesd2d();
70 extern void Cigerv2d();
71 /* lapack */
72 void ilacpy_();
73 /* aux fonctions */
74 extern int localindice();
75 extern void *mr2d_malloc();
76 extern int ppcm();
77 extern int localsize();
78 extern int memoryblocksize();
79 extern int changeorigin();
80 extern void paramcheck();
81 /* tools and others function */
82 #define scanD0 igescanD0
83 #define dispmat igedispmat
84 #define setmemory igesetmemory
85 #define freememory igefreememory
86 #define scan_intervals igescan_intervals
87 extern void scanD0();
88 extern void dispmat();
89 extern void setmemory();
90 extern void freememory();
91 extern int scan_intervals();
92 extern void Cpigemr2do();
93 extern void Cpigemr2d();
94 /* some defines for Cpigemr2do */
95 #define SENDBUFF 0
96 #define RECVBUFF 1
97 #define SIZEBUFF 2
98 #if 0
99 #define DEBUG
100 #endif
101 #ifndef DEBUG
102 #define NDEBUG
103 #endif
104 #include <stdio.h>
105 #include <stdlib.h>
106 #include <assert.h>
107 void *
109  int n;
110 {
111  void *ptr;
112  assert(n > 0);
113  ptr = (void *) malloc(n);
114  if (ptr == NULL) {
115  fprintf(stderr, "xxmr2d:out of memory\n");
116  exit(2);
117  }
118  return ptr;
119 }
120 int
121 pgcd(a, b)
122  int a, b;
123 {
124  int aux;
125  if (a < b)
126  return pgcd(b, a);
127  else {
128  aux = a % b;
129  if (aux == 0)
130  return b;
131  else
132  return pgcd(b, aux);
133  }
134 }
135 int
136 ppcm(a, b)
137  int a, b;
138 {
139  int pg;
140  pg = pgcd(a, b);
141  return a * (b / pg);
142 }
143 /* localsize:return the number of rows on the local processor given by its
144  * row number myprow, of a distributed matrix with m rows distributed of on a
145  * grid of processors with p rows with blocksize nbrow : this procedure can
146  * also be used to compute the number of cols by replacing rows by cols */
147 int
148 localsize(myprow, p, nbrow, m)
149  int myprow, p, nbrow, m;
150 {
151  int templateheight, blockheight;
152  templateheight = p * nbrow;
153  if (m % templateheight != 0) { /* not an exact boundary */
154  if ((m % templateheight) > (nbrow * myprow)) { /* processor
155  * (myprow,mypcol) has
156  * some elements in that
157  * incomplete template */
158  if ((m % templateheight) >= (nbrow * (myprow + 1))) { /* processor
159  * (myprow,mypcol)'s
160  * part is complete */
161  blockheight = (m / templateheight) * nbrow + nbrow;
162  } else { /* processor (myprow,mypcol)'s part is not complete */
163  blockheight = (m / templateheight) * nbrow + (m % nbrow);
164  }; /* if ((m%templateheight) > (nbrow*(myprow+1))) */
165  } else { /* processor (myprow,mypcol) has no element in that
166  * incomplete template */
167  blockheight = (m / templateheight) * nbrow;
168  }; /* if ((m%templateheight) > (nbrow*myprow)) */
169  } else { /* exact boundary */
170  blockheight = m / p; /* (m/templateheight) * nbrow */
171  }; /* if (m%templateheight !=0) */
172  return blockheight;
173 }
174 /****************************************************************/
175 /* Returns the exact memory block size corresponding to the parameters */
176 int
178  MDESC *a;
179 {
180  int myprow, mypcol, p, q;
181  /* Compute the (myprow,mypcol) indices of processor mypnum in P0xQ0 We
182  * assume the row-major ordering of the BLACS */
183  Cblacs_gridinfo(a->ctxt, &p, &q, &myprow, &mypcol);
184  myprow = SHIFT(myprow, a->sprow, p);
185  mypcol = SHIFT(mypcol, a->spcol, q);
186  assert(myprow >= 0 && mypcol >= 0);
187  return localsize(myprow, p, a->nbrow, a->m) *
188  localsize(mypcol, q, a->nbcol, a->n);
189 }
190 void
191 checkequal(ctxt, a)
192  int a, ctxt;
193 {
194  int np, dummy, nbrow, myp, b;
195  Cblacs_gridinfo(ctxt, &nbrow, &np, &dummy, &myp);
196  assert(nbrow == 1);
197  if (np == 1)
198  return;
199  if (myp == 0) {
200  Cigesd2d(ctxt, 1, 1, &a, 1, 0, 1);
201  Cigerv2d(ctxt, 1, 1, &b, 1, 0, np - 1);
202  assert(a == b);
203  } else {
204  Cigerv2d(ctxt, 1, 1, &b, 1, 0, myp - 1);
205  assert(a == b);
206  Cigesd2d(ctxt, 1, 1, &a, 1, 0, (myp + 1) % np);
207  }
208 }
209 void
210 paramcheck(a, i, j, m, n, p, q, gcontext)
211  MDESC *a;
212  int i, j, m, n, p, q;
213 {
214  int p2, q2, myprow, mypcol;
215 #ifndef NDEBUG
216  checkequal(gcontext, p);
217  checkequal(gcontext, q);
218  checkequal(gcontext, a->sprow);
219  checkequal(gcontext, a->spcol);
220  checkequal(gcontext, a->m);
221  checkequal(gcontext, a->n);
222  checkequal(gcontext, i);
223  checkequal(gcontext, j);
224  checkequal(gcontext, a->nbrow);
225  checkequal(gcontext, a->nbcol);
226 #endif
227  Cblacs_gridinfo(a->ctxt, &p2, &q2, &myprow, &mypcol);
228  /* compatibility T3D, must check myprow and mypcol are within bounds */
229  if (myprow >= p2 || mypcol >= q2)
230  myprow = mypcol = -1;
231  if ((myprow >= 0 || mypcol >= 0) && (p2 != p && q2 != q)) {
232  fprintf(stderr, "??MR2D:incoherent p,q parameters\n");
233  exit(1);
234  }
235  assert(myprow < p && mypcol < q);
236  if (a->sprow < 0 || a->sprow >= p || a->spcol < 0 || a->spcol >= q) {
237  fprintf(stderr, "??MR2D:Bad first processor coordinates\n");
238  exit(1);
239  }
240  if (i < 0 || j < 0 || i + m > a->m || j + n > a->n) {
241  fprintf(stderr, "??MR2D:Bad submatrix:i=%d,j=%d,\
242 m=%d,n=%d,M=%d,N=%d\n",
243  i, j, m, n, a->m, a->n);
244  exit(1);
245  }
246  if ((myprow >= 0 || mypcol >= 0) &&
247  localsize(SHIFT(myprow, a->sprow, p), p, a->nbrow, a->m) > a->lda) {
248  fprintf(stderr, "??MR2D:bad lda arg:row=%d,m=%d,p=%d,\
249 nbrow=%d,lda=%d,sprow=%d\n",
250  myprow, a->m, p, a->nbrow, a->lda, a->sprow);
251  exit(1);
252  }
253 }
254 /* to change from the submatrix beginning at line i to one beginning at line
255  * i' with i'< blocksize return the line number on the local process where
256  * the new matrix begin, the new process number, and i' */
257 int
258 changeorigin(myp, sp, p, bs, i, decal, newsp)
259  int myp, sp, p, bs, i;
260  int *decal, *newsp;
261 {
262  int tempheight, firstblock, firsttemp;
263  /* we begin by changing the parameters so that ia < templatewidth,... */
264  tempheight = bs * p;
265  firsttemp = i / tempheight;
266  firstblock = (i / bs) % p;
267  *newsp = (sp + firstblock) % p;
268  if (myp >= 0)
269  *decal = firsttemp * bs + (SHIFT(myp, sp, p) < firstblock ? bs : 0);
270  else
271  *decal = 0;
272  return i % bs;
273 }
274 /******************************************************************/
275 /* Return the indice in local memory of element of indice a in the matrix */
276 int
277 localindice(ig, jg, templateheight, templatewidth, a)
278  int templateheight, templatewidth, ig, jg;
279  MDESC *a;
280 /* Return the indice in local memory (scattered distribution) of the element
281  * of indice a in global matrix */
282 {
283  int vtemp, htemp, vsubtemp, hsubtemp, il, jl;
284  assert(ig >= 0 && ig < a->m && jg >= 0 && jg < a->n);
285  /* coordinates in global matrix with the tests in intersect, ig MUST BE in
286  * [0..m] and jg in [0..n] */
287  /* coordinates of the template that "owns" the element */
288  vtemp = ig / templateheight;
289  htemp = jg / templatewidth;
290  /* coordinates of the element in the subblock of the (vtemp, htemp)
291  * template */
292  vsubtemp = ig % a->nbrow;
293  hsubtemp = jg % a->nbcol;
294  /* coordinates of the element in the local block of the processor */
295  il = a->nbrow * vtemp + vsubtemp;
296  jl = a->nbcol * htemp + hsubtemp;
297  assert(il < a->lda);
298 #ifndef NDEBUG
299  {
300  int pr, pc, p, q, lp, lq;
301  Cblacs_gridinfo(a->ctxt, &p, &q, &pr, &pc);
302  p = templateheight / a->nbrow;
303  q = templatewidth / a->nbcol;
304  lp = ig % templateheight / a->nbrow;
305  lq = jg % templatewidth / a->nbcol;
306  assert(lp == SHIFT(pr, a->sprow, p));
307  assert(lq == SHIFT(pc, a->spcol, q));
308  }
309 #endif
310  return (jl * a->lda + il);
311 }
Cblacs_get
void Cblacs_get()
localindice
int localindice()
Csetpvmtids
void Csetpvmtids()
checkequal
void checkequal(int ctxt, int a)
Definition: pgemraux.c:191
Cblacs_pnum
int Cblacs_pnum()
Cblacs_gridinit
void Cblacs_gridinit()
freememory
#define freememory
Definition: pgemraux.c:85
Cigerv2d
void Cigerv2d()
MDESC::ctxt
int ctxt
Definition: pcgemr.c:165
scanD0
#define scanD0
Definition: pgemraux.c:82
ppcm
int ppcm()
Cigebr2d
void Cigebr2d()
Cigamx2d
void Cigamx2d()
MDESC::sprow
int sprow
Definition: pcgemr.c:170
MDESC::n
int n
Definition: pcgemr.c:167
MDESC::nbcol
int nbcol
Definition: pcgemr.c:169
MDESC
Definition: pcgemr.c:163
Cblacs_pcoord
void Cblacs_pcoord()
Cblacs_gridinfo
void Cblacs_gridinfo()
Cigsum2d
void Cigsum2d()
localsize
int localsize()
IDESC
Definition: pcgemr.c:175
Cpigemr2do
void Cpigemr2do()
Cblacs_setup
void Cblacs_setup()
scan_intervals
#define scan_intervals
Definition: pgemraux.c:86
MDESC::spcol
int spcol
Definition: pcgemr.c:171
Cigebs2d
void Cigebs2d()
redist.h
Cigesd2d
void Cigesd2d()
pgcd
int pgcd(int a, int b)
Definition: pgemraux.c:121
setmemory
#define setmemory
Definition: pgemraux.c:84
Cigamn2d
void Cigamn2d()
Clacpy
#define Clacpy
Definition: pgemraux.c:23
paramcheck
void paramcheck()
mr2d_malloc
void * mr2d_malloc()
memoryblocksize
int memoryblocksize()
Cblacs_gridexit
void Cblacs_gridexit()
dispmat
#define dispmat
Definition: pgemraux.c:83
MDESC::lda
int lda
Definition: pcgemr.c:172
Cblacs_exit
void Cblacs_exit()
MDESC::nbrow
int nbrow
Definition: pcgemr.c:168
Cpigemr2d
void Cpigemr2d()
MDESC::m
int m
Definition: pcgemr.c:166
changeorigin
int changeorigin()
Cblacs_pinfo
void Cblacs_pinfo()
SHIFT
#define SHIFT(row, sprow, nbrow)
Definition: pgemraux.c:41
ilacpy_
#define ilacpy_
Definition: pgemraux.c:21