ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
pstrmr2.c
Go to the documentation of this file.
1 #include "redist.h"
2 /* \$Id: pstrmr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp \$
3  *
4  * some functions used by the pstrmr2d routine see file pstrmr.c for more
5  * documentation.
6  *
7  * Created March 1993 by B. Tourancheau (See sccs for modifications). */
8 #define static2 static
10 #define fortran_mr2d pstrmr2do_
11 #define fortran_mr2dnew pstrmr2d_
12 #elif defined(UpCase)
13 #define fortran_mr2dnew PSTRMR2D
14 #define fortran_mr2d PSTRMR2DO
15 #define scopy_ SCOPY
16 #define slacpy_ SLACPY
17 #else
18 #define fortran_mr2d pstrmr2do
19 #define fortran_mr2dnew pstrmr2d
20 #define scopy_ scopy
21 #define slacpy_ slacpy
22 #endif
23 #define Clacpy Cstrlacpy
24 void Clacpy();
25 typedef struct {
26  int desctype;
27  int ctxt;
28  int m;
29  int n;
30  int nbrow;
31  int nbcol;
32  int sprow;
33  int spcol;
34  int lda;
35 } MDESC;
36 #define BLOCK_CYCLIC_2D 1
37 typedef struct {
38  int gstart;
39  int len;
40 } IDESC;
41 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
42 #define max(A,B) ((A)>(B)?(A):(B))
43 #define min(A,B) ((A)>(B)?(B):(A))
44 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
45 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
46 #ifdef MALLOCDEBUG
47 #define malloc mymalloc
48 #define free myfree
49 #define realloc myrealloc
50 #endif
51 /* Cblacs */
52 extern void Cblacs_pcoord();
53 extern int Cblacs_pnum();
54 extern void Csetpvmtids();
55 extern void Cblacs_get();
56 extern void Cblacs_pinfo();
57 extern void Cblacs_gridinfo();
58 extern void Cblacs_gridinit();
59 extern void Cblacs_exit();
60 extern void Cblacs_gridexit();
61 extern void Cblacs_setup();
62 extern void Cigebs2d();
63 extern void Cigebr2d();
64 extern void Cigesd2d();
65 extern void Cigerv2d();
66 extern void Cigsum2d();
67 extern void Cigamn2d();
68 extern void Cigamx2d();
69 extern void Csgesd2d();
70 extern void Csgerv2d();
71 /* lapack */
72 void slacpy_();
73 /* aux fonctions */
74 extern int localindice();
75 extern void *mr2d_malloc();
76 extern int ppcm();
77 extern int localsize();
78 extern int memoryblocksize();
79 extern int changeorigin();
80 extern void paramcheck();
81 /* tools and others function */
82 #define scanD0 strscanD0
83 #define dispmat strdispmat
84 #define setmemory strsetmemory
85 #define freememory strfreememory
86 #define scan_intervals strscan_intervals
87 extern void scanD0();
88 extern void dispmat();
89 extern void setmemory();
90 extern void freememory();
91 extern int scan_intervals();
92 extern void Cpstrmr2do();
93 extern void Cpstrmr2d();
94 /* some defines for Cpstrmr2do */
95 #define SENDBUFF 0
96 #define RECVBUFF 1
97 #define SIZEBUFF 2
98 #if 0
99 #define DEBUG
100 #endif
101 #ifndef DEBUG
102 #define NDEBUG
103 #endif
104 #include <stdio.h>
105 #include <stdlib.h>
106 #include <string.h>
107 #include <assert.h>
108 #include <ctype.h>
109 /* Created March 1993 by B. Tourancheau (See sccs for modifications). */
110 /************************************************************************/
111 /* Set the memory space with the malloc function */
112 void
115  int blocksize;
116 {
117  assert(blocksize >= 0);
118  if (blocksize == 0) {
120  return;
121  }
122  *adpointer = (float *) mr2d_malloc(
123  blocksize * sizeof(float));
124 }
125 /******************************************************************/
126 /* Free the memory space after the malloc */
127 void
128 freememory(ptrtobefreed)
129  float *ptrtobefreed;
130 {
131  if (ptrtobefreed == NULL)
132  return;
133  free((char *) ptrtobefreed);
134 }
135 /* extern functions for intersect() extern scopy_(); */
136 /**************************************************************/
137 /* return the number of elements int the column after i and the distance of
138  * the first one from i, i,j can be negative out of borns, the number of
139  * elements returned can be negative (means 0) */
140 static2 int
141 insidemat(uplo, diag, i, j, m, n, offset)
142  int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */
143  char *uplo, *diag;
144  int *offset;
145 {
146  /* tests outside mxn */
147  assert(j >= 0 && j < n);
148  assert(i >= 0);
149  if (toupper(*uplo) == 'U') {
150  int nbline; /* number of lines in the j_th column */
151  int virtualnbline; /* number of line if we were not limited by m */
152  *offset = 0;
153  virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N');
154  nbline = min(virtualnbline, m);
155  return nbline - i;
156  } else {
157  int firstline; /* first line in the j_th column */
158  int diagcol; /* column where the diag begin */
159  int virtualline; /* virtual first line if the matrix was extended with
160  * negative indices */
161  int off;
162  diagcol = max(n - m, 0);;
163  virtualline = j - diagcol + (toupper(*diag) == 'U');
164  firstline = max(0, virtualline);
165  off = max(firstline - i, 0);
166  *offset = off;
167  i += off;
168  return m - i;
169  }
170 }/* insidemat() */
171 /********************************************************************/
172 /* Execute an action on the local memories when an intersection occurs (the
173  * action can be the filling of the memory buffer, the count of the memory
174  * buffer size or the setting of the memory with the element received) */
175 static2 void
176 intersect(uplo, diag,
177  j, start, end,
178  action,
179  ptrsizebuff, pptrbuff, ptrblock,
180  m, n,
181  ma, ia, ja, templateheight0, templatewidth0,
182  mb, ib, jb, templateheight1, templatewidth1)
183  int action, *ptrsizebuff;
184  int j, start, end;
185  float **pptrbuff, *ptrblock;
186  int templateheight0, templatewidth0;
187  int templateheight1, templatewidth1;
188  MDESC *ma, *mb;
189  int ia, ja, ib, jb, m, n;
190  char *uplo, *diag;
191 /* Execute the action on the local memory for the current interval and
192  * increment pptrbuff and ptrsizebuff of the intervalsize */
193 /* Notice that if the interval is contigous in the virtual matrice, it is
194  * also contigous in the real one ! */
195 {
196  /* int un = 1; only when we use dcopy instead of memcpy */
197  float *ptrstart;
198  int offset, nbline;
199  int intervalsize;
200  assert(start < end);
201  assert(j >= 0 && j < n);
202  nbline =
203  insidemat(uplo, diag, start, j, m, n, &offset);
204  if (nbline <= 0)
205  return;
206  start += offset;
207  if (start >= end)
208  return;
209  intervalsize = min(end - start, nbline);
210  (*ptrsizebuff) += intervalsize;
211  switch (action) {
212  case SENDBUFF: /* fill buff with local elements to be sent */
213  ptrstart = ptrblock + localindice(start + ia, j + ja,
214  templateheight0, templatewidth0, ma);
215  memcpy((char *) (*pptrbuff), (char *) ptrstart,
216  intervalsize * sizeof(float));
217  /* scopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff),
218  * &un); */
219  (*pptrbuff) += intervalsize;
220  break;
221  case RECVBUFF: /* fill local memory with the values received */
222  ptrstart = ptrblock + localindice(start + ib, j + jb,
223  templateheight1, templatewidth1, mb);
224  memcpy((char *) ptrstart, (char *) (*pptrbuff),
225  intervalsize * sizeof(float));
226  /* scopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart),
227  * &un); */
228  (*pptrbuff) += intervalsize;
229  break;
230  case SIZEBUFF: /* computation of sizebuff */
231  break;
232  default:
233  printf("action is %d outside the scope of the case [0..2] !! \n ", action);
234  exit(0);
235  break;
236  }; /* switch (action) */
237 }/* intersect() */
238 /* scan_intervals: scans two distributions in one dimension, and compute the
239  * intersections on the local processor. result must be long enough to
240  * contains the result that are stocked in IDESC structure, the function
241  * returns the number of intersections found */
242 int
243 scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1,
244  result)
245  char type;
246  int ja, jb, n, q0, q1, col0, col1;
247  MDESC *ma, *mb;
248  IDESC *result;
249 {
250  int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1;
251  int l; /* local indice on the beginning of the interval */
252  assert(type == 'c' || type == 'r');
253  nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow);
254  nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow);
255  templatewidth0 = q0 * nbcol0;
256  templatewidth1 = q1 * nbcol1;
257  {
258  int sp0 = (type == 'c' ? ma->spcol : ma->sprow);
259  int sp1 = (type == 'c' ? mb->spcol : mb->sprow);
260  j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja;
261  j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb;
262  }
263  offset = 0;
264  l = 0;
265  /* a small check to verify that the submatrix begin inside the first block
266  * of the original matrix, this done by a sort of coordinate change at the
267  * beginning of the Cpstrmr2d */
268  assert(j0 + nbcol0 > 0);
269  assert(j1 + nbcol1 > 0);
270  while ((j0 < n) && (j1 < n)) {
271  int end0, end1;
272  int start, end;
273  end0 = j0 + nbcol0;
274  end1 = j1 + nbcol1;
275  if (end0 <= j1) {
276  j0 += templatewidth0;
277  l += nbcol0;
278  continue;
279  }
280  if (end1 <= j0) {
281  j1 += templatewidth1;
282  continue;
283  }
284  /* compute the raw intersection */
285  start = max(j0, j1);
286  start = max(start, 0);
287  /* the start is correct now, update the corresponding fields */
288  result[offset].gstart = start;
289  end = min(end0, end1);
290  if (end0 == end) {
291  j0 += templatewidth0;
292  l += nbcol0;
293  }
294  if (end1 == end)
295  j1 += templatewidth1;
296  /* throw the limit if they go out of the matrix */
297  end = min(end, n);
298  assert(end > start);
299  /* it is a bit tricky to see why the length is always positive after all
300  * this min and max, first we have the property that every interval
301  * considered is at least partly into the submatrix, second we arrive
302  * here only if the raw intersection is non-void, if we remove a limit
303  * that means the corresponding frontier is in both intervals which
304  * proove the final interval is non-void, clear ?? */
305  result[offset].len = end - start;
306  offset += 1;
307  } /* while */
308  return offset;
309 }
310 /*********************************************************************/
311 /* Do the scanning of intervals and the requested action */
312 void
313 scanD0(uplo, diag, action, ptrbuff, ptrsizebuff,
314  m, n,
315  ma, ia, ja, p0, q0,
316  mb, ib, jb, p1, q1,
317  v_inter, vinter_nb,
318  h_inter, hinter_nb,
319  ptrblock)
320  int action, /* # of the action done on the intersected intervals */
321  *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be
322  * an output parameter in every cases) */
323  float *ptrbuff /* address of the communication ptrbuffer (a
324  * suffisant memory space is supposed to be allocated
325  before the call) */ , *ptrblock;
326  int p0, q0, p1, q1;
327  IDESC *v_inter, *h_inter;
328  int vinter_nb, hinter_nb;
329  int m, n;
330  int ia, ja, ib, jb;
331  MDESC *ma, *mb;
332  char *uplo, *diag;
333 {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to
334  * the [a..a+au-1] interval of length au */
335  int templateheight1, templatewidth1;
336  int templateheight0, templatewidth0;
337  int h, v; /* for scanning the intervals */
338  /* initializations */
339  templateheight1 = p1 * mb->nbrow;
340  templateheight0 = p0 * ma->nbrow;
341  templatewidth1 = q1 * mb->nbcol;
342  templatewidth0 = q0 * ma->nbcol;
343  /* we now will deal will logical grids, that's to say we change our
344  * numbering of processors so that (0,0) begin on logical processor (0,0) */
345  /* in case we will not enter the while loop */
346  (*ptrsizebuff) = 0;
347  for (h = 0; h < hinter_nb; h++)
348  for (v = 0; v < vinter_nb; v++) {
349  int j;
350  for (j = 0; j < h_inter[h].len; j++)
351  intersect(uplo, diag, j + h_inter[h].gstart,
352  v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len,
353  action, ptrsizebuff, &ptrbuff, ptrblock, m, n,
354  ma, ia, ja, templateheight0, templatewidth0,
355  mb, ib, jb, templateheight1, templatewidth1);
356  }
357 }/* scanD0() */
Csgerv2d
void Csgerv2d()
scanD0
#define scanD0
Definition: pstrmr2.c:82
Cblacs_gridinfo
void Cblacs_gridinfo()
Cigamn2d
void Cigamn2d()
min
#define min(A, B)
Definition: pstrmr2.c:43
Cigebs2d
void Cigebs2d()
Clacpy
#define Clacpy
Definition: pstrmr2.c:23
mr2d_malloc
void * mr2d_malloc()
changeorigin
int changeorigin()
Csgesd2d
void Csgesd2d()
memoryblocksize
int memoryblocksize()
MDESC::sprow
int sprow
Definition: pcgemr.c:170
scan_intervals
#define scan_intervals
Definition: pstrmr2.c:86
Cblacs_setup
void Cblacs_setup()
MDESC::nbcol
int nbcol
Definition: pcgemr.c:169
MDESC
Definition: pcgemr.c:163
setmemory
#define setmemory
Definition: pstrmr2.c:84
Csetpvmtids
void Csetpvmtids()
paramcheck
void paramcheck()
localindice
int localindice()
Cigesd2d
void Cigesd2d()
static2
#define static2
Definition: pstrmr2.c:8
dispmat
#define dispmat
Definition: pstrmr2.c:83
Cigerv2d
void Cigerv2d()
SIZEBUFF
#define SIZEBUFF
Definition: pstrmr2.c:97
IDESC
Definition: pcgemr.c:175
freememory
#define freememory
Definition: pstrmr2.c:85
IDESC::gstart
int gstart
Definition: pctrmr.c:191
Cigamx2d
void Cigamx2d()
Cigebr2d
void Cigebr2d()
Cblacs_pcoord
void Cblacs_pcoord()
insidemat
static2 int insidemat(char *uplo, char *diag, int i, int j, int m, int n, int *offset)
Definition: pstrmr2.c:141
Cblacs_pinfo
void Cblacs_pinfo()
ppcm
int ppcm()
intersect
static2 void intersect(char *uplo, char *diag, int j, int start, int end, int action, int *ptrsizebuff, float **pptrbuff, float *ptrblock, int m, int n, MDESC *ma, int ia, int ja, int templateheight0, int templatewidth0, MDESC *mb, int ib, int jb, int templateheight1, int templatewidth1)
Definition: pstrmr2.c:176
max
#define max(A, B)
Definition: pstrmr2.c:42
slacpy_
#define slacpy_
Definition: pstrmr2.c:21
MDESC::spcol
int spcol
Definition: pcgemr.c:171
Cblacs_get
void Cblacs_get()
RECVBUFF
#define RECVBUFF
Definition: pstrmr2.c:96
Cblacs_gridinit
void Cblacs_gridinit()
redist.h
localsize
int localsize()
Cpstrmr2d
void Cpstrmr2d()
SHIFT
#define SHIFT(row, sprow, nbrow)
Definition: pstrmr2.c:41
IDESC::len
int len
Definition: pcgemr.c:177
Cblacs_pnum
int Cblacs_pnum()
MDESC::nbrow
int nbrow
Definition: pcgemr.c:168
Cblacs_gridexit
void Cblacs_gridexit()
Cigsum2d
void Cigsum2d()
SENDBUFF
#define SENDBUFF
Definition: pstrmr2.c:95
Cpstrmr2do
void Cpstrmr2do()
Cblacs_exit
void Cblacs_exit()