ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pztrmr2.c
Go to the documentation of this file.
1 #include "redist.h"
2 /* $Id: pztrmr2.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
3  *
4  * some functions used by the pztrmr2d routine see file pztrmr.c for more
5  * documentation.
6  *
7  * Created March 1993 by B. Tourancheau (See sccs for modifications). */
8 #define static2 static
9 #if defined(Add_) || defined(f77IsF2C)
10 #define fortran_mr2d pztrmr2do_
11 #define fortran_mr2dnew pztrmr2d_
12 #elif defined(UpCase)
13 #define fortran_mr2dnew PZTRMR2D
14 #define fortran_mr2d PZTRMR2DO
15 #define zcopy_ ZCOPY
16 #define zlacpy_ ZLACPY
17 #else
18 #define fortran_mr2d pztrmr2do
19 #define fortran_mr2dnew pztrmr2d
20 #define zcopy_ zcopy
21 #define zlacpy_ zlacpy
22 #endif
23 #define Clacpy Cztrlacpy
24 void Clacpy();
25 typedef struct {
26  double r, i;
27 } dcomplex;
28 typedef struct {
29  int desctype;
30  int ctxt;
31  int m;
32  int n;
33  int nbrow;
34  int nbcol;
35  int sprow;
36  int spcol;
37  int lda;
38 } MDESC;
39 #define BLOCK_CYCLIC_2D 1
40 typedef struct {
41  int gstart;
42  int len;
43 } IDESC;
44 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
45 #define max(A,B) ((A)>(B)?(A):(B))
46 #define min(A,B) ((A)>(B)?(B):(A))
47 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
48 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
49 #ifdef MALLOCDEBUG
50 #define malloc mymalloc
51 #define free myfree
52 #define realloc myrealloc
53 #endif
54 /* Cblacs */
55 extern void Cblacs_pcoord();
56 extern int Cblacs_pnum();
57 extern void Csetpvmtids();
58 extern void Cblacs_get();
59 extern void Cblacs_pinfo();
60 extern void Cblacs_gridinfo();
61 extern void Cblacs_gridinit();
62 extern void Cblacs_exit();
63 extern void Cblacs_gridexit();
64 extern void Cblacs_setup();
65 extern void Cigebs2d();
66 extern void Cigebr2d();
67 extern void Cigesd2d();
68 extern void Cigerv2d();
69 extern void Cigsum2d();
70 extern void Cigamn2d();
71 extern void Cigamx2d();
72 extern void Czgesd2d();
73 extern void Czgerv2d();
74 /* lapack */
75 void zlacpy_();
76 /* aux fonctions */
77 extern int localindice();
78 extern void *mr2d_malloc();
79 extern int ppcm();
80 extern int localsize();
81 extern int memoryblocksize();
82 extern int changeorigin();
83 extern void paramcheck();
84 /* tools and others function */
85 #define scanD0 ztrscanD0
86 #define dispmat ztrdispmat
87 #define setmemory ztrsetmemory
88 #define freememory ztrfreememory
89 #define scan_intervals ztrscan_intervals
90 extern void scanD0();
91 extern void dispmat();
92 extern void setmemory();
93 extern void freememory();
94 extern int scan_intervals();
95 extern void Cpztrmr2do();
96 extern void Cpztrmr2d();
97 /* some defines for Cpztrmr2do */
98 #define SENDBUFF 0
99 #define RECVBUFF 1
100 #define SIZEBUFF 2
101 #if 0
102 #define DEBUG
103 #endif
104 #ifndef DEBUG
105 #define NDEBUG
106 #endif
107 #include <stdio.h>
108 #include <stdlib.h>
109 #include <string.h>
110 #include <assert.h>
111 #include <ctype.h>
112 /* Created March 1993 by B. Tourancheau (See sccs for modifications). */
113 /************************************************************************/
114 /* Set the memory space with the malloc function */
115 void
116 setmemory(adpointer, blocksize)
117  dcomplex **adpointer;
118  int blocksize;
119 {
120  assert(blocksize >= 0);
121  if (blocksize == 0) {
122  *adpointer = NULL;
123  return;
124  }
125  *adpointer = (dcomplex *) mr2d_malloc(
126  blocksize * sizeof(dcomplex));
127 }
128 /******************************************************************/
129 /* Free the memory space after the malloc */
130 void
131 freememory(ptrtobefreed)
132  dcomplex *ptrtobefreed;
133 {
134  if (ptrtobefreed == NULL)
135  return;
136  free((char *) ptrtobefreed);
137 }
138 /* extern functions for intersect() extern zcopy_(); */
139 /**************************************************************/
140 /* return the number of elements int the column after i and the distance of
141  * the first one from i, i,j can be negative out of borns, the number of
142  * elements returned can be negative (means 0) */
143 static2 int
144 insidemat(uplo, diag, i, j, m, n, offset)
145  int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */
146  char *uplo, *diag;
147  int *offset;
148 {
149  /* tests outside mxn */
150  assert(j >= 0 && j < n);
151  assert(i >= 0);
152  if (toupper(*uplo) == 'U') {
153  int nbline; /* number of lines in the j_th column */
154  int virtualnbline; /* number of line if we were not limited by m */
155  *offset = 0;
156  virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N');
157  nbline = min(virtualnbline, m);
158  return nbline - i;
159  } else {
160  int firstline; /* first line in the j_th column */
161  int diagcol; /* column where the diag begin */
162  int virtualline; /* virtual first line if the matrix was extended with
163  * negative indices */
164  int off;
165  diagcol = max(n - m, 0);;
166  virtualline = j - diagcol + (toupper(*diag) == 'U');
167  firstline = max(0, virtualline);
168  off = max(firstline - i, 0);
169  *offset = off;
170  i += off;
171  return m - i;
172  }
173 }/* insidemat() */
174 /********************************************************************/
175 /* Execute an action on the local memories when an intersection occurs (the
176  * action can be the filling of the memory buffer, the count of the memory
177  * buffer size or the setting of the memory with the element received) */
178 static2 void
179 intersect(uplo, diag,
180  j, start, end,
181  action,
182  ptrsizebuff, pptrbuff, ptrblock,
183  m, n,
184  ma, ia, ja, templateheight0, templatewidth0,
185  mb, ib, jb, templateheight1, templatewidth1)
186  int action, *ptrsizebuff;
187  int j, start, end;
188  dcomplex **pptrbuff, *ptrblock;
189  int templateheight0, templatewidth0;
190  int templateheight1, templatewidth1;
191  MDESC *ma, *mb;
192  int ia, ja, ib, jb, m, n;
193  char *uplo, *diag;
194 /* Execute the action on the local memory for the current interval and
195  * increment pptrbuff and ptrsizebuff of the intervalsize */
196 /* Notice that if the interval is contigous in the virtual matrice, it is
197  * also contigous in the real one ! */
198 {
199  /* int un = 1; only when we use dcopy instead of memcpy */
200  dcomplex *ptrstart;
201  int offset, nbline;
202  int intervalsize;
203  assert(start < end);
204  assert(j >= 0 && j < n);
205  nbline =
206  insidemat(uplo, diag, start, j, m, n, &offset);
207  if (nbline <= 0)
208  return;
209  start += offset;
210  if (start >= end)
211  return;
212  intervalsize = min(end - start, nbline);
213  (*ptrsizebuff) += intervalsize;
214  switch (action) {
215  case SENDBUFF: /* fill buff with local elements to be sent */
216  ptrstart = ptrblock + localindice(start + ia, j + ja,
217  templateheight0, templatewidth0, ma);
218  memcpy((char *) (*pptrbuff), (char *) ptrstart,
219  intervalsize * sizeof(dcomplex));
220  /* zcopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff),
221  * &un); */
222  (*pptrbuff) += intervalsize;
223  break;
224  case RECVBUFF: /* fill local memory with the values received */
225  ptrstart = ptrblock + localindice(start + ib, j + jb,
226  templateheight1, templatewidth1, mb);
227  memcpy((char *) ptrstart, (char *) (*pptrbuff),
228  intervalsize * sizeof(dcomplex));
229  /* zcopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart),
230  * &un); */
231  (*pptrbuff) += intervalsize;
232  break;
233  case SIZEBUFF: /* computation of sizebuff */
234  break;
235  default:
236  printf("action is %d outside the scope of the case [0..2] !! \n ", action);
237  exit(0);
238  break;
239  }; /* switch (action) */
240 }/* intersect() */
241 /* scan_intervals: scans two distributions in one dimension, and compute the
242  * intersections on the local processor. result must be long enough to
243  * contains the result that are stocked in IDESC structure, the function
244  * returns the number of intersections found */
245 int
246 scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1,
247  result)
248  char type;
249  int ja, jb, n, q0, q1, col0, col1;
250  MDESC *ma, *mb;
251  IDESC *result;
252 {
253  int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1;
254  int l; /* local indice on the beginning of the interval */
255  assert(type == 'c' || type == 'r');
256  nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow);
257  nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow);
258  templatewidth0 = q0 * nbcol0;
259  templatewidth1 = q1 * nbcol1;
260  {
261  int sp0 = (type == 'c' ? ma->spcol : ma->sprow);
262  int sp1 = (type == 'c' ? mb->spcol : mb->sprow);
263  j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja;
264  j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb;
265  }
266  offset = 0;
267  l = 0;
268  /* a small check to verify that the submatrix begin inside the first block
269  * of the original matrix, this done by a sort of coordinate change at the
270  * beginning of the Cpztrmr2d */
271  assert(j0 + nbcol0 > 0);
272  assert(j1 + nbcol1 > 0);
273  while ((j0 < n) && (j1 < n)) {
274  int end0, end1;
275  int start, end;
276  end0 = j0 + nbcol0;
277  end1 = j1 + nbcol1;
278  if (end0 <= j1) {
279  j0 += templatewidth0;
280  l += nbcol0;
281  continue;
282  }
283  if (end1 <= j0) {
284  j1 += templatewidth1;
285  continue;
286  }
287  /* compute the raw intersection */
288  start = max(j0, j1);
289  start = max(start, 0);
290  /* the start is correct now, update the corresponding fields */
291  result[offset].gstart = start;
292  end = min(end0, end1);
293  if (end0 == end) {
294  j0 += templatewidth0;
295  l += nbcol0;
296  }
297  if (end1 == end)
298  j1 += templatewidth1;
299  /* throw the limit if they go out of the matrix */
300  end = min(end, n);
301  assert(end > start);
302  /* it is a bit tricky to see why the length is always positive after all
303  * this min and max, first we have the property that every interval
304  * considered is at least partly into the submatrix, second we arrive
305  * here only if the raw intersection is non-void, if we remove a limit
306  * that means the corresponding frontier is in both intervals which
307  * proove the final interval is non-void, clear ?? */
308  result[offset].len = end - start;
309  offset += 1;
310  } /* while */
311  return offset;
312 }
313 /*********************************************************************/
314 /* Do the scanning of intervals and the requested action */
315 void
316 scanD0(uplo, diag, action, ptrbuff, ptrsizebuff,
317  m, n,
318  ma, ia, ja, p0, q0,
319  mb, ib, jb, p1, q1,
320  v_inter, vinter_nb,
321  h_inter, hinter_nb,
322  ptrblock)
323  int action, /* # of the action done on the intersected intervals */
324  *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be
325  * an output parameter in every cases) */
326  dcomplex *ptrbuff /* address of the communication ptrbuffer (a
327  * suffisant memory space is supposed to be allocated
328  before the call) */ , *ptrblock;
329  int p0, q0, p1, q1;
330  IDESC *v_inter, *h_inter;
331  int vinter_nb, hinter_nb;
332  int m, n;
333  int ia, ja, ib, jb;
334  MDESC *ma, *mb;
335  char *uplo, *diag;
336 {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to
337  * the [a..a+au-1] interval of length au */
338  int templateheight1, templatewidth1;
339  int templateheight0, templatewidth0;
340  int h, v; /* for scanning the intervals */
341  /* initializations */
342  templateheight1 = p1 * mb->nbrow;
343  templateheight0 = p0 * ma->nbrow;
344  templatewidth1 = q1 * mb->nbcol;
345  templatewidth0 = q0 * ma->nbcol;
346  /* we now will deal will logical grids, that's to say we change our
347  * numbering of processors so that (0,0) begin on logical processor (0,0) */
348  /* in case we will not enter the while loop */
349  (*ptrsizebuff) = 0;
350  for (h = 0; h < hinter_nb; h++)
351  for (v = 0; v < vinter_nb; v++) {
352  int j;
353  for (j = 0; j < h_inter[h].len; j++)
354  intersect(uplo, diag, j + h_inter[h].gstart,
355  v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len,
356  action, ptrsizebuff, &ptrbuff, ptrblock, m, n,
357  ma, ia, ja, templateheight0, templatewidth0,
358  mb, ib, jb, templateheight1, templatewidth1);
359  }
360 }/* scanD0() */
Cblacs_pinfo
void Cblacs_pinfo()
Cblacs_pnum
int Cblacs_pnum()
SHIFT
#define SHIFT(row, sprow, nbrow)
Definition: pztrmr2.c:44
zlacpy_
#define zlacpy_
Definition: pztrmr2.c:21
max
#define max(A, B)
Definition: pztrmr2.c:45
Cigsum2d
void Cigsum2d()
memoryblocksize
int memoryblocksize()
MDESC::sprow
int sprow
Definition: pcgemr.c:170
min
#define min(A, B)
Definition: pztrmr2.c:46
MDESC::nbcol
int nbcol
Definition: pcgemr.c:169
ppcm
int ppcm()
MDESC
Definition: pcgemr.c:163
Cblacs_gridinfo
void Cblacs_gridinfo()
scan_intervals
#define scan_intervals
Definition: pztrmr2.c:89
Cblacs_get
void Cblacs_get()
dispmat
#define dispmat
Definition: pztrmr2.c:86
SIZEBUFF
#define SIZEBUFF
Definition: pztrmr2.c:100
Czgesd2d
void Czgesd2d()
Cigamx2d
void Cigamx2d()
Cblacs_exit
void Cblacs_exit()
insidemat
static2 int insidemat(char *uplo, char *diag, int i, int j, int m, int n, int *offset)
Definition: pztrmr2.c:144
scanD0
#define scanD0
Definition: pztrmr2.c:85
paramcheck
void paramcheck()
RECVBUFF
#define RECVBUFF
Definition: pztrmr2.c:99
IDESC
Definition: pcgemr.c:175
Cblacs_gridinit
void Cblacs_gridinit()
IDESC::gstart
int gstart
Definition: pctrmr.c:191
Cigerv2d
void Cigerv2d()
setmemory
#define setmemory
Definition: pztrmr2.c:87
Clacpy
#define Clacpy
Definition: pztrmr2.c:23
Cblacs_pcoord
void Cblacs_pcoord()
Cigesd2d
void Cigesd2d()
Czgerv2d
void Czgerv2d()
intersect
static2 void intersect(char *uplo, char *diag, int j, int start, int end, int action, int *ptrsizebuff, dcomplex **pptrbuff, dcomplex *ptrblock, int m, int n, MDESC *ma, int ia, int ja, int templateheight0, int templatewidth0, MDESC *mb, int ib, int jb, int templateheight1, int templatewidth1)
Definition: pztrmr2.c:179
Cblacs_setup
void Cblacs_setup()
mr2d_malloc
void * mr2d_malloc()
static2
#define static2
Definition: pztrmr2.c:8
MDESC::spcol
int spcol
Definition: pcgemr.c:171
Cigebs2d
void Cigebs2d()
Cigamn2d
void Cigamn2d()
redist.h
localindice
int localindice()
Cblacs_gridexit
void Cblacs_gridexit()
Cpztrmr2d
void Cpztrmr2d()
freememory
#define freememory
Definition: pztrmr2.c:88
dcomplex
Definition: pzgemr.c:160
IDESC::len
int len
Definition: pcgemr.c:177
Cpztrmr2do
void Cpztrmr2do()
Csetpvmtids
void Csetpvmtids()
changeorigin
int changeorigin()
Cigebr2d
void Cigebr2d()
SENDBUFF
#define SENDBUFF
Definition: pztrmr2.c:98
MDESC::nbrow
int nbrow
Definition: pcgemr.c:168
localsize
int localsize()