|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 #include "redist.h" 00002 /* $Id: pctrmr2.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $ 00003 * 00004 * some functions used by the pctrmr2d routine see file pctrmr.c for more 00005 * documentation. 00006 * 00007 * Created March 1993 by B. Tourancheau (See sccs for modifications). */ 00008 #define static2 static 00009 #if defined(Add_) || defined(f77IsF2C) 00010 #define fortran_mr2d pctrmr2do_ 00011 #define fortran_mr2dnew pctrmr2d_ 00012 #elif defined(UpCase) 00013 #define fortran_mr2dnew PCTRMR2D 00014 #define fortran_mr2d PCTRMR2DO 00015 #define ccopy_ CCOPY 00016 #define clacpy_ CLACPY 00017 #else 00018 #define fortran_mr2d pctrmr2do 00019 #define fortran_mr2dnew pctrmr2d 00020 #define ccopy_ ccopy 00021 #define clacpy_ clacpy 00022 #endif 00023 #define Clacpy Cctrlacpy 00024 void Clacpy(); 00025 typedef struct { 00026 float r, i; 00027 } complex; 00028 typedef struct { 00029 int desctype; 00030 int ctxt; 00031 int m; 00032 int n; 00033 int nbrow; 00034 int nbcol; 00035 int sprow; 00036 int spcol; 00037 int lda; 00038 } MDESC; 00039 #define BLOCK_CYCLIC_2D 1 00040 typedef struct { 00041 int gstart; 00042 int len; 00043 } IDESC; 00044 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) 00045 #define max(A,B) ((A)>(B)?(A):(B)) 00046 #define min(A,B) ((A)>(B)?(B):(A)) 00047 #define DIVUP(a,b) ( ((a)-1) /(b)+1) 00048 #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) 00049 #ifdef MALLOCDEBUG 00050 #define malloc mymalloc 00051 #define free myfree 00052 #define realloc myrealloc 00053 #endif 00054 /* Cblacs */ 00055 extern void Cblacs_pcoord(); 00056 extern int Cblacs_pnum(); 00057 extern void Csetpvmtids(); 00058 extern void Cblacs_get(); 00059 extern void Cblacs_pinfo(); 00060 extern void Cblacs_gridinfo(); 00061 extern void Cblacs_gridinit(); 00062 extern void Cblacs_exit(); 00063 extern void Cblacs_gridexit(); 00064 extern void Cblacs_setup(); 00065 extern void Cigebs2d(); 00066 extern void Cigebr2d(); 00067 extern void Cigesd2d(); 00068 extern void Cigerv2d(); 00069 extern void Cigsum2d(); 00070 extern void Cigamn2d(); 00071 extern void Cigamx2d(); 00072 extern void Ccgesd2d(); 00073 extern void Ccgerv2d(); 00074 /* lapack */ 00075 void clacpy_(); 00076 /* aux fonctions */ 00077 extern int localindice(); 00078 extern void *mr2d_malloc(); 00079 extern int ppcm(); 00080 extern int localsize(); 00081 extern int memoryblocksize(); 00082 extern int changeorigin(); 00083 extern void paramcheck(); 00084 /* tools and others function */ 00085 #define scanD0 ctrscanD0 00086 #define dispmat ctrdispmat 00087 #define setmemory ctrsetmemory 00088 #define freememory ctrfreememory 00089 #define scan_intervals ctrscan_intervals 00090 extern void scanD0(); 00091 extern void dispmat(); 00092 extern void setmemory(); 00093 extern void freememory(); 00094 extern int scan_intervals(); 00095 extern void Cpctrmr2do(); 00096 extern void Cpctrmr2d(); 00097 /* some defines for Cpctrmr2do */ 00098 #define SENDBUFF 0 00099 #define RECVBUFF 1 00100 #define SIZEBUFF 2 00101 #if 0 00102 #define DEBUG 00103 #endif 00104 #ifndef DEBUG 00105 #define NDEBUG 00106 #endif 00107 #include <stdio.h> 00108 #include <stdlib.h> 00109 #include <string.h> 00110 #include <assert.h> 00111 #include <ctype.h> 00112 /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ 00113 /************************************************************************/ 00114 /* Set the memory space with the malloc function */ 00115 void 00116 setmemory(adpointer, blocksize) 00117 complex **adpointer; 00118 int blocksize; 00119 { 00120 assert(blocksize >= 0); 00121 if (blocksize == 0) { 00122 *adpointer = NULL; 00123 return; 00124 } 00125 *adpointer = (complex *) mr2d_malloc( 00126 blocksize * sizeof(complex)); 00127 } 00128 /******************************************************************/ 00129 /* Free the memory space after the malloc */ 00130 void 00131 freememory(ptrtobefreed) 00132 complex *ptrtobefreed; 00133 { 00134 if (ptrtobefreed == NULL) 00135 return; 00136 free((char *) ptrtobefreed); 00137 } 00138 /* extern functions for intersect() extern ccopy_(); */ 00139 /**************************************************************/ 00140 /* return the number of elements int the column after i and the distance of 00141 * the first one from i, i,j can be negative out of borns, the number of 00142 * elements returned can be negative (means 0) */ 00143 static2 int 00144 insidemat(uplo, diag, i, j, m, n, offset) 00145 int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */ 00146 char *uplo, *diag; 00147 int *offset; 00148 { 00149 /* tests outside mxn */ 00150 assert(j >= 0 && j < n); 00151 assert(i >= 0); 00152 if (toupper(*uplo) == 'U') { 00153 int nbline; /* number of lines in the j_th column */ 00154 int virtualnbline; /* number of line if we were not limited by m */ 00155 *offset = 0; 00156 virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N'); 00157 nbline = min(virtualnbline, m); 00158 return nbline - i; 00159 } else { 00160 int firstline; /* first line in the j_th column */ 00161 int diagcol; /* column where the diag begin */ 00162 int virtualline; /* virtual first line if the matrix was extended with 00163 * negative indices */ 00164 int off; 00165 diagcol = max(n - m, 0);; 00166 virtualline = j - diagcol + (toupper(*diag) == 'U'); 00167 firstline = max(0, virtualline); 00168 off = max(firstline - i, 0); 00169 *offset = off; 00170 i += off; 00171 return m - i; 00172 } 00173 }/* insidemat() */ 00174 /********************************************************************/ 00175 /* Execute an action on the local memories when an intersection occurs (the 00176 * action can be the filling of the memory buffer, the count of the memory 00177 * buffer size or the setting of the memory with the element received) */ 00178 static2 void 00179 intersect(uplo, diag, 00180 j, start, end, 00181 action, 00182 ptrsizebuff, pptrbuff, ptrblock, 00183 m, n, 00184 ma, ia, ja, templateheight0, templatewidth0, 00185 mb, ib, jb, templateheight1, templatewidth1) 00186 int action, *ptrsizebuff; 00187 int j, start, end; 00188 complex **pptrbuff, *ptrblock; 00189 int templateheight0, templatewidth0; 00190 int templateheight1, templatewidth1; 00191 MDESC *ma, *mb; 00192 int ia, ja, ib, jb, m, n; 00193 char *uplo, *diag; 00194 /* Execute the action on the local memory for the current interval and 00195 * increment pptrbuff and ptrsizebuff of the intervalsize */ 00196 /* Notice that if the interval is contigous in the virtual matrice, it is 00197 * also contigous in the real one ! */ 00198 { 00199 /* int un = 1; only when we use dcopy instead of memcpy */ 00200 complex *ptrstart; 00201 int offset, nbline; 00202 int intervalsize; 00203 assert(start < end); 00204 assert(j >= 0 && j < n); 00205 nbline = 00206 insidemat(uplo, diag, start, j, m, n, &offset); 00207 if (nbline <= 0) 00208 return; 00209 start += offset; 00210 if (start >= end) 00211 return; 00212 intervalsize = min(end - start, nbline); 00213 (*ptrsizebuff) += intervalsize; 00214 switch (action) { 00215 case SENDBUFF: /* fill buff with local elements to be sent */ 00216 ptrstart = ptrblock + localindice(start + ia, j + ja, 00217 templateheight0, templatewidth0, ma); 00218 memcpy((char *) (*pptrbuff), (char *) ptrstart, 00219 intervalsize * sizeof(complex)); 00220 /* ccopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff), 00221 * &un); */ 00222 (*pptrbuff) += intervalsize; 00223 break; 00224 case RECVBUFF: /* fill local memory with the values received */ 00225 ptrstart = ptrblock + localindice(start + ib, j + jb, 00226 templateheight1, templatewidth1, mb); 00227 memcpy((char *) ptrstart, (char *) (*pptrbuff), 00228 intervalsize * sizeof(complex)); 00229 /* ccopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart), 00230 * &un); */ 00231 (*pptrbuff) += intervalsize; 00232 break; 00233 case SIZEBUFF: /* computation of sizebuff */ 00234 break; 00235 default: 00236 printf("action is %d outside the scope of the case [0..2] !! \n ", action); 00237 exit(0); 00238 break; 00239 }; /* switch (action) */ 00240 }/* intersect() */ 00241 /* scan_intervals: scans two distributions in one dimension, and compute the 00242 * intersections on the local processor. result must be long enough to 00243 * contains the result that are stocked in IDESC structure, the function 00244 * returns the number of intersections found */ 00245 int 00246 scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, 00247 result) 00248 char type; 00249 int ja, jb, n, q0, q1, col0, col1; 00250 MDESC *ma, *mb; 00251 IDESC *result; 00252 { 00253 int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; 00254 int l; /* local indice on the beginning of the interval */ 00255 assert(type == 'c' || type == 'r'); 00256 nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); 00257 nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); 00258 templatewidth0 = q0 * nbcol0; 00259 templatewidth1 = q1 * nbcol1; 00260 { 00261 int sp0 = (type == 'c' ? ma->spcol : ma->sprow); 00262 int sp1 = (type == 'c' ? mb->spcol : mb->sprow); 00263 j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; 00264 j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; 00265 } 00266 offset = 0; 00267 l = 0; 00268 /* a small check to verify that the submatrix begin inside the first block 00269 * of the original matrix, this done by a sort of coordinate change at the 00270 * beginning of the Cpctrmr2d */ 00271 assert(j0 + nbcol0 > 0); 00272 assert(j1 + nbcol1 > 0); 00273 while ((j0 < n) && (j1 < n)) { 00274 int end0, end1; 00275 int start, end; 00276 end0 = j0 + nbcol0; 00277 end1 = j1 + nbcol1; 00278 if (end0 <= j1) { 00279 j0 += templatewidth0; 00280 l += nbcol0; 00281 continue; 00282 } 00283 if (end1 <= j0) { 00284 j1 += templatewidth1; 00285 continue; 00286 } 00287 /* compute the raw intersection */ 00288 start = max(j0, j1); 00289 start = max(start, 0); 00290 /* the start is correct now, update the corresponding fields */ 00291 result[offset].gstart = start; 00292 end = min(end0, end1); 00293 if (end0 == end) { 00294 j0 += templatewidth0; 00295 l += nbcol0; 00296 } 00297 if (end1 == end) 00298 j1 += templatewidth1; 00299 /* throw the limit if they go out of the matrix */ 00300 end = min(end, n); 00301 assert(end > start); 00302 /* it is a bit tricky to see why the length is always positive after all 00303 * this min and max, first we have the property that every interval 00304 * considered is at least partly into the submatrix, second we arrive 00305 * here only if the raw intersection is non-void, if we remove a limit 00306 * that means the corresponding frontier is in both intervals which 00307 * proove the final interval is non-void, clear ?? */ 00308 result[offset].len = end - start; 00309 offset += 1; 00310 } /* while */ 00311 return offset; 00312 } 00313 /*********************************************************************/ 00314 /* Do the scanning of intervals and the requested action */ 00315 void 00316 scanD0(uplo, diag, action, ptrbuff, ptrsizebuff, 00317 m, n, 00318 ma, ia, ja, p0, q0, 00319 mb, ib, jb, p1, q1, 00320 v_inter, vinter_nb, 00321 h_inter, hinter_nb, 00322 ptrblock) 00323 int action, /* # of the action done on the intersected intervals */ 00324 *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be 00325 * an output parameter in every cases) */ 00326 complex *ptrbuff /* address of the communication ptrbuffer (a 00327 * suffisant memory space is supposed to be allocated 00328 before the call) */ , *ptrblock; 00329 int p0, q0, p1, q1; 00330 IDESC *v_inter, *h_inter; 00331 int vinter_nb, hinter_nb; 00332 int m, n; 00333 int ia, ja, ib, jb; 00334 MDESC *ma, *mb; 00335 char *uplo, *diag; 00336 {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to 00337 * the [a..a+au-1] interval of length au */ 00338 int templateheight1, templatewidth1; 00339 int templateheight0, templatewidth0; 00340 int h, v; /* for scanning the intervals */ 00341 /* initializations */ 00342 templateheight1 = p1 * mb->nbrow; 00343 templateheight0 = p0 * ma->nbrow; 00344 templatewidth1 = q1 * mb->nbcol; 00345 templatewidth0 = q0 * ma->nbcol; 00346 /* we now will deal will logical grids, that's to say we change our 00347 * numbering of processors so that (0,0) begin on logical processor (0,0) */ 00348 /* in case we will not enter the while loop */ 00349 (*ptrsizebuff) = 0; 00350 for (h = 0; h < hinter_nb; h++) 00351 for (v = 0; v < vinter_nb; v++) { 00352 int j; 00353 for (j = 0; j < h_inter[h].len; j++) 00354 intersect(uplo, diag, j + h_inter[h].gstart, 00355 v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len, 00356 action, ptrsizebuff, &ptrbuff, ptrblock, m, n, 00357 ma, ia, ja, templateheight0, templatewidth0, 00358 mb, ib, jb, templateheight1, templatewidth1); 00359 } 00360 }/* scanD0() */