|
ScaLAPACK
2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
|
00001 #include "redist.h" 00002 /* $Id: pstrmr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $ 00003 * 00004 * some functions used by the pstrmr2d routine see file pstrmr.c for more 00005 * documentation. 00006 * 00007 * Created March 1993 by B. Tourancheau (See sccs for modifications). */ 00008 #define static2 static 00009 #if defined(Add_) || defined(f77IsF2C) 00010 #define fortran_mr2d pstrmr2do_ 00011 #define fortran_mr2dnew pstrmr2d_ 00012 #elif defined(UpCase) 00013 #define fortran_mr2dnew PSTRMR2D 00014 #define fortran_mr2d PSTRMR2DO 00015 #define scopy_ SCOPY 00016 #define slacpy_ SLACPY 00017 #else 00018 #define fortran_mr2d pstrmr2do 00019 #define fortran_mr2dnew pstrmr2d 00020 #define scopy_ scopy 00021 #define slacpy_ slacpy 00022 #endif 00023 #define Clacpy Cstrlacpy 00024 void Clacpy(); 00025 typedef struct { 00026 int desctype; 00027 int ctxt; 00028 int m; 00029 int n; 00030 int nbrow; 00031 int nbcol; 00032 int sprow; 00033 int spcol; 00034 int lda; 00035 } MDESC; 00036 #define BLOCK_CYCLIC_2D 1 00037 typedef struct { 00038 int gstart; 00039 int len; 00040 } IDESC; 00041 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow))) 00042 #define max(A,B) ((A)>(B)?(A):(B)) 00043 #define min(A,B) ((A)>(B)?(B):(A)) 00044 #define DIVUP(a,b) ( ((a)-1) /(b)+1) 00045 #define ROUNDUP(a,b) (DIVUP(a,b)*(b)) 00046 #ifdef MALLOCDEBUG 00047 #define malloc mymalloc 00048 #define free myfree 00049 #define realloc myrealloc 00050 #endif 00051 /* Cblacs */ 00052 extern void Cblacs_pcoord(); 00053 extern int Cblacs_pnum(); 00054 extern void Csetpvmtids(); 00055 extern void Cblacs_get(); 00056 extern void Cblacs_pinfo(); 00057 extern void Cblacs_gridinfo(); 00058 extern void Cblacs_gridinit(); 00059 extern void Cblacs_exit(); 00060 extern void Cblacs_gridexit(); 00061 extern void Cblacs_setup(); 00062 extern void Cigebs2d(); 00063 extern void Cigebr2d(); 00064 extern void Cigesd2d(); 00065 extern void Cigerv2d(); 00066 extern void Cigsum2d(); 00067 extern void Cigamn2d(); 00068 extern void Cigamx2d(); 00069 extern void Csgesd2d(); 00070 extern void Csgerv2d(); 00071 /* lapack */ 00072 void slacpy_(); 00073 /* aux fonctions */ 00074 extern int localindice(); 00075 extern void *mr2d_malloc(); 00076 extern int ppcm(); 00077 extern int localsize(); 00078 extern int memoryblocksize(); 00079 extern int changeorigin(); 00080 extern void paramcheck(); 00081 /* tools and others function */ 00082 #define scanD0 strscanD0 00083 #define dispmat strdispmat 00084 #define setmemory strsetmemory 00085 #define freememory strfreememory 00086 #define scan_intervals strscan_intervals 00087 extern void scanD0(); 00088 extern void dispmat(); 00089 extern void setmemory(); 00090 extern void freememory(); 00091 extern int scan_intervals(); 00092 extern void Cpstrmr2do(); 00093 extern void Cpstrmr2d(); 00094 /* some defines for Cpstrmr2do */ 00095 #define SENDBUFF 0 00096 #define RECVBUFF 1 00097 #define SIZEBUFF 2 00098 #if 0 00099 #define DEBUG 00100 #endif 00101 #ifndef DEBUG 00102 #define NDEBUG 00103 #endif 00104 #include <stdio.h> 00105 #include <stdlib.h> 00106 #include <string.h> 00107 #include <assert.h> 00108 #include <ctype.h> 00109 /* Created March 1993 by B. Tourancheau (See sccs for modifications). */ 00110 /************************************************************************/ 00111 /* Set the memory space with the malloc function */ 00112 void 00113 setmemory(adpointer, blocksize) 00114 float **adpointer; 00115 int blocksize; 00116 { 00117 assert(blocksize >= 0); 00118 if (blocksize == 0) { 00119 *adpointer = NULL; 00120 return; 00121 } 00122 *adpointer = (float *) mr2d_malloc( 00123 blocksize * sizeof(float)); 00124 } 00125 /******************************************************************/ 00126 /* Free the memory space after the malloc */ 00127 void 00128 freememory(ptrtobefreed) 00129 float *ptrtobefreed; 00130 { 00131 if (ptrtobefreed == NULL) 00132 return; 00133 free((char *) ptrtobefreed); 00134 } 00135 /* extern functions for intersect() extern scopy_(); */ 00136 /**************************************************************/ 00137 /* return the number of elements int the column after i and the distance of 00138 * the first one from i, i,j can be negative out of borns, the number of 00139 * elements returned can be negative (means 0) */ 00140 static2 int 00141 insidemat(uplo, diag, i, j, m, n, offset) 00142 int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */ 00143 char *uplo, *diag; 00144 int *offset; 00145 { 00146 /* tests outside mxn */ 00147 assert(j >= 0 && j < n); 00148 assert(i >= 0); 00149 if (toupper(*uplo) == 'U') { 00150 int nbline; /* number of lines in the j_th column */ 00151 int virtualnbline; /* number of line if we were not limited by m */ 00152 *offset = 0; 00153 virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N'); 00154 nbline = min(virtualnbline, m); 00155 return nbline - i; 00156 } else { 00157 int firstline; /* first line in the j_th column */ 00158 int diagcol; /* column where the diag begin */ 00159 int virtualline; /* virtual first line if the matrix was extended with 00160 * negative indices */ 00161 int off; 00162 diagcol = max(n - m, 0);; 00163 virtualline = j - diagcol + (toupper(*diag) == 'U'); 00164 firstline = max(0, virtualline); 00165 off = max(firstline - i, 0); 00166 *offset = off; 00167 i += off; 00168 return m - i; 00169 } 00170 }/* insidemat() */ 00171 /********************************************************************/ 00172 /* Execute an action on the local memories when an intersection occurs (the 00173 * action can be the filling of the memory buffer, the count of the memory 00174 * buffer size or the setting of the memory with the element received) */ 00175 static2 void 00176 intersect(uplo, diag, 00177 j, start, end, 00178 action, 00179 ptrsizebuff, pptrbuff, ptrblock, 00180 m, n, 00181 ma, ia, ja, templateheight0, templatewidth0, 00182 mb, ib, jb, templateheight1, templatewidth1) 00183 int action, *ptrsizebuff; 00184 int j, start, end; 00185 float **pptrbuff, *ptrblock; 00186 int templateheight0, templatewidth0; 00187 int templateheight1, templatewidth1; 00188 MDESC *ma, *mb; 00189 int ia, ja, ib, jb, m, n; 00190 char *uplo, *diag; 00191 /* Execute the action on the local memory for the current interval and 00192 * increment pptrbuff and ptrsizebuff of the intervalsize */ 00193 /* Notice that if the interval is contigous in the virtual matrice, it is 00194 * also contigous in the real one ! */ 00195 { 00196 /* int un = 1; only when we use dcopy instead of memcpy */ 00197 float *ptrstart; 00198 int offset, nbline; 00199 int intervalsize; 00200 assert(start < end); 00201 assert(j >= 0 && j < n); 00202 nbline = 00203 insidemat(uplo, diag, start, j, m, n, &offset); 00204 if (nbline <= 0) 00205 return; 00206 start += offset; 00207 if (start >= end) 00208 return; 00209 intervalsize = min(end - start, nbline); 00210 (*ptrsizebuff) += intervalsize; 00211 switch (action) { 00212 case SENDBUFF: /* fill buff with local elements to be sent */ 00213 ptrstart = ptrblock + localindice(start + ia, j + ja, 00214 templateheight0, templatewidth0, ma); 00215 memcpy((char *) (*pptrbuff), (char *) ptrstart, 00216 intervalsize * sizeof(float)); 00217 /* scopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff), 00218 * &un); */ 00219 (*pptrbuff) += intervalsize; 00220 break; 00221 case RECVBUFF: /* fill local memory with the values received */ 00222 ptrstart = ptrblock + localindice(start + ib, j + jb, 00223 templateheight1, templatewidth1, mb); 00224 memcpy((char *) ptrstart, (char *) (*pptrbuff), 00225 intervalsize * sizeof(float)); 00226 /* scopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart), 00227 * &un); */ 00228 (*pptrbuff) += intervalsize; 00229 break; 00230 case SIZEBUFF: /* computation of sizebuff */ 00231 break; 00232 default: 00233 printf("action is %d outside the scope of the case [0..2] !! \n ", action); 00234 exit(0); 00235 break; 00236 }; /* switch (action) */ 00237 }/* intersect() */ 00238 /* scan_intervals: scans two distributions in one dimension, and compute the 00239 * intersections on the local processor. result must be long enough to 00240 * contains the result that are stocked in IDESC structure, the function 00241 * returns the number of intersections found */ 00242 int 00243 scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1, 00244 result) 00245 char type; 00246 int ja, jb, n, q0, q1, col0, col1; 00247 MDESC *ma, *mb; 00248 IDESC *result; 00249 { 00250 int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1; 00251 int l; /* local indice on the beginning of the interval */ 00252 assert(type == 'c' || type == 'r'); 00253 nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow); 00254 nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow); 00255 templatewidth0 = q0 * nbcol0; 00256 templatewidth1 = q1 * nbcol1; 00257 { 00258 int sp0 = (type == 'c' ? ma->spcol : ma->sprow); 00259 int sp1 = (type == 'c' ? mb->spcol : mb->sprow); 00260 j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja; 00261 j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb; 00262 } 00263 offset = 0; 00264 l = 0; 00265 /* a small check to verify that the submatrix begin inside the first block 00266 * of the original matrix, this done by a sort of coordinate change at the 00267 * beginning of the Cpstrmr2d */ 00268 assert(j0 + nbcol0 > 0); 00269 assert(j1 + nbcol1 > 0); 00270 while ((j0 < n) && (j1 < n)) { 00271 int end0, end1; 00272 int start, end; 00273 end0 = j0 + nbcol0; 00274 end1 = j1 + nbcol1; 00275 if (end0 <= j1) { 00276 j0 += templatewidth0; 00277 l += nbcol0; 00278 continue; 00279 } 00280 if (end1 <= j0) { 00281 j1 += templatewidth1; 00282 continue; 00283 } 00284 /* compute the raw intersection */ 00285 start = max(j0, j1); 00286 start = max(start, 0); 00287 /* the start is correct now, update the corresponding fields */ 00288 result[offset].gstart = start; 00289 end = min(end0, end1); 00290 if (end0 == end) { 00291 j0 += templatewidth0; 00292 l += nbcol0; 00293 } 00294 if (end1 == end) 00295 j1 += templatewidth1; 00296 /* throw the limit if they go out of the matrix */ 00297 end = min(end, n); 00298 assert(end > start); 00299 /* it is a bit tricky to see why the length is always positive after all 00300 * this min and max, first we have the property that every interval 00301 * considered is at least partly into the submatrix, second we arrive 00302 * here only if the raw intersection is non-void, if we remove a limit 00303 * that means the corresponding frontier is in both intervals which 00304 * proove the final interval is non-void, clear ?? */ 00305 result[offset].len = end - start; 00306 offset += 1; 00307 } /* while */ 00308 return offset; 00309 } 00310 /*********************************************************************/ 00311 /* Do the scanning of intervals and the requested action */ 00312 void 00313 scanD0(uplo, diag, action, ptrbuff, ptrsizebuff, 00314 m, n, 00315 ma, ia, ja, p0, q0, 00316 mb, ib, jb, p1, q1, 00317 v_inter, vinter_nb, 00318 h_inter, hinter_nb, 00319 ptrblock) 00320 int action, /* # of the action done on the intersected intervals */ 00321 *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be 00322 * an output parameter in every cases) */ 00323 float *ptrbuff /* address of the communication ptrbuffer (a 00324 * suffisant memory space is supposed to be allocated 00325 before the call) */ , *ptrblock; 00326 int p0, q0, p1, q1; 00327 IDESC *v_inter, *h_inter; 00328 int vinter_nb, hinter_nb; 00329 int m, n; 00330 int ia, ja, ib, jb; 00331 MDESC *ma, *mb; 00332 char *uplo, *diag; 00333 {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to 00334 * the [a..a+au-1] interval of length au */ 00335 int templateheight1, templatewidth1; 00336 int templateheight0, templatewidth0; 00337 int h, v; /* for scanning the intervals */ 00338 /* initializations */ 00339 templateheight1 = p1 * mb->nbrow; 00340 templateheight0 = p0 * ma->nbrow; 00341 templatewidth1 = q1 * mb->nbcol; 00342 templatewidth0 = q0 * ma->nbcol; 00343 /* we now will deal will logical grids, that's to say we change our 00344 * numbering of processors so that (0,0) begin on logical processor (0,0) */ 00345 /* in case we will not enter the while loop */ 00346 (*ptrsizebuff) = 0; 00347 for (h = 0; h < hinter_nb; h++) 00348 for (v = 0; v < vinter_nb; v++) { 00349 int j; 00350 for (j = 0; j < h_inter[h].len; j++) 00351 intersect(uplo, diag, j + h_inter[h].gstart, 00352 v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len, 00353 action, ptrsizebuff, &ptrbuff, ptrblock, m, n, 00354 ma, ia, ja, templateheight0, templatewidth0, 00355 mb, ib, jb, templateheight1, templatewidth1); 00356 } 00357 }/* scanD0() */