ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pstrmr2.c
Go to the documentation of this file.
00001 #include "redist.h"
00002 /* $Id: pstrmr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $
00003  * 
00004  * some functions used by the pstrmr2d routine see file pstrmr.c for more
00005  * documentation.
00006  * 
00007  * Created March 1993 by B. Tourancheau (See sccs for modifications). */
00008 #define static2 static
00009 #if defined(Add_) || defined(f77IsF2C)
00010 #define fortran_mr2d pstrmr2do_
00011 #define fortran_mr2dnew pstrmr2d_
00012 #elif defined(UpCase)
00013 #define fortran_mr2dnew PSTRMR2D
00014 #define fortran_mr2d PSTRMR2DO
00015 #define scopy_ SCOPY
00016 #define slacpy_ SLACPY
00017 #else
00018 #define fortran_mr2d pstrmr2do
00019 #define fortran_mr2dnew pstrmr2d
00020 #define scopy_ scopy
00021 #define slacpy_ slacpy
00022 #endif
00023 #define Clacpy Cstrlacpy
00024 void  Clacpy();
00025 typedef struct {
00026   int   desctype;
00027   int   ctxt;
00028   int   m;
00029   int   n;
00030   int   nbrow;
00031   int   nbcol;
00032   int   sprow;
00033   int   spcol;
00034   int   lda;
00035 }     MDESC;
00036 #define BLOCK_CYCLIC_2D 1
00037 typedef struct {
00038   int   gstart;
00039   int   len;
00040 }     IDESC;
00041 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
00042 #define max(A,B) ((A)>(B)?(A):(B))
00043 #define min(A,B) ((A)>(B)?(B):(A))
00044 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
00045 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
00046 #ifdef MALLOCDEBUG
00047 #define malloc mymalloc
00048 #define free myfree
00049 #define realloc myrealloc
00050 #endif
00051 /* Cblacs */
00052 extern void Cblacs_pcoord();
00053 extern int Cblacs_pnum();
00054 extern void Csetpvmtids();
00055 extern void Cblacs_get();
00056 extern void Cblacs_pinfo();
00057 extern void Cblacs_gridinfo();
00058 extern void Cblacs_gridinit();
00059 extern void Cblacs_exit();
00060 extern void Cblacs_gridexit();
00061 extern void Cblacs_setup();
00062 extern void Cigebs2d();
00063 extern void Cigebr2d();
00064 extern void Cigesd2d();
00065 extern void Cigerv2d();
00066 extern void Cigsum2d();
00067 extern void Cigamn2d();
00068 extern void Cigamx2d();
00069 extern void Csgesd2d();
00070 extern void Csgerv2d();
00071 /* lapack */
00072 void  slacpy_();
00073 /* aux fonctions */
00074 extern int localindice();
00075 extern void *mr2d_malloc();
00076 extern int ppcm();
00077 extern int localsize();
00078 extern int memoryblocksize();
00079 extern int changeorigin();
00080 extern void paramcheck();
00081 /* tools and others function */
00082 #define scanD0 strscanD0
00083 #define dispmat strdispmat
00084 #define setmemory strsetmemory
00085 #define freememory strfreememory
00086 #define scan_intervals strscan_intervals
00087 extern void scanD0();
00088 extern void dispmat();
00089 extern void setmemory();
00090 extern void freememory();
00091 extern int scan_intervals();
00092 extern void Cpstrmr2do();
00093 extern void Cpstrmr2d();
00094 /* some defines for Cpstrmr2do */
00095 #define SENDBUFF 0
00096 #define RECVBUFF 1
00097 #define SIZEBUFF 2
00098 #if 0
00099 #define DEBUG
00100 #endif
00101 #ifndef DEBUG
00102 #define NDEBUG
00103 #endif
00104 #include <stdio.h>
00105 #include <stdlib.h>
00106 #include <string.h>
00107 #include <assert.h>
00108 #include <ctype.h>
00109 /* Created March 1993 by B. Tourancheau (See sccs for modifications). */
00110 /************************************************************************/
00111 /* Set the memory space with the malloc function */
00112 void
00113 setmemory(adpointer, blocksize)
00114   float **adpointer;
00115   int   blocksize;
00116 {
00117   assert(blocksize >= 0);
00118   if (blocksize == 0) {
00119     *adpointer = NULL;
00120     return;
00121   }
00122   *adpointer = (float *) mr2d_malloc(
00123                                      blocksize * sizeof(float));
00124 }
00125 /******************************************************************/
00126 /* Free the memory space after the malloc */
00127 void
00128 freememory(ptrtobefreed)
00129   float *ptrtobefreed;
00130 {
00131   if (ptrtobefreed == NULL)
00132     return;
00133   free((char *) ptrtobefreed);
00134 }
00135 /* extern functions for intersect() extern scopy_(); */
00136 /**************************************************************/
00137 /* return the number of elements int the column after i and the distance of
00138  * the first one from i, i,j can be negative out of borns, the number of
00139  * elements returned can be negative (means 0) */
00140 static2 int
00141 insidemat(uplo, diag, i, j, m, n, offset)
00142   int   m, n, i, j;     /* coordonnees de depart, taille de la sous-matrice */
00143   char *uplo, *diag;
00144   int  *offset;
00145 {
00146   /* tests outside mxn */
00147   assert(j >= 0 && j < n);
00148   assert(i >= 0);
00149   if (toupper(*uplo) == 'U') {
00150     int   nbline;       /* number of lines in the j_th column */
00151     int   virtualnbline;        /* number of line if we were not limited by m */
00152     *offset = 0;
00153     virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N');
00154     nbline = min(virtualnbline, m);
00155     return nbline - i;
00156   } else {
00157     int   firstline;    /* first line in the j_th column */
00158     int   diagcol;      /* column where the diag begin */
00159     int   virtualline;  /* virtual first line if the matrix was extended with
00160                          * negative indices */
00161     int   off;
00162     diagcol = max(n - m, 0);;
00163     virtualline = j - diagcol + (toupper(*diag) == 'U');
00164     firstline = max(0, virtualline);
00165     off = max(firstline - i, 0);
00166     *offset = off;
00167     i += off;
00168     return m - i;
00169   }
00170 }/* insidemat() */
00171 /********************************************************************/
00172 /* Execute an action on the local memories when an intersection occurs (the
00173  * action can be the filling of the memory buffer, the count of the memory
00174  * buffer size or the setting of the memory with the element received) */
00175 static2 void
00176 intersect(uplo, diag,
00177           j, start, end,
00178           action,
00179           ptrsizebuff, pptrbuff, ptrblock,
00180           m, n,
00181           ma, ia, ja, templateheight0, templatewidth0,
00182           mb, ib, jb, templateheight1, templatewidth1)
00183   int   action, *ptrsizebuff;
00184   int   j, start, end;
00185   float **pptrbuff, *ptrblock;
00186   int   templateheight0, templatewidth0;
00187   int   templateheight1, templatewidth1;
00188   MDESC *ma, *mb;
00189   int   ia, ja, ib, jb, m, n;
00190   char *uplo, *diag;
00191 /* Execute the action on the local memory for the current interval and
00192  * increment pptrbuff and ptrsizebuff of the intervalsize */
00193 /* Notice that if the interval is contigous in the virtual matrice, it is
00194  * also contigous in the real one ! */
00195 {
00196   /* int       un = 1; only when we use dcopy instead of memcpy */
00197   float *ptrstart;
00198   int   offset, nbline;
00199   int   intervalsize;
00200   assert(start < end);
00201   assert(j >= 0 && j < n);
00202   nbline =
00203         insidemat(uplo, diag, start, j, m, n, &offset);
00204   if (nbline <= 0)
00205     return;
00206   start += offset;
00207   if (start >= end)
00208     return;
00209   intervalsize = min(end - start, nbline);
00210   (*ptrsizebuff) += intervalsize;
00211   switch (action) {
00212   case SENDBUFF:        /* fill buff with local elements to be sent */
00213     ptrstart = ptrblock + localindice(start + ia, j + ja,
00214                                       templateheight0, templatewidth0, ma);
00215     memcpy((char *) (*pptrbuff), (char *) ptrstart,
00216            intervalsize * sizeof(float));
00217     /* scopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff),
00218      * &un); */
00219     (*pptrbuff) += intervalsize;
00220     break;
00221   case RECVBUFF:        /* fill local memory with the values received */
00222     ptrstart = ptrblock + localindice(start + ib, j + jb,
00223                                       templateheight1, templatewidth1, mb);
00224     memcpy((char *) ptrstart, (char *) (*pptrbuff),
00225            intervalsize * sizeof(float));
00226     /* scopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart),
00227      * &un); */
00228     (*pptrbuff) += intervalsize;
00229     break;
00230   case SIZEBUFF:        /* computation of sizebuff */
00231     break;
00232   default:
00233     printf("action is  %d outside the scope of the case [0..2] !! \n ", action);
00234     exit(0);
00235     break;
00236   };    /* switch (action) */
00237 }/* intersect() */
00238 /* scan_intervals: scans two distributions in one dimension, and compute the
00239  * intersections on the local processor. result must be long enough to
00240  * contains the result that are stocked in IDESC structure, the function
00241  * returns the number of intersections found */
00242 int 
00243 scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1,
00244                result)
00245   char  type;
00246   int   ja, jb, n, q0, q1, col0, col1;
00247   MDESC *ma, *mb;
00248   IDESC *result;
00249 {
00250   int   offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1;
00251   int   l;      /* local indice on the beginning of the interval */
00252   assert(type == 'c' || type == 'r');
00253   nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow);
00254   nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow);
00255   templatewidth0 = q0 * nbcol0;
00256   templatewidth1 = q1 * nbcol1;
00257   {
00258     int   sp0 = (type == 'c' ? ma->spcol : ma->sprow);
00259     int   sp1 = (type == 'c' ? mb->spcol : mb->sprow);
00260     j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja;
00261     j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb;
00262   }
00263   offset = 0;
00264   l = 0;
00265   /* a small check to verify that the submatrix begin inside the first block
00266    * of the original matrix, this done by a sort of coordinate change at the
00267    * beginning of the Cpstrmr2d */
00268   assert(j0 + nbcol0 > 0);
00269   assert(j1 + nbcol1 > 0);
00270   while ((j0 < n) && (j1 < n)) {
00271     int   end0, end1;
00272     int   start, end;
00273     end0 = j0 + nbcol0;
00274     end1 = j1 + nbcol1;
00275     if (end0 <= j1) {
00276       j0 += templatewidth0;
00277       l += nbcol0;
00278       continue;
00279     }
00280     if (end1 <= j0) {
00281       j1 += templatewidth1;
00282       continue;
00283     }
00284     /* compute the raw intersection */
00285     start = max(j0, j1);
00286     start = max(start, 0);
00287     /* the start is correct now, update the corresponding fields */
00288     result[offset].gstart = start;
00289     end = min(end0, end1);
00290     if (end0 == end) {
00291       j0 += templatewidth0;
00292       l += nbcol0;
00293     }
00294     if (end1 == end)
00295       j1 += templatewidth1;
00296     /* throw the limit if they go out of the matrix */
00297     end = min(end, n);
00298     assert(end > start);
00299     /* it is a bit tricky to see why the length is always positive after all
00300      * this min and max, first we have the property that every interval
00301      * considered is at least partly into the submatrix, second we arrive
00302      * here only if the raw intersection is non-void, if we remove a limit
00303      * that means the corresponding frontier is in both intervals which
00304      * proove the final interval is non-void, clear ?? */
00305     result[offset].len = end - start;
00306     offset += 1;
00307   }     /* while */
00308   return offset;
00309 }
00310 /*********************************************************************/
00311 /* Do the scanning of intervals and the requested action */
00312 void
00313 scanD0(uplo, diag, action, ptrbuff, ptrsizebuff,
00314        m, n,
00315        ma, ia, ja, p0, q0,
00316        mb, ib, jb, p1, q1,
00317        v_inter, vinter_nb,
00318        h_inter, hinter_nb,
00319        ptrblock)
00320   int   action, /* # of the action done on the intersected intervals  */
00321        *ptrsizebuff;    /* size of the communication ptrbuffer (chosen to be
00322                          * an output parameter in every cases) */
00323   float *ptrbuff        /* address of the communication ptrbuffer (a
00324                          * suffisant memory space is supposed to be allocated
00325       before the call) */ , *ptrblock;
00326   int   p0, q0, p1, q1;
00327   IDESC *v_inter, *h_inter;
00328   int   vinter_nb, hinter_nb;
00329   int   m, n;
00330   int   ia, ja, ib, jb;
00331   MDESC *ma, *mb;
00332   char *uplo, *diag;
00333 {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to
00334   * the [a..a+au-1] interval of length au */
00335   int   templateheight1, templatewidth1;
00336   int   templateheight0, templatewidth0;
00337   int   h, v;   /* for scanning the intervals */
00338   /* initializations */
00339   templateheight1 = p1 * mb->nbrow;
00340   templateheight0 = p0 * ma->nbrow;
00341   templatewidth1 = q1 * mb->nbcol;
00342   templatewidth0 = q0 * ma->nbcol;
00343   /* we now will deal will logical grids, that's to say we change our
00344    * numbering of processors so that (0,0) begin on logical processor (0,0) */
00345   /* in case we will not enter the while loop */
00346   (*ptrsizebuff) = 0;
00347   for (h = 0; h < hinter_nb; h++)
00348     for (v = 0; v < vinter_nb; v++) {
00349       int   j;
00350       for (j = 0; j < h_inter[h].len; j++)
00351         intersect(uplo, diag, j + h_inter[h].gstart,
00352                   v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len,
00353                   action, ptrsizebuff, &ptrbuff, ptrblock, m, n,
00354                   ma, ia, ja, templateheight0, templatewidth0,
00355                   mb, ib, jb, templateheight1, templatewidth1);
00356     }
00357 }/* scanD0() */