ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pztrmr2.c
Go to the documentation of this file.
00001 #include "redist.h"
00002 /* $Id: pztrmr2.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
00003  * 
00004  * some functions used by the pztrmr2d routine see file pztrmr.c for more
00005  * documentation.
00006  * 
00007  * Created March 1993 by B. Tourancheau (See sccs for modifications). */
00008 #define static2 static
00009 #if defined(Add_) || defined(f77IsF2C)
00010 #define fortran_mr2d pztrmr2do_
00011 #define fortran_mr2dnew pztrmr2d_
00012 #elif defined(UpCase)
00013 #define fortran_mr2dnew PZTRMR2D
00014 #define fortran_mr2d PZTRMR2DO
00015 #define zcopy_ ZCOPY
00016 #define zlacpy_ ZLACPY
00017 #else
00018 #define fortran_mr2d pztrmr2do
00019 #define fortran_mr2dnew pztrmr2d
00020 #define zcopy_ zcopy
00021 #define zlacpy_ zlacpy
00022 #endif
00023 #define Clacpy Cztrlacpy
00024 void  Clacpy();
00025 typedef struct {
00026   double r, i;
00027 }     dcomplex;
00028 typedef struct {
00029   int   desctype;
00030   int   ctxt;
00031   int   m;
00032   int   n;
00033   int   nbrow;
00034   int   nbcol;
00035   int   sprow;
00036   int   spcol;
00037   int   lda;
00038 }     MDESC;
00039 #define BLOCK_CYCLIC_2D 1
00040 typedef struct {
00041   int   gstart;
00042   int   len;
00043 }     IDESC;
00044 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
00045 #define max(A,B) ((A)>(B)?(A):(B))
00046 #define min(A,B) ((A)>(B)?(B):(A))
00047 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
00048 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
00049 #ifdef MALLOCDEBUG
00050 #define malloc mymalloc
00051 #define free myfree
00052 #define realloc myrealloc
00053 #endif
00054 /* Cblacs */
00055 extern void Cblacs_pcoord();
00056 extern int Cblacs_pnum();
00057 extern void Csetpvmtids();
00058 extern void Cblacs_get();
00059 extern void Cblacs_pinfo();
00060 extern void Cblacs_gridinfo();
00061 extern void Cblacs_gridinit();
00062 extern void Cblacs_exit();
00063 extern void Cblacs_gridexit();
00064 extern void Cblacs_setup();
00065 extern void Cigebs2d();
00066 extern void Cigebr2d();
00067 extern void Cigesd2d();
00068 extern void Cigerv2d();
00069 extern void Cigsum2d();
00070 extern void Cigamn2d();
00071 extern void Cigamx2d();
00072 extern void Czgesd2d();
00073 extern void Czgerv2d();
00074 /* lapack */
00075 void  zlacpy_();
00076 /* aux fonctions */
00077 extern int localindice();
00078 extern void *mr2d_malloc();
00079 extern int ppcm();
00080 extern int localsize();
00081 extern int memoryblocksize();
00082 extern int changeorigin();
00083 extern void paramcheck();
00084 /* tools and others function */
00085 #define scanD0 ztrscanD0
00086 #define dispmat ztrdispmat
00087 #define setmemory ztrsetmemory
00088 #define freememory ztrfreememory
00089 #define scan_intervals ztrscan_intervals
00090 extern void scanD0();
00091 extern void dispmat();
00092 extern void setmemory();
00093 extern void freememory();
00094 extern int scan_intervals();
00095 extern void Cpztrmr2do();
00096 extern void Cpztrmr2d();
00097 /* some defines for Cpztrmr2do */
00098 #define SENDBUFF 0
00099 #define RECVBUFF 1
00100 #define SIZEBUFF 2
00101 #if 0
00102 #define DEBUG
00103 #endif
00104 #ifndef DEBUG
00105 #define NDEBUG
00106 #endif
00107 #include <stdio.h>
00108 #include <stdlib.h>
00109 #include <string.h>
00110 #include <assert.h>
00111 #include <ctype.h>
00112 /* Created March 1993 by B. Tourancheau (See sccs for modifications). */
00113 /************************************************************************/
00114 /* Set the memory space with the malloc function */
00115 void
00116 setmemory(adpointer, blocksize)
00117   dcomplex **adpointer;
00118   int   blocksize;
00119 {
00120   assert(blocksize >= 0);
00121   if (blocksize == 0) {
00122     *adpointer = NULL;
00123     return;
00124   }
00125   *adpointer = (dcomplex *) mr2d_malloc(
00126                                         blocksize * sizeof(dcomplex));
00127 }
00128 /******************************************************************/
00129 /* Free the memory space after the malloc */
00130 void
00131 freememory(ptrtobefreed)
00132   dcomplex *ptrtobefreed;
00133 {
00134   if (ptrtobefreed == NULL)
00135     return;
00136   free((char *) ptrtobefreed);
00137 }
00138 /* extern functions for intersect() extern zcopy_(); */
00139 /**************************************************************/
00140 /* return the number of elements int the column after i and the distance of
00141  * the first one from i, i,j can be negative out of borns, the number of
00142  * elements returned can be negative (means 0) */
00143 static2 int
00144 insidemat(uplo, diag, i, j, m, n, offset)
00145   int   m, n, i, j;     /* coordonnees de depart, taille de la sous-matrice */
00146   char *uplo, *diag;
00147   int  *offset;
00148 {
00149   /* tests outside mxn */
00150   assert(j >= 0 && j < n);
00151   assert(i >= 0);
00152   if (toupper(*uplo) == 'U') {
00153     int   nbline;       /* number of lines in the j_th column */
00154     int   virtualnbline;        /* number of line if we were not limited by m */
00155     *offset = 0;
00156     virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N');
00157     nbline = min(virtualnbline, m);
00158     return nbline - i;
00159   } else {
00160     int   firstline;    /* first line in the j_th column */
00161     int   diagcol;      /* column where the diag begin */
00162     int   virtualline;  /* virtual first line if the matrix was extended with
00163                          * negative indices */
00164     int   off;
00165     diagcol = max(n - m, 0);;
00166     virtualline = j - diagcol + (toupper(*diag) == 'U');
00167     firstline = max(0, virtualline);
00168     off = max(firstline - i, 0);
00169     *offset = off;
00170     i += off;
00171     return m - i;
00172   }
00173 }/* insidemat() */
00174 /********************************************************************/
00175 /* Execute an action on the local memories when an intersection occurs (the
00176  * action can be the filling of the memory buffer, the count of the memory
00177  * buffer size or the setting of the memory with the element received) */
00178 static2 void
00179 intersect(uplo, diag,
00180           j, start, end,
00181           action,
00182           ptrsizebuff, pptrbuff, ptrblock,
00183           m, n,
00184           ma, ia, ja, templateheight0, templatewidth0,
00185           mb, ib, jb, templateheight1, templatewidth1)
00186   int   action, *ptrsizebuff;
00187   int   j, start, end;
00188   dcomplex **pptrbuff, *ptrblock;
00189   int   templateheight0, templatewidth0;
00190   int   templateheight1, templatewidth1;
00191   MDESC *ma, *mb;
00192   int   ia, ja, ib, jb, m, n;
00193   char *uplo, *diag;
00194 /* Execute the action on the local memory for the current interval and
00195  * increment pptrbuff and ptrsizebuff of the intervalsize */
00196 /* Notice that if the interval is contigous in the virtual matrice, it is
00197  * also contigous in the real one ! */
00198 {
00199   /* int       un = 1; only when we use dcopy instead of memcpy */
00200   dcomplex *ptrstart;
00201   int   offset, nbline;
00202   int   intervalsize;
00203   assert(start < end);
00204   assert(j >= 0 && j < n);
00205   nbline =
00206         insidemat(uplo, diag, start, j, m, n, &offset);
00207   if (nbline <= 0)
00208     return;
00209   start += offset;
00210   if (start >= end)
00211     return;
00212   intervalsize = min(end - start, nbline);
00213   (*ptrsizebuff) += intervalsize;
00214   switch (action) {
00215   case SENDBUFF:        /* fill buff with local elements to be sent */
00216     ptrstart = ptrblock + localindice(start + ia, j + ja,
00217                                       templateheight0, templatewidth0, ma);
00218     memcpy((char *) (*pptrbuff), (char *) ptrstart,
00219            intervalsize * sizeof(dcomplex));
00220     /* zcopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff),
00221      * &un); */
00222     (*pptrbuff) += intervalsize;
00223     break;
00224   case RECVBUFF:        /* fill local memory with the values received */
00225     ptrstart = ptrblock + localindice(start + ib, j + jb,
00226                                       templateheight1, templatewidth1, mb);
00227     memcpy((char *) ptrstart, (char *) (*pptrbuff),
00228            intervalsize * sizeof(dcomplex));
00229     /* zcopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart),
00230      * &un); */
00231     (*pptrbuff) += intervalsize;
00232     break;
00233   case SIZEBUFF:        /* computation of sizebuff */
00234     break;
00235   default:
00236     printf("action is  %d outside the scope of the case [0..2] !! \n ", action);
00237     exit(0);
00238     break;
00239   };    /* switch (action) */
00240 }/* intersect() */
00241 /* scan_intervals: scans two distributions in one dimension, and compute the
00242  * intersections on the local processor. result must be long enough to
00243  * contains the result that are stocked in IDESC structure, the function
00244  * returns the number of intersections found */
00245 int 
00246 scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1,
00247                result)
00248   char  type;
00249   int   ja, jb, n, q0, q1, col0, col1;
00250   MDESC *ma, *mb;
00251   IDESC *result;
00252 {
00253   int   offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1;
00254   int   l;      /* local indice on the beginning of the interval */
00255   assert(type == 'c' || type == 'r');
00256   nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow);
00257   nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow);
00258   templatewidth0 = q0 * nbcol0;
00259   templatewidth1 = q1 * nbcol1;
00260   {
00261     int   sp0 = (type == 'c' ? ma->spcol : ma->sprow);
00262     int   sp1 = (type == 'c' ? mb->spcol : mb->sprow);
00263     j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja;
00264     j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb;
00265   }
00266   offset = 0;
00267   l = 0;
00268   /* a small check to verify that the submatrix begin inside the first block
00269    * of the original matrix, this done by a sort of coordinate change at the
00270    * beginning of the Cpztrmr2d */
00271   assert(j0 + nbcol0 > 0);
00272   assert(j1 + nbcol1 > 0);
00273   while ((j0 < n) && (j1 < n)) {
00274     int   end0, end1;
00275     int   start, end;
00276     end0 = j0 + nbcol0;
00277     end1 = j1 + nbcol1;
00278     if (end0 <= j1) {
00279       j0 += templatewidth0;
00280       l += nbcol0;
00281       continue;
00282     }
00283     if (end1 <= j0) {
00284       j1 += templatewidth1;
00285       continue;
00286     }
00287     /* compute the raw intersection */
00288     start = max(j0, j1);
00289     start = max(start, 0);
00290     /* the start is correct now, update the corresponding fields */
00291     result[offset].gstart = start;
00292     end = min(end0, end1);
00293     if (end0 == end) {
00294       j0 += templatewidth0;
00295       l += nbcol0;
00296     }
00297     if (end1 == end)
00298       j1 += templatewidth1;
00299     /* throw the limit if they go out of the matrix */
00300     end = min(end, n);
00301     assert(end > start);
00302     /* it is a bit tricky to see why the length is always positive after all
00303      * this min and max, first we have the property that every interval
00304      * considered is at least partly into the submatrix, second we arrive
00305      * here only if the raw intersection is non-void, if we remove a limit
00306      * that means the corresponding frontier is in both intervals which
00307      * proove the final interval is non-void, clear ?? */
00308     result[offset].len = end - start;
00309     offset += 1;
00310   }     /* while */
00311   return offset;
00312 }
00313 /*********************************************************************/
00314 /* Do the scanning of intervals and the requested action */
00315 void
00316 scanD0(uplo, diag, action, ptrbuff, ptrsizebuff,
00317        m, n,
00318        ma, ia, ja, p0, q0,
00319        mb, ib, jb, p1, q1,
00320        v_inter, vinter_nb,
00321        h_inter, hinter_nb,
00322        ptrblock)
00323   int   action, /* # of the action done on the intersected intervals  */
00324        *ptrsizebuff;    /* size of the communication ptrbuffer (chosen to be
00325                          * an output parameter in every cases) */
00326   dcomplex *ptrbuff     /* address of the communication ptrbuffer (a
00327                          * suffisant memory space is supposed to be allocated
00328       before the call) */ , *ptrblock;
00329   int   p0, q0, p1, q1;
00330   IDESC *v_inter, *h_inter;
00331   int   vinter_nb, hinter_nb;
00332   int   m, n;
00333   int   ia, ja, ib, jb;
00334   MDESC *ma, *mb;
00335   char *uplo, *diag;
00336 {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to
00337   * the [a..a+au-1] interval of length au */
00338   int   templateheight1, templatewidth1;
00339   int   templateheight0, templatewidth0;
00340   int   h, v;   /* for scanning the intervals */
00341   /* initializations */
00342   templateheight1 = p1 * mb->nbrow;
00343   templateheight0 = p0 * ma->nbrow;
00344   templatewidth1 = q1 * mb->nbcol;
00345   templatewidth0 = q0 * ma->nbcol;
00346   /* we now will deal will logical grids, that's to say we change our
00347    * numbering of processors so that (0,0) begin on logical processor (0,0) */
00348   /* in case we will not enter the while loop */
00349   (*ptrsizebuff) = 0;
00350   for (h = 0; h < hinter_nb; h++)
00351     for (v = 0; v < vinter_nb; v++) {
00352       int   j;
00353       for (j = 0; j < h_inter[h].len; j++)
00354         intersect(uplo, diag, j + h_inter[h].gstart,
00355                   v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len,
00356                   action, ptrsizebuff, &ptrbuff, ptrblock, m, n,
00357                   ma, ia, ja, templateheight0, templatewidth0,
00358                   mb, ib, jb, templateheight1, templatewidth1);
00359     }
00360 }/* scanD0() */