ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cplapad.c
Go to the documentation of this file.
00001 /* ---------------------------------------------------------------------
00002 *
00003 *  -- PBLAS auxiliary routine (version 2.0) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     April 1, 1998
00007 *
00008 *  ---------------------------------------------------------------------
00009 */
00010 /*
00011 *  Include files
00012 */
00013 #include "../pblas.h"
00014 #include "../PBpblas.h"
00015 #include "../PBtools.h"
00016 #include "../PBblacs.h"
00017 #include "../PBblas.h"
00018 
00019 #ifdef __STDC__
00020 void PB_Cplapad( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M,
00021                  int N, char * ALPHA, char * BETA, char * A, int IA,
00022                  int JA, int * DESCA )
00023 #else
00024 void PB_Cplapad( TYPE, UPLO, CONJUG, M, N, ALPHA, BETA, A, IA, JA,
00025                  DESCA )
00026 /*
00027 *  .. Scalar Arguments ..
00028 */
00029    char           * CONJUG, * UPLO;
00030    int            IA, JA, M, N;
00031    char           * ALPHA, * BETA;
00032    PBTYP_T        * TYPE;
00033 /*
00034 *  .. Array Arguments ..
00035 */
00036    int            * DESCA;
00037    char           * A;
00038 #endif
00039 {
00040 /*
00041 *  Purpose
00042 *  =======
00043 *
00044 *  PB_Cplapad  initializes  an m by n  submatrix  A(IA:IA+M-1,JA:JA+N-1)
00045 *  denoted by  sub( A )  to beta on the diagonal or  zeros the imaginary
00046 *  part of those diagonals and set the offdiagonals to alpha.
00047 *
00048 *  Notes
00049 *  =====
00050 *
00051 *  A description  vector  is associated with each 2D block-cyclicly dis-
00052 *  tributed matrix.  This  vector  stores  the  information  required to
00053 *  establish the  mapping  between a  matrix entry and its corresponding
00054 *  process and memory location.
00055 *
00056 *  In  the  following  comments,   the character _  should  be  read  as
00057 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
00058 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
00059 *
00060 *  NOTATION         STORED IN       EXPLANATION
00061 *  ---------------- --------------- ------------------------------------
00062 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
00063 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
00064 *                                   the NPROW x NPCOL BLACS process grid
00065 *                                   A  is  distributed over. The context
00066 *                                   itself  is  global,  but  the handle
00067 *                                   (the integer value) may vary.
00068 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
00069 *                                   ted matrix A, M_A >= 0.
00070 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
00071 *                                   buted matrix A, N_A >= 0.
00072 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
00073 *                                   block of the matrix A, IMB_A > 0.
00074 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
00075 *                                   left   block   of   the  matrix   A,
00076 *                                   INB_A > 0.
00077 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
00078 *                                   bute the last  M_A-IMB_A  rows of A,
00079 *                                   MB_A > 0.
00080 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
00081 *                                   bute the last  N_A-INB_A  columns of
00082 *                                   A, NB_A > 0.
00083 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
00084 *                                   row of the matrix  A is distributed,
00085 *                                   NPROW > RSRC_A >= 0.
00086 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
00087 *                                   first column of  A  is  distributed.
00088 *                                   NPCOL > CSRC_A >= 0.
00089 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
00090 *                                   array  storing  the  local blocks of
00091 *                                   the distributed matrix A,
00092 *                                   IF( Lc( 1, N_A ) > 0 )
00093 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
00094 *                                   ELSE
00095 *                                      LLD_A >= 1.
00096 *
00097 *  Let K be the number of  rows of a matrix A starting at the global in-
00098 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
00099 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
00100 *  receive if these K rows were distributed over NPROW processes.  If  K
00101 *  is the number of columns of a matrix  A  starting at the global index
00102 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
00103 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
00104 *  these K columns were distributed over NPCOL processes.
00105 *
00106 *  The values of Lr() and Lc() may be determined via a call to the func-
00107 *  tion PB_Cnumroc:
00108 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
00109 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
00110 *
00111 *  Arguments
00112 *  =========
00113 *
00114 *  TYPE    (local input) pointer to a PBTYP_T structure
00115 *          On entry,  TYPE  is a pointer to a structure of type PBTYP_T,
00116 *          that contains type information (See pblas.h).
00117 *
00118 *  UPLO    (global input) pointer to CHAR
00119 *          On entry, UPLO specifies the part  of  the submatrix sub( A )
00120 *          to be set:
00121 *             = 'L' or 'l': Lower triangular  part is set; the  strictly
00122 *                      upper triangular part of sub( A ) is not changed;
00123 *             = 'U' or 'u': Upper triangular  part is set; the  strictly
00124 *                      lower triangular part of sub( A ) is not changed;
00125 *             Otherwise:  All of the matrix sub( A ) is set.
00126 *
00127 *  CONJUG  (global input) pointer to CHAR
00128 *          On entry,  CONJUG specifies what should be done to the diago-
00129 *          nals as follows. When UPLO is 'L', 'l', 'U' or 'u' and CONJUG
00130 *          is 'Z' or 'z', the imaginary part of the diagonals is set  to
00131 *          zero. Otherwise, the diagonals are set to beta.
00132 *
00133 *  M       (global input) INTEGER
00134 *          On entry,  M  specifies the number of rows of  the  submatrix
00135 *          sub( A ). M  must be at least zero.
00136 *
00137 *  N       (global input) INTEGER
00138 *          On entry, N  specifies the number of columns of the submatrix
00139 *          sub( A ). N must be at least zero.
00140 *
00141 *  ALPHA   (global input) pointer to CHAR
00142 *          On entry,  ALPHA  specifies the scalar alpha, i.e., the cons-
00143 *          tant to which the offdiagonal elements are to be set.
00144 *
00145 *  BETA    (global input) pointer to CHAR
00146 *          On entry, BETA  specifies the scalar beta, i.e., the constant
00147 *          to which the diagonal elements are to be set. BETA is not re-
00148 *          ferenced when UPLO is 'L', 'l', 'U' or 'u' and CONJUG is 'Z'.
00149 *
00150 *  A       (local input/local output) pointer to CHAR
00151 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
00152 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
00153 *          the local entries of the matrix  A  to be  set.  On exit, the
00154 *          leading m by n submatrix sub( A ) is set as follows:
00155 *
00156 *          UPLO = 'L' or 'l', A(IA+i-1,JA+j-1)=ALPHA, j+1<=i<=M, 1<=j<=N
00157 *          UPLO = 'U' or 'u', A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=j-1, 1<=j<=N
00158 *          otherwise,         A(IA+i-1,JA+j-1)=ALPHA, 1<=i<=M,   1<=j<=N
00159 *                                                      and IA+i.NE.JA+j,
00160 *          and, for all UPLO,  A(IA+i-1,JA+i-1) = BETA,  1<=i<=min(M,N).
00161 *
00162 *  IA      (global input) INTEGER
00163 *          On entry, IA  specifies A's global row index, which points to
00164 *          the beginning of the submatrix sub( A ).
00165 *
00166 *  JA      (global input) INTEGER
00167 *          On entry, JA  specifies A's global column index, which points
00168 *          to the beginning of the submatrix sub( A ).
00169 *
00170 *  DESCA   (global and local input) INTEGER array
00171 *          On entry, DESCA  is an integer array of dimension DLEN_. This
00172 *          is the array descriptor for the matrix A.
00173 *
00174 *  -- Written on April 1, 1998 by
00175 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
00176 *
00177 *  ---------------------------------------------------------------------
00178 */
00179 /*
00180 *  .. Local Scalars ..
00181 */
00182    char           type;
00183    int            Acol, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp, Amp0,
00184                   Anb, Anq, Anq0, Arow, ctxt, izero=0, k, kb, ktmp, mn, mycol,
00185                   myrow, nb, npcol, nprow, size;
00186    TZPAD_T        pad;
00187 /*
00188 *  .. Local Arrays ..
00189 */
00190    int            Ad0[DLEN_];
00191    char           * Aptr = NULL;
00192 /* ..
00193 *  .. Executable Statements ..
00194 *
00195 */
00196 /*
00197 *  Quick return if possible
00198 */
00199    if( ( M <= 0 ) || ( N <= 0 ) ) return;
00200 /*
00201 *  Retrieve process grid information
00202 */
00203    Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00204 /*
00205 *  Compute descriptor Ad0 for sub( A )
00206 */
00207    PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
00208                  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
00209 /*
00210 *  Quick return if I don't own any of sub( A ).
00211 */
00212    Amp = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow );
00213    Anq = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
00214    if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
00215 
00216    size = TYPE->size; type = TYPE->type; pad  = TYPE->Ftzpad;
00217    Aptr = Mptr( A, Aii, Ajj, Ald, size );
00218 /*
00219 *  When the entire sub( A ) needs to be padded and alpha is equal to beta, or
00220 *  sub( A ) is replicated in all processes, just call the local routine.
00221 */
00222    if( type == SREAL )
00223    {
00224       if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
00225             ( ((float*)(ALPHA))[REAL_PART] ==
00226               ((float*)(BETA ))[REAL_PART] ) ) ||
00227           ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
00228             ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
00229       {
00230          pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
00231               BETA, Aptr, &Ald );
00232          return;
00233       }
00234    }
00235    else if( type == DREAL )
00236    {
00237       if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
00238             ( ((double*)(ALPHA))[REAL_PART] ==
00239               ((double*)(BETA ))[REAL_PART] ) ) ||
00240           ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
00241             ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
00242       {
00243          pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
00244               BETA, Aptr, &Ald );
00245          return;
00246       }
00247    }
00248    else if( type == SCPLX )
00249    {
00250       if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
00251             ( ((float*)(ALPHA))[REAL_PART] ==
00252               ((float*)(BETA ))[REAL_PART] ) &&
00253             ( ((float*)(ALPHA))[IMAG_PART] ==
00254               ((float*)(BETA ))[IMAG_PART] ) ) ||
00255           ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
00256             ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
00257       {
00258          pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
00259               BETA, Aptr, &Ald );
00260          return;
00261       }
00262    }
00263    else if( type == DCPLX )
00264    {
00265       if( ( ( Mupcase( UPLO[0] ) == CALL ) &&
00266             ( ((double*)(ALPHA))[REAL_PART] ==
00267               ((double*)(BETA ))[REAL_PART] ) &&
00268             ( ((double*)(ALPHA))[IMAG_PART] ==
00269               ((double*)(BETA ))[IMAG_PART] ) ) ||
00270           ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
00271             ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
00272       {
00273          pad( C2F_CHAR( UPLO ), C2F_CHAR( CONJUG ), &Amp, &Anq, &izero, ALPHA,
00274               BETA, Aptr, &Ald );
00275          return;
00276       }
00277    }
00278 /*
00279 *  Computational partitioning size is computed as the product of the logical
00280 *  value returned by pilaenv_ and two times the least common multiple of nprow
00281 *  and npcol.
00282 */
00283    nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) *
00284         PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
00285 
00286    mn = MIN( M, N );
00287 
00288    if( Mupcase( UPLO[0] ) == CLOWER )
00289    {
00290 /*
00291 *  Lower triangle of sub( A ): proceed by block of columns. For each block of
00292 *  columns, operate on the logical diagonal block first and then the remaining
00293 *  rows of that block of columns.
00294 */
00295       for( k = 0; k < mn; k += nb )
00296       {
00297          kb   = mn - k; ktmp = k + ( kb = MIN( kb, nb ) );
00298          PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 );
00299          Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
00300          Akq  = PB_Cnumroc( k,    0, Ainb1, Anb, mycol, Acol, npcol );
00301          Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
00302          if( ( Amp0 = Amp - Akp ) > 0 )
00303             pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero,
00304                  ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
00305       }
00306    }
00307    else if( Mupcase( UPLO[0] ) == CUPPER )
00308    {
00309 /*
00310 *  Upper triangle of sub( A ): proceed by block of columns. For each block of
00311 *  columns, operate on the trailing rows and then the logical diagonal block
00312 *  of that block of columns. When M < N, the last columns of sub( A ) are
00313 *  handled together.
00314 */
00315       for( k = 0; k < mn; k += nb )
00316       {
00317          kb   = mn - k; kb = MIN( kb, nb );
00318          Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00319          Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00320          Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00321          if( Akp > 0 )
00322             pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero,
00323                  ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
00324          PB_Cplapd2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, BETA, Aptr, k, k, Ad0 );
00325       }
00326       if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
00327          pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA,
00328               ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
00329    }
00330    else
00331    {
00332 /*
00333 *  All of sub( A ): proceed by block of columns. For each block of columns,
00334 *  operate on the trailing rows, then the logical diagonal block, and finally
00335 *  the remaining rows of that block of columns. When M < N, the last columns
00336 *  of sub( A ) are handled together.
00337 */
00338       for( k = 0; k < mn; k += nb )
00339       {
00340          kb   = mn - k; kb = MIN( kb, nb );
00341          Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00342          Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00343          Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00344          if( Akp > 0 )
00345             pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Akp, &Anq0, &izero,
00346                  ALPHA, ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
00347          PB_Cplapd2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, BETA, Aptr, k, k,
00348                      Ad0 );
00349          Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
00350          if( ( Amp0 = Amp - Akp ) > 0 )
00351             pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp0, &Anq0, &izero,
00352                  ALPHA, ALPHA, Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
00353       }
00354       if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
00355          pad( C2F_CHAR( ALL ), C2F_CHAR( NOCONJG ), &Amp, &Anq, &izero, ALPHA,
00356               ALPHA, Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
00357    }
00358 /*
00359 *  End of PB_Cplapad
00360 */
00361 }