ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cplascal.c
Go to the documentation of this file.
00001 /* ---------------------------------------------------------------------
00002 *
00003 *  -- PBLAS auxiliary routine (version 2.0) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     April 1, 1998
00007 *
00008 *  ---------------------------------------------------------------------
00009 */
00010 /*
00011 *  Include files
00012 */
00013 #include "../pblas.h"
00014 #include "../PBpblas.h"
00015 #include "../PBtools.h"
00016 #include "../PBblacs.h"
00017 #include "../PBblas.h"
00018 
00019 #ifdef __STDC__
00020 void PB_Cplascal( PBTYP_T * TYPE, char * UPLO, char * CONJUG, int M,
00021                   int N, char * ALPHA, char * A, int IA, int JA,
00022                   int * DESCA )
00023 #else
00024 void PB_Cplascal( TYPE, UPLO, CONJUG, M, N, ALPHA, A, IA, JA, DESCA )
00025 /*
00026 *  .. Scalar Arguments ..
00027 */
00028    char           * CONJUG, * UPLO;
00029    int            IA, JA, M, N;
00030    char           * ALPHA;
00031    PBTYP_T        * TYPE;
00032 /*
00033 *  .. Array Arguments ..
00034 */
00035    int            * DESCA;
00036    char           * A;
00037 #endif
00038 {
00039 /*
00040 *  Purpose
00041 *  =======
00042 *
00043 *  PB_Cplascal scales by alpha an  m by n  submatrix  sub( A )  denoting
00044 *  A(IA:IA+M-1,JA:JA+N-1).
00045 *
00046 *  Notes
00047 *  =====
00048 *
00049 *  A description  vector  is associated with each 2D block-cyclicly dis-
00050 *  tributed matrix.  This  vector  stores  the  information  required to
00051 *  establish the  mapping  between a  matrix entry and its corresponding
00052 *  process and memory location.
00053 *
00054 *  In  the  following  comments,   the character _  should  be  read  as
00055 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
00056 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
00057 *
00058 *  NOTATION         STORED IN       EXPLANATION
00059 *  ---------------- --------------- ------------------------------------
00060 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
00061 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
00062 *                                   the NPROW x NPCOL BLACS process grid
00063 *                                   A  is  distributed over. The context
00064 *                                   itself  is  global,  but  the handle
00065 *                                   (the integer value) may vary.
00066 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
00067 *                                   ted matrix A, M_A >= 0.
00068 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
00069 *                                   buted matrix A, N_A >= 0.
00070 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
00071 *                                   block of the matrix A, IMB_A > 0.
00072 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
00073 *                                   left   block   of   the  matrix   A,
00074 *                                   INB_A > 0.
00075 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
00076 *                                   bute the last  M_A-IMB_A  rows of A,
00077 *                                   MB_A > 0.
00078 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
00079 *                                   bute the last  N_A-INB_A  columns of
00080 *                                   A, NB_A > 0.
00081 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
00082 *                                   row of the matrix  A is distributed,
00083 *                                   NPROW > RSRC_A >= 0.
00084 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
00085 *                                   first column of  A  is  distributed.
00086 *                                   NPCOL > CSRC_A >= 0.
00087 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
00088 *                                   array  storing  the  local blocks of
00089 *                                   the distributed matrix A,
00090 *                                   IF( Lc( 1, N_A ) > 0 )
00091 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
00092 *                                   ELSE
00093 *                                      LLD_A >= 1.
00094 *
00095 *  Let K be the number of  rows of a matrix A starting at the global in-
00096 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
00097 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
00098 *  receive if these K rows were distributed over NPROW processes.  If  K
00099 *  is the number of columns of a matrix  A  starting at the global index
00100 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
00101 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
00102 *  these K columns were distributed over NPCOL processes.
00103 *
00104 *  The values of Lr() and Lc() may be determined via a call to the func-
00105 *  tion PB_Cnumroc:
00106 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
00107 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
00108 *
00109 *  Arguments
00110 *  =========
00111 *
00112 *  TYPE    (local input) pointer to a PBTYP_T structure
00113 *          On entry,  TYPE  is a pointer to a structure of type PBTYP_T,
00114 *          that contains type information (See pblas.h).
00115 *
00116 *  UPLO    (global input) pointer to CHAR
00117 *          On entry, UPLO specifies the part  of  the submatrix sub( A )
00118 *          to be scaled as follows:
00119 *             = 'L' or 'l':         Lower triangular part is scaled; the
00120 *             strictly upper triangular part of sub( A ) is not changed;
00121 *             = 'U' or 'u':         Upper triangular part is scaled; the
00122 *             strictly lower triangular part of sub( A ) is not changed;
00123 *             Otherwise:  All of the submatrix sub( A ) is scaled.
00124 *
00125 *  CONJUG  (global input) pointer to CHAR
00126 *          On entry,  CONJUG  specifies  what  kind of scaling should be
00127 *          done as follows: when UPLO is 'L', 'l', 'U' or 'u' and CONJUG
00128 *          is 'Z' or 'z', alpha is assumed to be real and the  imaginary
00129 *          part of the diagonals are set to zero. Otherwise, alpha is of
00130 *          the same type as the entries of sub( A ) and nothing particu-
00131 *          lar is done to the diagonals of sub( A ).
00132 *
00133 *  M       (global input) INTEGER
00134 *          On entry,  M  specifies the number of rows of  the  submatrix
00135 *          sub( A ). M  must be at least zero.
00136 *
00137 *  N       (global input) INTEGER
00138 *          On entry, N  specifies the number of columns of the submatrix
00139 *          sub( A ). N must be at least zero.
00140 *
00141 *  ALPHA   (global input) pointer to CHAR
00142 *          On entry,  ALPHA  specifies the scalar alpha, i.e., the cons-
00143 *          tant with which the matrix elements are to be scaled.
00144 *
00145 *  A       (local input/local output) pointer to CHAR
00146 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
00147 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
00148 *          the local entries of the matrix A to be scaled.  On exit, the
00149 *          local  entries  of this array corresponding to the to the en-
00150 *          tries of the submatrix sub( A ) are  overwritten by the local
00151 *          entries of the m by n scaled submatrix.
00152 *
00153 *  IA      (global input) INTEGER
00154 *          On entry, IA  specifies A's global row index, which points to
00155 *          the beginning of the submatrix sub( A ).
00156 *
00157 *  JA      (global input) INTEGER
00158 *          On entry, JA  specifies A's global column index, which points
00159 *          to the beginning of the submatrix sub( A ).
00160 *
00161 *  DESCA   (global and local input) INTEGER array
00162 *          On entry, DESCA  is an integer array of dimension DLEN_. This
00163 *          is the array descriptor for the matrix A.
00164 *
00165 *  -- Written on April 1, 1998 by
00166 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
00167 *
00168 *  ---------------------------------------------------------------------
00169 */
00170 /*
00171 *  .. Local Scalars ..
00172 */
00173    char           UploA, herm, type;
00174    int            Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Ald, Amb, Amp,
00175                   Amp0, Anb, Anq, Anq0, ctxt, izero=0, k, kb, ktmp, mn, mycol,
00176                   myrow, nb, npcol, nprow, size;
00177    TZSCAL_T       scal;
00178 /*
00179 *  .. Local Arrays ..
00180 */
00181    int            Ad0[DLEN_];
00182    char           * Aptr = NULL;
00183 /* ..
00184 *  .. Executable Statements ..
00185 *
00186 */
00187 /*
00188 *  Quick return if possible
00189 */
00190    if( ( M <= 0 ) || ( N <= 0 ) ) return;
00191 /*
00192 *  If alpha is zero, then call PB_Cplapad instead.
00193 */
00194    type  = TYPE->type;
00195    UploA = Mupcase( UPLO[0] );
00196    herm  = ( UploA == CALL ? CNOCONJG : Mupcase( CONJUG[0] ) );
00197 
00198    if( type == SREAL )
00199    {
00200       if( ((float*)(ALPHA))[REAL_PART] == ZERO )
00201       {
00202          PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA,
00203                      JA, DESCA );
00204          return;
00205       }
00206       else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return;
00207    }
00208    else if( type == DREAL )
00209    {
00210       if( ((double*)(ALPHA))[REAL_PART] == ZERO )
00211       {
00212          PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A, IA,
00213                      JA, DESCA );
00214          return;
00215       }
00216       else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return;
00217    }
00218    else if( type == SCPLX )
00219    {
00220       if( herm == CCONJG )
00221       {
00222          if( ((float*)(ALPHA))[REAL_PART] == ZERO )
00223          {
00224             PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
00225                         IA, JA, DESCA );
00226             return;
00227          }
00228       }
00229       else
00230       {
00231          if( ((float*)(ALPHA))[IMAG_PART] == ZERO )
00232          {
00233             if( ((float*)(ALPHA))[REAL_PART] == ZERO )
00234             {
00235                PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
00236                            IA, JA, DESCA );
00237                return;
00238             }
00239             else if( ((float*)(ALPHA))[REAL_PART] == ONE ) return;
00240          }
00241       }
00242    }
00243    else if( type == DCPLX )
00244    {
00245       if( herm == CCONJG )
00246       {
00247          if( ((double*)(ALPHA))[REAL_PART] == ZERO )
00248          {
00249             PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
00250                         IA, JA, DESCA );
00251             return;
00252          }
00253       }
00254       else
00255       {
00256          if( ((double*)(ALPHA))[IMAG_PART] == ZERO )
00257          {
00258             if( ((double*)(ALPHA))[REAL_PART] == ZERO )
00259             {
00260                PB_Cplapad( TYPE, UPLO, NOCONJG, M, N, TYPE->zero, TYPE->zero, A,
00261                            IA, JA, DESCA );
00262                return;
00263             }
00264             else if( ((double*)(ALPHA))[REAL_PART] == ONE ) return;
00265          }
00266       }
00267    }
00268 /*
00269 *  Retrieve process grid information
00270 */
00271    Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00272 /*
00273 *  Compute descriptor Ad0 for sub( A )
00274 */
00275    PB_Cdescribe( M, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
00276                  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
00277 /*
00278 *  Quick return if I don't own any of sub( A ).
00279 */
00280    Amp  = PB_Cnumroc( M, 0, Aimb1, Amb, myrow, Arow, nprow );
00281    Anq  = PB_Cnumroc( N, 0, Ainb1, Anb, mycol, Acol, npcol );
00282    if( ( Amp <= 0 ) || ( Anq <= 0 ) ) return;
00283 
00284    size = TYPE->size;
00285    scal = ( herm == CCONJG ? TYPE->Fhescal : TYPE->Ftzscal );
00286    Aptr = Mptr( A, Aii, Ajj, Ald, size );
00287 /*
00288 *  When the entire sub( A ) needs to be scaled or when sub( A ) is replicated in
00289 *  all processes, just call the local routine.
00290 */
00291    if( ( Mupcase( UPLO[0] ) == CALL ) ||
00292        ( ( ( Arow < 0 ) || ( nprow == 1 ) ) &&
00293          ( ( Acol < 0 ) || ( npcol == 1 ) ) ) )
00294    {
00295       scal( C2F_CHAR( UPLO ), &Amp, &Anq, &izero, ALPHA, Aptr, &Ald );
00296       return;
00297    }
00298 /*
00299 *  Computational partitioning size is computed as the product of the logical
00300 *  value returned by pilaenv_ and two times the least common multiple of nprow
00301 *  and npcol.
00302 */
00303    nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type ) ) *
00304         PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
00305 
00306    mn = MIN( M, N );
00307 
00308    if( Mupcase( UPLO[0] ) == CLOWER )
00309    {
00310 /*
00311 *  Lower triangle of sub( A ): proceed by block of columns. For each block of
00312 *  columns, operate on the logical diagonal block first and then the remaining
00313 *  rows of that block of columns.
00314 */
00315       for( k = 0; k < mn; k += nb )
00316       {
00317          kb   = mn - k; ktmp = k + ( kb = MIN( kb, nb ) );
00318          PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
00319          Akp  = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
00320          Akq  = PB_Cnumroc( k,    0, Ainb1, Anb, mycol, Acol, npcol );
00321          Anq0 = PB_Cnumroc( kb,   k, Ainb1, Anb, mycol, Acol, npcol );
00322          if( ( Amp0 = Amp - Akp ) > 0 )
00323             scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr,
00324                   Akp, Akq, Ald, size ), &Ald );
00325       }
00326    }
00327    else if( Mupcase( UPLO[0] ) == CUPPER )
00328    {
00329 /*
00330 *  Upper triangle of sub( A ): proceed by block of columns. For each block of
00331 *  columns, operate on the trailing rows and then the logical diagonal block
00332 *  of that block of columns. When M < N, the last columns of sub( A ) are
00333 *  handled together.
00334 */
00335       for( k = 0; k < mn; k += nb )
00336       {
00337          kb   = mn - k; kb = MIN( kb, nb );
00338          Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00339          Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00340          Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00341          if( Akp > 0 )
00342             scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr,
00343                   0, Akq, Ald, size ), &Ald );
00344          PB_Cplasca2( TYPE, UPLO, CONJUG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
00345       }
00346       if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
00347          scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0,
00348                Akq, Ald, size ), &Ald );
00349    }
00350    else
00351    {
00352 /*
00353 *  All of sub( A ): proceed by block of columns. For each block of columns,
00354 *  operate on the trailing rows, then the logical diagonal block, and finally
00355 *  the remaining rows of that block of columns. When M < N, the last columns
00356 *  of sub( A ) are handled together.
00357 */
00358       for( k = 0; k < mn; k += nb )
00359       {
00360          kb   = mn - k; kb = MIN( kb, nb );
00361          Akp  = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00362          Akq  = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00363          Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00364          if( Akp > 0 )
00365             scal( C2F_CHAR( ALL ), &Akp, &Anq0, &izero, ALPHA, Mptr( Aptr,
00366                   0, Akq, Ald, size ), &Ald );
00367          PB_Cplasca2( TYPE, UPLO, NOCONJG, kb, kb, ALPHA, Aptr, k, k, Ad0 );
00368          Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
00369          if( ( Amp0 = Amp - Akp ) > 0 )
00370             scal( C2F_CHAR( ALL ), &Amp0, &Anq0, &izero, ALPHA, Mptr( Aptr,
00371                   Akp, Akq, Ald, size ), &Ald );
00372       }
00373       if( ( Anq -= ( Akq += Anq0 ) ) > 0 )
00374          scal( C2F_CHAR( ALL ), &Amp, &Anq, &izero, ALPHA, Mptr( Aptr, 0,
00375                Akq, Ald, size ), &Ald );
00376    }
00377 /*
00378 *  End of PB_Cplascal
00379 */
00380 }