ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pctradd_.c
Go to the documentation of this file.
00001 /* ---------------------------------------------------------------------
00002 *
00003 *  -- PBLAS routine (version 2.0) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     April 1, 1998
00007 *
00008 *  ---------------------------------------------------------------------
00009 */
00010 /*
00011 *  Include files
00012 */
00013 #include "pblas.h"
00014 #include "PBpblas.h"
00015 #include "PBtools.h"
00016 #include "PBblacs.h"
00017 #include "PBblas.h"
00018 
00019 #ifdef __STDC__
00020 void pctradd_( F_CHAR_T UPLO, F_CHAR_T TRANS, int * M, int * N,
00021                float * ALPHA,
00022                float * A, int * IA, int * JA, int * DESCA,
00023                float * BETA,
00024                float * C, int * IC, int * JC, int * DESCC )
00025 #else
00026 void pctradd_( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA, BETA,
00027                C, IC, JC, DESCC )
00028 /*
00029 *  .. Scalar Arguments ..
00030 */
00031    F_CHAR_T       TRANS, UPLO;
00032    int            * IA, * IC, * JA, * JC, * M, * N;
00033    float          * ALPHA, * BETA;
00034 /*
00035 *  .. Array Arguments ..
00036 */
00037    int            * DESCA, * DESCC;
00038    float          * A, * C;
00039 #endif
00040 {
00041 /*
00042 *  Purpose
00043 *  =======
00044 *
00045 *  PCTRADD  adds a trapezoidal matrix to another
00046 *
00047 *     sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )
00048 *
00049 *  where
00050 *
00051 *     sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1),  and, op( X )  is one  of
00052 *
00053 *     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ).
00054 *
00055 *  Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1)   if TRANS = 'N',
00056 *                               A(IA:IA+N-1,JA:JA+M-1)'  if TRANS = 'T',
00057 *                        conjg(A(IA:IA+N-1,JA:JA+M-1)')  if TRANS = 'C',
00058 *
00059 *  Alpha  and  beta  are scalars, sub( C ) and op( sub( A ) ) are m by n
00060 *  upper or lower trapezoidal submatrices.
00061 *
00062 *  Notes
00063 *  =====
00064 *
00065 *  A description  vector  is associated with each 2D block-cyclicly dis-
00066 *  tributed matrix.  This  vector  stores  the  information  required to
00067 *  establish the  mapping  between a  matrix entry and its corresponding
00068 *  process and memory location.
00069 *
00070 *  In  the  following  comments,   the character _  should  be  read  as
00071 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
00072 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
00073 *
00074 *  NOTATION         STORED IN       EXPLANATION
00075 *  ---------------- --------------- ------------------------------------
00076 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
00077 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
00078 *                                   the NPROW x NPCOL BLACS process grid
00079 *                                   A  is  distributed over. The context
00080 *                                   itself  is  global,  but  the handle
00081 *                                   (the integer value) may vary.
00082 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
00083 *                                   ted matrix A, M_A >= 0.
00084 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
00085 *                                   buted matrix A, N_A >= 0.
00086 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
00087 *                                   block of the matrix A, IMB_A > 0.
00088 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
00089 *                                   left   block   of   the  matrix   A,
00090 *                                   INB_A > 0.
00091 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
00092 *                                   bute the last  M_A-IMB_A  rows of A,
00093 *                                   MB_A > 0.
00094 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
00095 *                                   bute the last  N_A-INB_A  columns of
00096 *                                   A, NB_A > 0.
00097 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
00098 *                                   row of the matrix  A is distributed,
00099 *                                   NPROW > RSRC_A >= 0.
00100 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
00101 *                                   first column of  A  is  distributed.
00102 *                                   NPCOL > CSRC_A >= 0.
00103 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
00104 *                                   array  storing  the  local blocks of
00105 *                                   the distributed matrix A,
00106 *                                   IF( Lc( 1, N_A ) > 0 )
00107 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
00108 *                                   ELSE
00109 *                                      LLD_A >= 1.
00110 *
00111 *  Let K be the number of  rows of a matrix A starting at the global in-
00112 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
00113 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
00114 *  receive if these K rows were distributed over NPROW processes.  If  K
00115 *  is the number of columns of a matrix  A  starting at the global index
00116 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
00117 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
00118 *  these K columns were distributed over NPCOL processes.
00119 *
00120 *  The values of Lr() and Lc() may be determined via a call to the func-
00121 *  tion PB_Cnumroc:
00122 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
00123 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
00124 *
00125 *  Arguments
00126 *  =========
00127 *
00128 *  UPLO    (global input) CHARACTER*1
00129 *          On  entry, UPLO specifies whether  the  local  pieces  of the
00130 *          array C containing the upper or lower triangular part  of the
00131 *          triangular submatrix sub( C ) is to be referenced as follows:
00132 *
00133 *             UPLO = 'U' or 'u'   Only the local pieces corresponding to
00134 *                                 the   upper  triangular  part  of  the
00135 *                                 triangular submatrix sub( C ) is to be
00136 *                                 referenced,
00137 *
00138 *             UPLO = 'L' or 'l'   Only the local pieces corresponding to
00139 *                                 the   lower  triangular  part  of  the
00140 *                                 triangular submatrix sub( C ) is to be
00141 *                                 referenced.
00142 *
00143 *  TRANS   (global input) CHARACTER*1
00144 *          On entry,  TRANS   specifies the form of op( sub( A ) ) to be
00145 *          used in the matrix addition as follows:
00146 *
00147 *             TRANS = 'N' or 'n'   op( sub( A ) ) = sub( A ),
00148 *
00149 *             TRANS = 'T' or 't'   op( sub( A ) ) = sub( A )',
00150 *
00151 *             TRANS = 'C' or 'c'   op( sub( A ) ) = conjg( sub( A )' ).
00152 *
00153 *  M       (global input) INTEGER
00154 *          On entry,  M  specifies the number of rows of  the  submatrix
00155 *          sub( C ) and the number of columns of the submatrix sub( A ).
00156 *          M  must be at least zero.
00157 *
00158 *  N       (global input) INTEGER
00159 *          On entry, N  specifies the number of columns of the submatrix
00160 *          sub( C ) and the number of rows of the submatrix sub( A ).  N
00161 *          must be at least zero.
00162 *
00163 *  ALPHA   (global input) COMPLEX
00164 *          On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
00165 *          supplied  as  zero  then  the  local entries of  the array  A
00166 *          corresponding to the entries of the submatrix  sub( A )  need
00167 *          not be set on input.
00168 *
00169 *  A       (local input) COMPLEX array
00170 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
00171 *          at least Lc( 1, JA+N-1 ) when  TRANS = 'N' or 'n'  and  is at
00172 *          least Lc( 1, JA+M-1 ) otherwise.  Before  entry,  this  array
00173 *          contains the local entries of the matrix A.
00174 *          Before entry with UPLO = 'U' or 'u' and TRANS = 'N' or 'n' or
00175 *          UPLO = 'L' or 'l' and  TRANS = 'T', 'C', 't' or 'c', this ar-
00176 *          ray contains the local entries corresponding to  the  entries
00177 *          of the upper triangular submatrix sub( A ), and the local en-
00178 *          tries corresponding to the entries of the strictly lower tri-
00179 *          angular part  of the submatrix  sub( A )  are not referenced.
00180 *          Before entry with UPLO = 'L' or 'l' and TRANS = 'N' or 'n' or
00181 *          UPLO = 'U' or 'u' and  TRANS = 'T', 'C', 't' or 'c', this ar-
00182 *          ray contains the local entries corresponding  to the  entries
00183 *          of the lower triangular submatrix sub( A ), and the local en-
00184 *          tries corresponding to the entries of the strictly upper tri-
00185 *          angular part  of the submatrix  sub( A )  are not referenced.
00186 *
00187 *  IA      (global input) INTEGER
00188 *          On entry, IA  specifies A's global row index, which points to
00189 *          the beginning of the submatrix sub( A ).
00190 *
00191 *  JA      (global input) INTEGER
00192 *          On entry, JA  specifies A's global column index, which points
00193 *          to the beginning of the submatrix sub( A ).
00194 *
00195 *  DESCA   (global and local input) INTEGER array
00196 *          On entry, DESCA  is an integer array of dimension DLEN_. This
00197 *          is the array descriptor for the matrix A.
00198 *
00199 *  BETA    (global input) COMPLEX
00200 *          On entry,  BETA  specifies the scalar  beta.   When  BETA  is
00201 *          supplied  as  zero  then  the  local entries of  the array  C
00202 *          corresponding to the entries of the submatrix  sub( C )  need
00203 *          not be set on input.
00204 *
00205 *  C       (local input/local output) COMPLEX array
00206 *          On entry, C is an array of dimension (LLD_C, Kc), where Kc is
00207 *          at least Lc( 1, JC+N-1 ).  Before  entry, this array contains
00208 *          the local entries of the matrix C.
00209 *          Before  entry  with  UPLO = 'U' or 'u', this  array  contains
00210 *          the local entries corresponding to the upper triangular  part
00211 *          of the  triangular submatrix  sub( C ), and the local entries
00212 *          corresponding to the  strictly lower triangular  of  sub( C )
00213 *          are not  referenced.  On exit,  the upper triangular part  of
00214 *          sub( C ) is overwritten by the  upper triangular part  of the
00215 *          updated submatrix.
00216 *          Before  entry  with  UPLO = 'L' or 'l', this  array  contains
00217 *          the local entries corresponding to the lower triangular  part
00218 *          of the  triangular submatrix  sub( C ), and the local entries
00219 *          corresponding to the  strictly upper triangular  of  sub( C )
00220 *          are not  referenced.  On exit,  the lower triangular part  of
00221 *          sub( C ) is overwritten by the  lower triangular part  of the
00222 *          updated submatrix.
00223 *
00224 *  IC      (global input) INTEGER
00225 *          On entry, IC  specifies C's global row index, which points to
00226 *          the beginning of the submatrix sub( C ).
00227 *
00228 *  JC      (global input) INTEGER
00229 *          On entry, JC  specifies C's global column index, which points
00230 *          to the beginning of the submatrix sub( C ).
00231 *
00232 *  DESCC   (global and local input) INTEGER array
00233 *          On entry, DESCC  is an integer array of dimension DLEN_. This
00234 *          is the array descriptor for the matrix C.
00235 *
00236 *  -- Written on April 1, 1998 by
00237 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
00238 *
00239 *  ---------------------------------------------------------------------
00240 */
00241 /*
00242 *  .. Local Scalars ..
00243 */
00244    char           DirAC, TranOp, UploC, ctop, rtop;
00245    int            Ai, Aj, Ci, Cj, ctxt, info, mycol, myrow, notran, npcol,
00246                   nprow, upper;
00247 /*
00248 *  .. Local Arrays ..
00249 */
00250    int            Ad[DLEN_], Cd[DLEN_];
00251 /* ..
00252 *  .. Executable Statements ..
00253 *
00254 */
00255    upper  = ( ( UploC  = Mupcase( F2C_CHAR( UPLO  )[0] ) ) ==  CUPPER );
00256    notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
00257    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
00258    PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd );
00259 #ifndef NO_ARGCHK
00260 /*
00261 *  Test the input parameters
00262 */
00263    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00264    if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) )
00265    {
00266       if( ( !upper ) && ( UploC != CLOWER ) )
00267       {
00268          PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal UPLO = %c\n", UploC );
00269          info = -1;
00270       }
00271       else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
00272       {
00273          PB_Cwarn( ctxt, __LINE__, "PCTRADD", "Illegal TRANS = %c\n", TranOp );
00274          info = -2;
00275       }
00276       if( notran )
00277          PB_Cchkmat( ctxt, "PCTRADD", "A", *M, 3, *N, 4, Ai, Aj, Ad,  9,
00278                      &info );
00279       else
00280          PB_Cchkmat( ctxt, "PCTRADD", "A", *N, 4, *M, 3, Ai, Aj, Ad,  9,
00281                      &info );
00282       PB_Cchkmat(    ctxt, "PCTRADD", "C", *M, 3, *N, 4, Ci, Cj, Cd, 14,
00283                      &info );
00284    }
00285    if( info ) { PB_Cabort( ctxt, "PCTRADD", info ); return; }
00286 #endif
00287 /*
00288 *  Quick return if possible
00289 */
00290    if( ( *M == 0 ) || ( *N == 0 ) ||
00291        ( ( ALPHA[REAL_PART] == ZERO && ALPHA[IMAG_PART] == ZERO ) &&
00292          ( BETA [REAL_PART] ==  ONE && BETA [IMAG_PART] == ZERO ) ) )
00293       return;
00294 /*
00295 *  And when alpha is zero
00296 */
00297    if( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) )
00298    {
00299       if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
00300       {
00301          PB_Cplapad( PB_Cctypeset(), &UploC, NOCONJG, *M, *N,
00302                      ((char *)BETA), ((char *)BETA), ((char *) C), Ci, Cj, Cd );
00303       }
00304       else
00305       {
00306          PB_Cplascal( PB_Cctypeset(), &UploC, NOCONJG, *M, *N,
00307                       ((char *)BETA), ((char * )C), Ci, Cj, Cd );
00308       }
00309       return;
00310    }
00311 /*
00312 *  Start the operations
00313 */
00314 /*
00315 *  This operation mainly involves point-to-point send and receive communication.
00316 *  There is therefore no particular BLACS topology to recommend. Still, one can
00317 *  choose the main loop direction in which the operands will be added, but not
00318 *  transposed. This selection is based on the current setting for the BLACS
00319 *  broadcast operations.
00320 */
00321    rtop = *PB_Ctop( &ctxt, BCAST, ROW,    TOP_GET );
00322    ctop = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
00323 
00324    if( *M <= *N )
00325       DirAC = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
00326    else
00327       DirAC = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
00328    PB_Cptradd( PB_Cctypeset(), &DirAC, &UploC, ( notran ? NOTRAN :
00329                ( ( TranOp == CCOTRAN ) ? COTRAN : TRAN ) ), *M, *N,
00330                ((char *) ALPHA), ((char *) A), Ai, Aj, Ad, ((char *)  BETA),
00331                ((char *) C), Ci, Cj, Cd );
00332 /*
00333 *  End of PCTRADD
00334 */
00335 }