ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pctrsv_.c
Go to the documentation of this file.
00001 /* ---------------------------------------------------------------------
00002 *
00003 *  -- PBLAS routine (version 2.0) --
00004 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
00005 *     and University of California, Berkeley.
00006 *     April 1, 1998
00007 *
00008 *  ---------------------------------------------------------------------
00009 */
00010 /*
00011 *  Include files
00012 */
00013 #include "pblas.h"
00014 #include "PBpblas.h"
00015 #include "PBtools.h"
00016 #include "PBblacs.h"
00017 #include "PBblas.h"
00018 
00019 #ifdef __STDC__
00020 void pctrsv_( F_CHAR_T UPLO, F_CHAR_T TRANS, F_CHAR_T DIAG, int * N,
00021               float * A, int * IA, int * JA, int * DESCA,
00022               float * X, int * IX, int * JX, int * DESCX,
00023               int * INCX )
00024 #else
00025 void pctrsv_( UPLO, TRANS, DIAG, N, A, IA, JA, DESCA, X, IX, JX,
00026               DESCX, INCX )
00027 /*
00028 *  .. Scalar Arguments ..
00029 */
00030    F_CHAR_T       DIAG, TRANS, UPLO;
00031    int            * IA, * INCX, * IX, * JA, * JX, * N;
00032 /*
00033 *  .. Array Arguments ..
00034 */
00035    int            * DESCA, * DESCX;
00036    float          * A, * X;
00037 #endif
00038 {
00039 /*
00040 *  Purpose
00041 *  =======
00042 *
00043 *  PCTRSV  solves one of the systems of equations
00044 *
00045 *    sub( A )*sub( X ) = b,   or   sub( A )'*sub( X ) = b,   or
00046 *
00047 *    conjg( sub( A )' )*sub( X ) = b,
00048 *
00049 *  where
00050 *
00051 *     sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1), and,
00052 *
00053 *     sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
00054 *                      X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X.
00055 *
00056 *  b and sub( X ) are  n element subvectors and  sub( A )  is an  n by n
00057 *  unit, or non-unit, upper or lower triangular submatrix.
00058 *
00059 *  No test for  singularity  or  near-singularity  is included  in  this
00060 *  routine. Such tests must be performed before calling this routine.
00061 *
00062 *  Notes
00063 *  =====
00064 *
00065 *  A description  vector  is associated with each 2D block-cyclicly dis-
00066 *  tributed matrix.  This  vector  stores  the  information  required to
00067 *  establish the  mapping  between a  matrix entry and its corresponding
00068 *  process and memory location.
00069 *
00070 *  In  the  following  comments,   the character _  should  be  read  as
00071 *  "of  the  distributed  matrix".  Let  A  be a generic term for any 2D
00072 *  block cyclicly distributed matrix.  Its description vector is DESC_A:
00073 *
00074 *  NOTATION         STORED IN       EXPLANATION
00075 *  ---------------- --------------- ------------------------------------
00076 *  DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
00077 *  CTXT_A  (global) DESCA[ CTXT_  ] The BLACS context handle, indicating
00078 *                                   the NPROW x NPCOL BLACS process grid
00079 *                                   A  is  distributed over. The context
00080 *                                   itself  is  global,  but  the handle
00081 *                                   (the integer value) may vary.
00082 *  M_A     (global) DESCA[ M_     ] The  number of rows in the distribu-
00083 *                                   ted matrix A, M_A >= 0.
00084 *  N_A     (global) DESCA[ N_     ] The number of columns in the distri-
00085 *                                   buted matrix A, N_A >= 0.
00086 *  IMB_A   (global) DESCA[ IMB_   ] The number of rows of the upper left
00087 *                                   block of the matrix A, IMB_A > 0.
00088 *  INB_A   (global) DESCA[ INB_   ] The  number  of columns of the upper
00089 *                                   left   block   of   the  matrix   A,
00090 *                                   INB_A > 0.
00091 *  MB_A    (global) DESCA[ MB_    ] The blocking factor used to  distri-
00092 *                                   bute the last  M_A-IMB_A  rows of A,
00093 *                                   MB_A > 0.
00094 *  NB_A    (global) DESCA[ NB_    ] The blocking factor used to  distri-
00095 *                                   bute the last  N_A-INB_A  columns of
00096 *                                   A, NB_A > 0.
00097 *  RSRC_A  (global) DESCA[ RSRC_  ] The process row over which the first
00098 *                                   row of the matrix  A is distributed,
00099 *                                   NPROW > RSRC_A >= 0.
00100 *  CSRC_A  (global) DESCA[ CSRC_  ] The  process column  over  which the
00101 *                                   first column of  A  is  distributed.
00102 *                                   NPCOL > CSRC_A >= 0.
00103 *  LLD_A   (local)  DESCA[ LLD_   ] The  leading dimension  of the local
00104 *                                   array  storing  the  local blocks of
00105 *                                   the distributed matrix A,
00106 *                                   IF( Lc( 1, N_A ) > 0 )
00107 *                                      LLD_A >= MAX( 1, Lr( 1, M_A ) )
00108 *                                   ELSE
00109 *                                      LLD_A >= 1.
00110 *
00111 *  Let K be the number of  rows of a matrix A starting at the global in-
00112 *  dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
00113 *  that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
00114 *  receive if these K rows were distributed over NPROW processes.  If  K
00115 *  is the number of columns of a matrix  A  starting at the global index
00116 *  JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number  of co-
00117 *  lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would  receive if
00118 *  these K columns were distributed over NPCOL processes.
00119 *
00120 *  The values of Lr() and Lc() may be determined via a call to the func-
00121 *  tion PB_Cnumroc:
00122 *  Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
00123 *  Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
00124 *
00125 *  Arguments
00126 *  =========
00127 *
00128 *  UPLO    (global input) CHARACTER*1
00129 *          On entry,  UPLO  specifies whether the submatrix  sub( A ) is
00130 *          an upper or lower triangular submatrix as follows:
00131 *
00132 *             UPLO = 'U' or 'u'   sub( A ) is an upper triangular
00133 *                                 submatrix,
00134 *
00135 *             UPLO = 'L' or 'l'   sub( A ) is a  lower triangular
00136 *                                 submatrix.
00137 *
00138 *  TRANS   (global input) CHARACTER*1
00139 *          On entry,  TRANS  specifies the  operation to be performed as
00140 *          follows:
00141 *
00142 *             TRANS = 'N' or 'n'   sub( A )  * sub( X ) = b.
00143 *
00144 *             TRANS = 'T' or 't'   sub( A )' * sub( X ) = b.
00145 *
00146 *             TRANS = 'C' or 'c'   conjg( sub( A )' ) * sub( X ) = b.
00147 *
00148 *  DIAG    (global input) CHARACTER*1
00149 *          On entry,  DIAG  specifies  whether or not  sub( A )  is unit
00150 *          triangular as follows:
00151 *
00152 *             DIAG = 'U' or 'u'  sub( A )  is  assumed to be unit trian-
00153 *                                gular,
00154 *
00155 *             DIAG = 'N' or 'n'  sub( A ) is not assumed to be unit tri-
00156 *                                angular.
00157 *
00158 *  N       (global input) INTEGER
00159 *          On entry,  N specifies the order of the  submatrix  sub( A ).
00160 *          N must be at least zero.
00161 *
00162 *  A       (local input) COMPLEX array
00163 *          On entry, A is an array of dimension (LLD_A, Ka), where Ka is
00164 *          at least Lc( 1, JA+N-1 ).  Before  entry, this array contains
00165 *          the local entries of the matrix A.
00166 *          Before entry with  UPLO = 'U' or 'u', this array contains the
00167 *          local entries corresponding to  the entries of the upper tri-
00168 *          angular submatrix  sub( A ), and the local entries correspon-
00169 *          ding to the entries of the  strictly lower triangular part of
00170 *          the submatrix  sub( A )  are not referenced.
00171 *          Before entry with  UPLO = 'L' or 'l', this array contains the
00172 *          local entries corresponding to  the entries of the lower tri-
00173 *          angular submatrix  sub( A ), and the local entries correspon-
00174 *          ding to the entries of the  strictly upper triangular part of
00175 *          the submatrix  sub( A )  are not referenced.
00176 *          Note  that  when DIAG = 'U' or 'u', the local entries corres-
00177 *          ponding to  the diagonal elements  of the submatrix  sub( A )
00178 *          are not referenced either, but are assumed to be unity.
00179 *
00180 *  IA      (global input) INTEGER
00181 *          On entry, IA  specifies A's global row index, which points to
00182 *          the beginning of the submatrix sub( A ).
00183 *
00184 *  JA      (global input) INTEGER
00185 *          On entry, JA  specifies A's global column index, which points
00186 *          to the beginning of the submatrix sub( A ).
00187 *
00188 *  DESCA   (global and local input) INTEGER array
00189 *          On entry, DESCA  is an integer array of dimension DLEN_. This
00190 *          is the array descriptor for the matrix A.
00191 *
00192 *  X       (local input/local output) COMPLEX array
00193 *          On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
00194 *          is   at  least  MAX( 1, Lr( 1, IX ) )  when  INCX = M_X   and
00195 *          MAX( 1, Lr( 1, IX+N-1 ) )  otherwise,  and,  Kx  is  at least
00196 *          Lc( 1, JX+N-1 )  when  INCX = M_X  and Lc( 1, JX ) otherwise.
00197 *          Before entry, this array  contains  the local  entries of the
00198 *          matrix X. On entry, sub( X ) is the n element right-hand side
00199 *          b. On exit, sub( X ) is overwritten with the solution subvec-
00200 *          tor.
00201 *
00202 *  IX      (global input) INTEGER
00203 *          On entry, IX  specifies X's global row index, which points to
00204 *          the beginning of the submatrix sub( X ).
00205 *
00206 *  JX      (global input) INTEGER
00207 *          On entry, JX  specifies X's global column index, which points
00208 *          to the beginning of the submatrix sub( X ).
00209 *
00210 *  DESCX   (global and local input) INTEGER array
00211 *          On entry, DESCX  is an integer array of dimension DLEN_. This
00212 *          is the array descriptor for the matrix X.
00213 *
00214 *  INCX    (global input) INTEGER
00215 *          On entry,  INCX   specifies  the  global  increment  for  the
00216 *          elements of  X.  Only two values of  INCX   are  supported in
00217 *          this version, namely 1 and M_X. INCX  must not be zero.
00218 *
00219 *  -- Written on April 1, 1998 by
00220 *     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
00221 *
00222 *  ---------------------------------------------------------------------
00223 */
00224 /*
00225 *  .. Local Scalars ..
00226 */
00227    char           DiagA, TranOp, UploA, Xroc, btop, ctop, * negone, * one,
00228                   * zero;
00229    int            Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb, Anb,
00230                   Anp, Anp0, Anq, Anq0, Arow, Asrc, XACapbX, XACfr, XACld,
00231                   XACsum, XARapbX, XARfr, XARld, XARsum, Xi, Xj, ctxt, info,
00232                   ione=1, k, kb, kbnext, kbprev, ktmp, mycol, myrow, nb, notran,
00233                   nounit, npcol, nprow, size, upper;
00234    PBTYP_T        * type;
00235 /*
00236 *  .. Local Arrays ..
00237 */
00238    int            Ad[DLEN_], Ad0[DLEN_], XACd[DLEN_], XARd[DLEN_], Xd[DLEN_];
00239    char           * Aptr = NULL, * XAC = NULL, * XAR = NULL;
00240 /* ..
00241 *  .. Executable Statements ..
00242 *
00243 */
00244    upper  = ( ( UploA  = Mupcase( F2C_CHAR( UPLO  )[0] ) ) ==  CUPPER );
00245    notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
00246    nounit = ( ( DiagA  = Mupcase( F2C_CHAR( DIAG  )[0] ) ) == CNOUNIT );
00247    PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
00248    PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
00249 #ifndef NO_ARGCHK
00250 /*
00251 *  Test the input parameters
00252 */
00253    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00254    if( !( info = ( ( nprow == -1 ) ? -( 801 + CTXT_ ) : 0 ) ) )
00255    {
00256       if( ( !upper ) && ( UploA != CLOWER ) )
00257       {
00258          PB_Cwarn( ctxt, __LINE__, "PCTRSV", "Illegal UPLO = %c\n", UploA );
00259          info = -1;
00260       }
00261       else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
00262       {
00263          PB_Cwarn( ctxt, __LINE__, "PCTRSV", "Illegal TRANS = %c\n", TranOp );
00264          info = -2;
00265       }
00266       else if( ( !nounit ) && ( DiagA != CUNIT ) )
00267       {
00268          PB_Cwarn( ctxt, __LINE__, "PCTRSV", "Illegal DIAG = %c\n", DiagA );
00269          info = -3;
00270       }
00271       PB_Cchkmat( ctxt, "PCTRSV", "A", *N, 4, *N, 4, Ai, Aj, Ad,  8, &info );
00272       PB_Cchkvec( ctxt, "PCTRSV", "X", *N, 4, Xi, Xj, Xd, *INCX, 12, &info );
00273    }
00274    if( info ) { PB_Cabort( ctxt, "PCTRSV", info ); return; }
00275 #endif
00276 /*
00277 *  Quick return if possible
00278 */
00279    if( *N == 0 ) return;
00280 /*
00281 *  Retrieve process grid information
00282 */
00283 #ifdef NO_ARGCHK
00284    Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
00285 #endif
00286 /*
00287 *  Get type structure
00288 */
00289    type = PB_Cctypeset();
00290    size = type->size; one = type->one; zero = type->zero; negone = type->negone;
00291 /*
00292 *  Compute descriptor Ad0 for sub( A )
00293 */
00294    PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
00295                  &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
00296 /*
00297 *  Computational partitioning size is computed as the product of the logical
00298 *  value returned by pilaenv_ and 2 * lcm( nprow, npcol )
00299 */
00300    nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
00301         PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
00302 
00303    Xroc = ( *INCX == Xd[M_] ? CROW : CCOLUMN );
00304 
00305    if( notran )
00306    {
00307       if( upper )
00308       {
00309 /*
00310 *  Save current and enforce ring BLACS topologies
00311 */
00312          btop = *PB_Ctop( &ctxt, BCAST,   COLUMN, TOP_GET   );
00313          ctop = *PB_Ctop( &ctxt, COMBINE, ROW,    TOP_GET   );
00314          (void)  PB_Ctop( &ctxt, BCAST,   COLUMN, TOP_DRING );
00315          (void)  PB_Ctop( &ctxt, COMBINE, ROW,    TOP_DRING );
00316 /*
00317 *  Remove next line when BLACS combine operations support ring topologies.
00318 */
00319          (void)  PB_Ctop( &ctxt, COMBINE, ROW,    TOP_DEFAULT );
00320 /*
00321 *  Reuse sub( X ) and/or create vector XAC in process column owning the last
00322 *  column of sub( A )
00323 */
00324          PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, *N-1, Ad0, 1,
00325                       ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd,
00326                       &XACfr, &XACsum, &XACapbX );
00327 /*
00328 *  Create vector XAR in process rows spanned by sub( A )
00329 */
00330          PB_COutV( type, ROW,    INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr,
00331                    &XARsum );
00332 /*
00333 *  Retrieve local quantities related to Ad0 -> sub( A )
00334 */
00335          Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ];
00336          Amb   = Ad0[MB_  ]; Anb   = Ad0[NB_  ];
00337          Arow  = Ad0[RSRC_]; Acol  = Ad0[CSRC_]; Ald = Ad0[LLD_];
00338          Anp   = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
00339          Anq   = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
00340          if( ( Anp > 0 ) && ( Anq > 0 ) )
00341             Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
00342 
00343          XACld = XACd[LLD_]; XARld = XARd[LLD_];
00344 
00345          for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb )
00346          {
00347             ktmp = *N - k; kb = MIN( ktmp, nb );
00348 /*
00349 *  Solve logical diagonal block, XAC contains the solution scattered in multiple
00350 *  process columns and XAR contains the solution replicated in the process rows.
00351 */
00352             Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
00353             Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
00354             PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k,
00355                        Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0,
00356                        Akq, XARld, size ), XARld );
00357 /*
00358 *  Update: only the part of sub( X ) to be solved at the next step is locally
00359 *  updated and combined, the remaining part of the vector to be solved later is
00360 *  only locally updated.
00361 */
00362             if( Akp > 0 )
00363             {
00364                Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00365                if( XACsum )
00366                {
00367                   kbprev = MIN( k, nb );
00368                   ktmp   = PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb, myrow,
00369                                        Arow, nprow );
00370                   Akp   -= ktmp;
00371 
00372                   if( ktmp > 0 )
00373                   {
00374                      if( Anq0 > 0 )
00375                         cgemv_( TRANS, &ktmp, &Anq0, negone,
00376                            Mptr( Aptr, Akp, Akq,   Ald, size ), &Ald,
00377                            Mptr( XAR,    0, Akq, XARld, size ), &XARld, one,
00378                            Mptr( XAC,  Akp,   0, XACld, size ), &ione );
00379                      Asrc = PB_Cindxg2p( k-1, Ainb1, Anb, Acol, Acol, npcol );
00380                      Ccgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp,
00381                                0, XACld, size ), XACld, myrow, Asrc );
00382                      if( mycol != Asrc )
00383                         cset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld,
00384                                size ), &ione );
00385                   }
00386                   if( Akp > 0 && Anq0 > 0 )
00387                      cgemv_( TRANS, &Akp, &Anq0, negone,
00388                              Mptr( Aptr, 0, Akq,   Ald, size ),   &Ald,
00389                              Mptr( XAR,  0, Akq, XARld, size ), &XARld, one,
00390                              XAC, &ione );
00391                }
00392                else
00393                {
00394                   if( Anq0 > 0 )
00395                      cgemv_( TRANS, &Akp, &Anq0, negone,
00396                              Mptr( Aptr, 0, Akq,   Ald, size ),   &Ald,
00397                              Mptr( XAR,  0, Akq, XARld, size ), &XARld, one,
00398                              XAC, &ione );
00399                }
00400             }
00401          }
00402 /*
00403 *  Combine the scattered resulting vector XAC
00404 */
00405          if( XACsum && ( Anp > 0 ) )
00406          {
00407             Ccgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow,
00408                       XACd[CSRC_] );
00409          }
00410 /*
00411 *  sub( X ) := XAC (if necessary)
00412 */
00413          if( XACapbX )
00414          {
00415             PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd, COLUMN,
00416                         zero, ((char *) X), Xi, Xj, Xd, &Xroc );
00417          }
00418 /*
00419 *  Restore BLACS topologies
00420 */
00421          (void) PB_Ctop( &ctxt, BCAST,   COLUMN, &btop );
00422          (void) PB_Ctop( &ctxt, COMBINE, ROW,    &ctop );
00423       }
00424       else
00425       {
00426 /*
00427 *  Save current and enforce ring BLACS topologies
00428 */
00429          btop = *PB_Ctop( &ctxt, BCAST,   COLUMN, TOP_GET   );
00430          ctop = *PB_Ctop( &ctxt, COMBINE, ROW,    TOP_GET   );
00431          (void)  PB_Ctop( &ctxt, BCAST,   COLUMN, TOP_IRING );
00432          (void)  PB_Ctop( &ctxt, COMBINE, ROW,    TOP_IRING );
00433 /*
00434 *  Remove next line when BLACS combine operations support ring topologies.
00435 */
00436          (void)  PB_Ctop( &ctxt, COMBINE, ROW,    TOP_DEFAULT );
00437 /*
00438 *  Reuse sub( X ) and/or create vector XAC in process column owning the first
00439 *  column of sub( A )
00440 */
00441          PB_CInOutV2( type, NOCONJG, COLUMN, *N, *N, 0, Ad0, 1,
00442                       ((char *) X), Xi, Xj, Xd, &Xroc, &XAC, XACd,
00443                       &XACfr, &XACsum, &XACapbX );
00444 /*
00445 *  Create vector XAR in process rows spanned by sub( A )
00446 */
00447          PB_COutV( type, ROW, INIT, *N, *N, Ad0, 1, &XAR, XARd, &XARfr,
00448                    &XARsum );
00449 /*
00450 *  Retrieve local quantities related to Ad0 -> sub( A )
00451 */
00452          Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ];
00453          Amb   = Ad0[MB_  ]; Anb   = Ad0[NB_  ];
00454          Arow  = Ad0[RSRC_]; Acol  = Ad0[CSRC_]; Ald = Ad0[LLD_];
00455          Anp   = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
00456          Anq   = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
00457          if( ( Anp > 0 ) && ( Anq > 0 ) )
00458             Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
00459 
00460          XACld = XACd[LLD_]; XARld = XARd[LLD_];
00461 
00462          for( k = 0; k < *N; k += nb )
00463          {
00464             ktmp = *N - k; kb = MIN( ktmp, nb );
00465 /*
00466 *  Solve logical diagonal block, XAC contains the solution scattered in multiple
00467 *  process columns and XAR contains the solution replicated in the process rows.
00468 */
00469             Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
00470             Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
00471             PB_Cptrsv( type, XARsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k,
00472                        Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0,
00473                        Akq, XARld, size ), XARld );
00474 /*
00475 *  Update: only the part of sub( X ) to be solved at the next step is locally
00476 *  updated and combined, the remaining part of the vector to be solved later is
00477 *  only locally updated.
00478 */
00479             Akp = PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
00480             if( ( Anp0 = Anp - Akp ) > 0 )
00481             {
00482                Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
00483                if( XACsum )
00484                {
00485                   kbnext = ktmp - kb; kbnext = MIN( kbnext, nb );
00486                   ktmp   = PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow, Arow,
00487                                        nprow );
00488                   Anp0  -= ktmp;
00489 
00490                   if( ktmp > 0 )
00491                   {
00492                      if( Anq0 > 0 )
00493                         cgemv_( TRANS, &ktmp, &Anq0, negone,
00494                           Mptr( Aptr, Akp, Akq,   Ald, size ), &Ald,
00495                           Mptr( XAR,    0, Akq, XARld, size ), &XARld, one,
00496                           Mptr( XAC,  Akp,   0, XACld, size ), &ione );
00497                      Asrc = PB_Cindxg2p( k+kb, Ainb1, Anb, Acol, Acol, npcol );
00498                      Ccgsum2d( ctxt, ROW, &ctop, ktmp, 1, Mptr( XAC, Akp,
00499                                  0, XACld, size ), XACld, myrow, Asrc );
00500                      if( mycol != Asrc )
00501                         cset_( &ktmp, zero, Mptr( XAC, Akp, 0, XACld,
00502                                size ), &ione );
00503                   }
00504                   if( Anp0 > 0 && Anq0 > 0 )
00505                      cgemv_( TRANS, &Anp0, &Anq0, negone,
00506                       Mptr( Aptr, Akp+ktmp, Akq,   Ald, size ), &Ald,
00507                       Mptr( XAR,         0, Akq, XARld, size ), &XARld,
00508                       one,
00509                       Mptr( XAC,  Akp+ktmp,   0, XACld, size ), &ione );
00510                }
00511                else
00512                {
00513                   if( Anq0 > 0 )
00514                      cgemv_( TRANS, &Anp0, &Anq0, negone,
00515                           Mptr( Aptr, Akp, Akq,   Ald, size ), &Ald,
00516                           Mptr( XAR,    0, Akq, XARld, size ), &XARld,
00517                           one,
00518                           Mptr( XAC,  Akp,   0, XACld, size ), &ione );
00519                }
00520             }
00521          }
00522 /*
00523 *  Combine the scattered resulting vector XAC
00524 */
00525          if( XACsum && ( Anp > 0 ) )
00526          {
00527             Ccgsum2d( ctxt, ROW, &ctop, Anp, 1, XAC, XACld, myrow,
00528                       XACd[CSRC_] );
00529          }
00530 /*
00531 *  sub( X ) := XAC (if necessary)
00532 */
00533          if( XACapbX )
00534          {
00535             PB_Cpaxpby( type, NOCONJG, *N, 1, one, XAC, 0, 0, XACd,
00536                         COLUMN, zero, ((char *) X), Xi, Xj, Xd, &Xroc );
00537          }
00538 /*
00539 *  Restore BLACS topologies
00540 */
00541          (void) PB_Ctop( &ctxt, BCAST,   COLUMN, &btop );
00542          (void) PB_Ctop( &ctxt, COMBINE, ROW,    &ctop );
00543       }
00544    }
00545    else
00546    {
00547       if( upper )
00548       {
00549 /*
00550 *  Save current and enforce ring BLACS topologies
00551 */
00552          btop = *PB_Ctop( &ctxt, BCAST,   ROW,    TOP_GET   );
00553          ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET   );
00554          (void)  PB_Ctop( &ctxt, BCAST,   ROW,    TOP_IRING );
00555          (void)  PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_IRING );
00556 /*
00557 *  Remove next line when BLACS combine operations support ring topologies.
00558 */
00559          (void)  PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT );
00560 /*
00561 *  Reuse sub( X ) and/or create vector XAR in process row owning the first row
00562 *  of sub( A )
00563 */
00564          PB_CInOutV2( type, NOCONJG, ROW, *N, *N, 0, Ad0, 1,
00565                       ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd,
00566                       &XARfr, &XARsum, &XARapbX );
00567 /*
00568 *  Create vector XAC in process columns spanned by sub( A )
00569 */
00570          PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr,
00571                    &XACsum );
00572 /*
00573 *  Retrieve local quantities related to Ad0 -> sub( A )
00574 */
00575          Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ];
00576          Amb   = Ad0[MB_  ]; Anb   = Ad0[NB_  ];
00577          Arow  = Ad0[RSRC_]; Acol  = Ad0[CSRC_]; Ald = Ad0[LLD_];
00578          Anp   = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
00579          Anq   = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
00580          if( ( Anp > 0 ) && ( Anq > 0 ) )
00581             Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
00582 
00583          XACld = XACd[LLD_]; XARld = XARd[LLD_];
00584 
00585          for( k = 0; k < *N; k += nb )
00586          {
00587             ktmp = *N - k; kb = MIN( ktmp, nb );
00588 /*
00589 *  Solve logical diagonal block, XAR contains the solution scattered in multiple
00590 *  process rows and XAC contains the solution replicated in the process columns.
00591 */
00592             Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
00593             Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
00594             PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k,
00595                        Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0,
00596                        Akq, XARld, size ), XARld );
00597 /*
00598 *  Update: only the part of sub( X ) to be solved at the next step is locally
00599 *  updated and combined, the remaining part of the vector to be solved later is
00600 *  only locally updated.
00601 */
00602             Akq = PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol );
00603             if( ( Anq0 = Anq - Akq ) > 0 )
00604             {
00605                Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow );
00606                if( XARsum )
00607                {
00608                   kbnext = ktmp - kb; kbnext = MIN( kbnext, nb );
00609                   ktmp   = PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol, Acol,
00610                                        npcol );
00611                   Anq0  -= ktmp;
00612 
00613                   if( ktmp > 0 )
00614                   {
00615                      if( Anp0 > 0 )
00616                         cgemv_( TRANS, &Anp0, &ktmp, negone,
00617                          Mptr( Aptr, Akp, Akq,   Ald, size ), &Ald,
00618                          Mptr( XAC,  Akp,   0, XACld, size ), &ione, one,
00619                          Mptr( XAR,    0, Akq, XARld, size ), &XARld );
00620                      Asrc = PB_Cindxg2p( k+kb, Aimb1, Amb, Arow, Arow, nprow );
00621                      Ccgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0,
00622                                Akq, XARld, size ), XARld, Asrc, mycol );
00623                      if( myrow != Asrc )
00624                         cset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld,
00625                                size ), &XARld );
00626                   }
00627                   if( Anp0 > 0 && Anq0 > 0 )
00628                      cgemv_( TRANS, &Anp0, &Anq0, negone,
00629                      Mptr( Aptr, Akp, Akq+ktmp,   Ald, size ),   &Ald,
00630                      Mptr( XAC,  Akp,        0, XACld, size ),  &ione, one,
00631                      Mptr( XAR,    0, Akq+ktmp, XARld, size ), &XARld );
00632                }
00633                else
00634                {
00635                   if( Anp0 > 0 )
00636                      cgemv_( TRANS, &Anp0, &Anq0, negone,
00637                         Mptr( Aptr, Akp, Akq,   Ald, size ),   &Ald,
00638                         Mptr( XAC,  Akp,   0, XACld, size ),  &ione, one,
00639                         Mptr( XAR,    0, Akq, XARld, size ), &XARld );
00640                }
00641             }
00642          }
00643 /*
00644 *  Combine the scattered resulting vector XAR
00645 */
00646          if( XARsum && ( Anq > 0 ) )
00647          {
00648             Ccgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_],
00649                       mycol );
00650          }
00651 /*
00652 *  sub( X ) := XAR (if necessary)
00653 */
00654          if( XARapbX )
00655          {
00656             PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero,
00657                         ((char *) X), Xi, Xj, Xd, &Xroc );
00658          }
00659 /*
00660 *  Restore BLACS topologies
00661 */
00662          (void) PB_Ctop( &ctxt, BCAST,   ROW,    &btop );
00663          (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop );
00664       }
00665       else
00666       {
00667 /*
00668 *  Save current and enforce ring BLACS topologies
00669 */
00670          btop = *PB_Ctop( &ctxt, BCAST,   ROW,    TOP_GET   );
00671          ctop = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET   );
00672          (void)  PB_Ctop( &ctxt, BCAST,   ROW,    TOP_DRING );
00673          (void)  PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DRING );
00674 /*
00675 *  Remove next line when BLACS combine operations support ring topologies.
00676 */
00677          (void)  PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_DEFAULT );
00678 /*
00679 *  Reuse sub( X ) and/or create vector XAC in process row owning the last row
00680 *  of sub( A )
00681 */
00682          PB_CInOutV2( type, NOCONJG, ROW, *N, *N, *N-1, Ad0, 1,
00683                       ((char *) X), Xi, Xj, Xd, &Xroc, &XAR, XARd,
00684                       &XARfr, &XARsum, &XARapbX );
00685 /*
00686 *  Create vector XAC in process columns spanned by sub( A )
00687 */
00688          PB_COutV( type, COLUMN, INIT, *N, *N, Ad0, 1, &XAC, XACd, &XACfr,
00689                    &XACsum );
00690 /*
00691 *  Retrieve local quantities related to Ad0 -> sub( A )
00692 */
00693          Aimb1 = Ad0[IMB_ ]; Ainb1 = Ad0[INB_ ];
00694          Amb   = Ad0[MB_  ]; Anb   = Ad0[NB_  ];
00695          Arow  = Ad0[RSRC_]; Acol  = Ad0[CSRC_]; Ald = Ad0[LLD_];
00696          Anp   = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
00697          Anq   = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
00698          if( ( Anp > 0 ) && ( Anq > 0 ) )
00699             Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
00700 
00701          XACld = XACd[LLD_]; XARld = XARd[LLD_];
00702 
00703          for( k = ( ( *N - 1 ) / nb ) * nb; k >= 0; k -= nb )
00704          {
00705             ktmp = *N - k; kb = MIN( ktmp, nb );
00706 /*
00707 *  Solve logical diagonal block, XAR contains the solution scattered in multiple
00708 *  process rows and XAC contains the solution replicated in the process columns.
00709 */
00710             Akp = PB_Cnumroc( k,  0, Aimb1, Amb, myrow, Arow, nprow );
00711             Akq = PB_Cnumroc( k,  0, Ainb1, Anb, mycol, Acol, npcol );
00712             PB_Cptrsv( type, XACsum, &UploA, &TranOp, &DiagA, kb, Aptr, k, k,
00713                        Ad0, Mptr( XAC, Akp, 0, XACld, size ), 1, Mptr( XAR, 0,
00714                        Akq, XARld, size ), XARld );
00715 /*
00716 *  Update: only the part of sub( X ) to be solved at the next step is locally
00717 *  updated and combined, the remaining part of the vector to be solved later
00718 *  is only locally updated.
00719 */
00720             if( Akq > 0 )
00721             {
00722                Anp0 = PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow );
00723                if( XARsum )
00724                {
00725                   kbprev = MIN( k, nb );
00726                   ktmp   = PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb, mycol,
00727                                        Acol, npcol );
00728                   Akq   -= ktmp;
00729 
00730                   if( ktmp > 0 )
00731                   {
00732                      if( Anp0 > 0 )
00733                         cgemv_( TRANS, &Anp0, &ktmp, negone,
00734                                 Mptr( Aptr, Akp, Akq,   Ald, size ), &Ald,
00735                                 Mptr( XAC,  Akp,   0, XACld, size ), &ione, one,
00736                                 Mptr( XAR,    0, Akq, XARld, size ), &XARld );
00737                      Asrc = PB_Cindxg2p( k-1, Aimb1, Amb, Arow, Arow, nprow );
00738                      Ccgsum2d( ctxt, COLUMN, &ctop, 1, ktmp, Mptr( XAR, 0,
00739                                Akq, XARld, size ), XARld, Asrc, mycol );
00740                      if( myrow != Asrc )
00741                         cset_( &ktmp, zero, Mptr( XAR, 0, Akq, XARld,
00742                                size ), &XARld );
00743                   }
00744                   if( Anp0 > 0 && Akq > 0 )
00745                      cgemv_( TRANS, &Anp0, &Akq, negone,
00746                              Mptr( Aptr, Akp, 0,   Ald, size ), &Ald,
00747                              Mptr( XAC,  Akp, 0, XACld, size ), &ione,
00748                              one, XAR, &XARld );
00749                }
00750                else
00751                {
00752                   if( Anp0 > 0 )
00753                      cgemv_( TRANS, &Anp0, &Akq, negone,
00754                              Mptr( Aptr, Akp, 0,   Ald, size ), &Ald,
00755                              Mptr( XAC,  Akp, 0, XACld, size ), &ione,
00756                              one, XAR, &XARld );
00757                }
00758             }
00759          }
00760 /*
00761 *  Combine the scattered resulting vector XAR
00762 */
00763          if( XARsum && ( Anq > 0 ) )
00764          {
00765             Ccgsum2d( ctxt, COLUMN, &ctop, 1, Anq, XAR, XARld, XARd[RSRC_],
00766                       mycol );
00767          }
00768 /*
00769 *  sub( X ) := XAR (if necessary)
00770 */
00771          if( XARapbX )
00772          {
00773             PB_Cpaxpby( type, NOCONJG, 1, *N, one, XAR, 0, 0, XARd, ROW, zero,
00774                         ((char *) X), Xi, Xj, Xd, &Xroc );
00775          }
00776 /*
00777 *  Restore BLACS topologies
00778 */
00779          (void) PB_Ctop( &ctxt, BCAST,   ROW,    &btop );
00780          (void) PB_Ctop( &ctxt, COMBINE, COLUMN, &ctop );
00781       }
00782    }
00783    if( XACfr ) free( XAC );
00784    if( XARfr ) free( XAR );
00785 /*
00786 *  End of PCTRSV
00787 */
00788 }