ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pzdttrsv.f
Go to the documentation of this file.
00001       SUBROUTINE PZDTTRSV( UPLO, TRANS, N, NRHS, DL, D, DU, JA, DESCA,
00002      $                     B, IB, DESCB, AF, LAF, WORK, LWORK, INFO )
00003 *
00004 *  -- ScaLAPACK routine (version 2.0.2) --
00005 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
00006 *     May 1 2012
00007 *
00008 *     .. Scalar Arguments ..
00009       CHARACTER          TRANS, UPLO
00010       INTEGER            IB, INFO, JA, LAF, LWORK, N, NRHS
00011 *     ..
00012 *     .. Array Arguments ..
00013       INTEGER            DESCA( * ), DESCB( * )
00014       COMPLEX*16         AF( * ), B( * ), D( * ), DL( * ), DU( * ),
00015      $                   WORK( * )
00016 *     ..
00017 *
00018 *
00019 *  Purpose
00020 *  =======
00021 *
00022 *  PZDTTRSV solves a tridiagonal triangular system of linear equations
00023 *
00024 *                      A(1:N, JA:JA+N-1) * X = B(IB:IB+N-1, 1:NRHS)
00025 *                                 or
00026 *                      A(1:N, JA:JA+N-1)^H * X = B(IB:IB+N-1, 1:NRHS)
00027 *
00028 *  where A(1:N, JA:JA+N-1) is a tridiagonal
00029 *  triangular matrix factor produced by the
00030 *  Gaussian elimination code PZ@(dom_pre)TTRF
00031 *  and is stored in A(1:N,JA:JA+N-1) and AF.
00032 *  The matrix stored in A(1:N, JA:JA+N-1) is either
00033 *  upper or lower triangular according to UPLO,
00034 *  and the choice of solving A(1:N, JA:JA+N-1) or A(1:N, JA:JA+N-1)^H
00035 *  is dictated by the user by the parameter TRANS.
00036 *
00037 *  Routine PZDTTRF MUST be called first.
00038 *
00039 *  =====================================================================
00040 *
00041 *  Arguments
00042 *  =========
00043 *
00044 *  UPLO    (global input) CHARACTER
00045 *          = 'U':  Upper triangle of A(1:N, JA:JA+N-1) is stored;
00046 *          = 'L':  Lower triangle of A(1:N, JA:JA+N-1) is stored.
00047 *
00048 *  TRANS   (global input) CHARACTER
00049 *          = 'N':  Solve with A(1:N, JA:JA+N-1);
00050 *          = 'C':  Solve with conjugate_transpose( A(1:N, JA:JA+N-1) );
00051 *
00052 *  N       (global input) INTEGER
00053 *          The number of rows and columns to be operated on, i.e. the
00054 *          order of the distributed submatrix A(1:N, JA:JA+N-1). N >= 0.
00055 *
00056 *  NRHS    (global input) INTEGER
00057 *          The number of right hand sides, i.e., the number of columns
00058 *          of the distributed submatrix B(IB:IB+N-1, 1:NRHS).
00059 *          NRHS >= 0.
00060 *
00061 *  DL      (local input/local output) COMPLEX*16 pointer to local
00062 *          part of global vector storing the lower diagonal of the
00063 *          matrix. Globally, DL(1) is not referenced, and DL must be
00064 *          aligned with D.
00065 *          Must be of size >= DESCA( NB_ ).
00066 *          On exit, this array contains information containing the
00067 *            factors of the matrix.
00068 *
00069 *  D       (local input/local output) COMPLEX*16 pointer to local
00070 *          part of global vector storing the main diagonal of the
00071 *          matrix.
00072 *          On exit, this array contains information containing the
00073 *            factors of the matrix.
00074 *          Must be of size >= DESCA( NB_ ).
00075 *
00076 *  DU       (local input/local output) COMPLEX*16 pointer to local
00077 *          part of global vector storing the upper diagonal of the
00078 *          matrix. Globally, DU(n) is not referenced, and DU must be
00079 *          aligned with D.
00080 *          On exit, this array contains information containing the
00081 *            factors of the matrix.
00082 *          Must be of size >= DESCA( NB_ ).
00083 *
00084 *  JA      (global input) INTEGER
00085 *          The index in the global array A that points to the start of
00086 *          the matrix to be operated on (which may be either all of A
00087 *          or a submatrix of A).
00088 *
00089 *  DESCA   (global and local input) INTEGER array of dimension DLEN.
00090 *          if 1D type (DTYPE_A=501 or 502), DLEN >= 7;
00091 *          if 2D type (DTYPE_A=1), DLEN >= 9.
00092 *          The array descriptor for the distributed matrix A.
00093 *          Contains information of mapping of A to memory. Please
00094 *          see NOTES below for full description and options.
00095 *
00096 *  B       (local input/local output) COMPLEX*16 pointer into
00097 *          local memory to an array of local lead dimension lld_b>=NB.
00098 *          On entry, this array contains the
00099 *          the local pieces of the right hand sides
00100 *          B(IB:IB+N-1, 1:NRHS).
00101 *          On exit, this contains the local piece of the solutions
00102 *          distributed matrix X.
00103 *
00104 *  IB      (global input) INTEGER
00105 *          The row index in the global array B that points to the first
00106 *          row of the matrix to be operated on (which may be either
00107 *          all of B or a submatrix of B).
00108 *
00109 *  DESCB   (global and local input) INTEGER array of dimension DLEN.
00110 *          if 1D type (DTYPE_B=502), DLEN >=7;
00111 *          if 2D type (DTYPE_B=1), DLEN >= 9.
00112 *          The array descriptor for the distributed matrix B.
00113 *          Contains information of mapping of B to memory. Please
00114 *          see NOTES below for full description and options.
00115 *
00116 *  AF      (local output) COMPLEX*16 array, dimension LAF.
00117 *          Auxiliary Fillin Space.
00118 *          Fillin is created during the factorization routine
00119 *          PZDTTRF and this is stored in AF. If a linear system
00120 *          is to be solved using PZDTTRS after the factorization
00121 *          routine, AF *must not be altered* after the factorization.
00122 *
00123 *  LAF     (local input) INTEGER
00124 *          Size of user-input Auxiliary Fillin space AF. Must be >=
00125 *          2*(NB+2)
00126 *          If LAF is not large enough, an error code will be returned
00127 *          and the minimum acceptable size will be returned in AF( 1 )
00128 *
00129 *  WORK    (local workspace/local output)
00130 *          COMPLEX*16 temporary workspace. This space may
00131 *          be overwritten in between calls to routines. WORK must be
00132 *          the size given in LWORK.
00133 *          On exit, WORK( 1 ) contains the minimal LWORK.
00134 *
00135 *  LWORK   (local input or global input) INTEGER
00136 *          Size of user-input workspace WORK.
00137 *          If LWORK is too small, the minimal acceptable size will be
00138 *          returned in WORK(1) and an error code is returned. LWORK>=
00139 *          10*NPCOL+4*NRHS
00140 *
00141 *  INFO    (local output) INTEGER
00142 *          = 0:  successful exit
00143 *          < 0:  If the i-th argument is an array and the j-entry had
00144 *                an illegal value, then INFO = -(i*100+j), if the i-th
00145 *                argument is a scalar and had an illegal value, then
00146 *                INFO = -i.
00147 *
00148 *  =====================================================================
00149 *
00150 *
00151 *  Restrictions
00152 *  ============
00153 *
00154 *  The following are restrictions on the input parameters. Some of these
00155 *    are temporary and will be removed in future releases, while others
00156 *    may reflect fundamental technical limitations.
00157 *
00158 *    Non-cyclic restriction: VERY IMPORTANT!
00159 *      P*NB>= mod(JA-1,NB)+N.
00160 *      The mapping for matrices must be blocked, reflecting the nature
00161 *      of the divide and conquer algorithm as a task-parallel algorithm.
00162 *      This formula in words is: no processor may have more than one
00163 *      chunk of the matrix.
00164 *
00165 *    Blocksize cannot be too small:
00166 *      If the matrix spans more than one processor, the following
00167 *      restriction on NB, the size of each block on each processor,
00168 *      must hold:
00169 *      NB >= 2
00170 *      The bulk of parallel computation is done on the matrix of size
00171 *      O(NB) on each processor. If this is too small, divide and conquer
00172 *      is a poor choice of algorithm.
00173 *
00174 *    Submatrix reference:
00175 *      JA = IB
00176 *      Alignment restriction that prevents unnecessary communication.
00177 *
00178 *
00179 *  =====================================================================
00180 *
00181 *
00182 *  Notes
00183 *  =====
00184 *
00185 *  If the factorization routine and the solve routine are to be called
00186 *    separately (to solve various sets of righthand sides using the same
00187 *    coefficient matrix), the auxiliary space AF *must not be altered*
00188 *    between calls to the factorization routine and the solve routine.
00189 *
00190 *  The best algorithm for solving banded and tridiagonal linear systems
00191 *    depends on a variety of parameters, especially the bandwidth.
00192 *    Currently, only algorithms designed for the case N/P >> bw are
00193 *    implemented. These go by many names, including Divide and Conquer,
00194 *    Partitioning, domain decomposition-type, etc.
00195 *    For tridiagonal matrices, it is obvious: N/P >> bw(=1), and so D&C
00196 *    algorithms are the appropriate choice.
00197 *
00198 *  Algorithm description: Divide and Conquer
00199 *
00200 *    The Divide and Conqer algorithm assumes the matrix is narrowly
00201 *      banded compared with the number of equations. In this situation,
00202 *      it is best to distribute the input matrix A one-dimensionally,
00203 *      with columns atomic and rows divided amongst the processes.
00204 *      The basic algorithm divides the tridiagonal matrix up into
00205 *      P pieces with one stored on each processor,
00206 *      and then proceeds in 2 phases for the factorization or 3 for the
00207 *      solution of a linear system.
00208 *      1) Local Phase:
00209 *         The individual pieces are factored independently and in
00210 *         parallel. These factors are applied to the matrix creating
00211 *         fillin, which is stored in a non-inspectable way in auxiliary
00212 *         space AF. Mathematically, this is equivalent to reordering
00213 *         the matrix A as P A P^T and then factoring the principal
00214 *         leading submatrix of size equal to the sum of the sizes of
00215 *         the matrices factored on each processor. The factors of
00216 *         these submatrices overwrite the corresponding parts of A
00217 *         in memory.
00218 *      2) Reduced System Phase:
00219 *         A small ((P-1)) system is formed representing
00220 *         interaction of the larger blocks, and is stored (as are its
00221 *         factors) in the space AF. A parallel Block Cyclic Reduction
00222 *         algorithm is used. For a linear system, a parallel front solve
00223 *         followed by an analagous backsolve, both using the structure
00224 *         of the factored matrix, are performed.
00225 *      3) Backsubsitution Phase:
00226 *         For a linear system, a local backsubstitution is performed on
00227 *         each processor in parallel.
00228 *
00229 *
00230 *  Descriptors
00231 *  ===========
00232 *
00233 *  Descriptors now have *types* and differ from ScaLAPACK 1.0.
00234 *
00235 *  Note: tridiagonal codes can use either the old two dimensional
00236 *    or new one-dimensional descriptors, though the processor grid in
00237 *    both cases *must be one-dimensional*. We describe both types below.
00238 *
00239 *  Each global data object is described by an associated description
00240 *  vector.  This vector stores the information required to establish
00241 *  the mapping between an object element and its corresponding process
00242 *  and memory location.
00243 *
00244 *  Let A be a generic term for any 2D block cyclicly distributed array.
00245 *  Such a global array has an associated description vector DESCA.
00246 *  In the following comments, the character _ should be read as
00247 *  "of the global array".
00248 *
00249 *  NOTATION        STORED IN      EXPLANATION
00250 *  --------------- -------------- --------------------------------------
00251 *  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
00252 *                                 DTYPE_A = 1.
00253 *  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
00254 *                                 the BLACS process grid A is distribu-
00255 *                                 ted over. The context itself is glo-
00256 *                                 bal, but the handle (the integer
00257 *                                 value) may vary.
00258 *  M_A    (global) DESCA( M_ )    The number of rows in the global
00259 *                                 array A.
00260 *  N_A    (global) DESCA( N_ )    The number of columns in the global
00261 *                                 array A.
00262 *  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
00263 *                                 the rows of the array.
00264 *  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
00265 *                                 the columns of the array.
00266 *  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
00267 *                                 row of the array A is distributed.
00268 *  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
00269 *                                 first column of the array A is
00270 *                                 distributed.
00271 *  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
00272 *                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
00273 *
00274 *  Let K be the number of rows or columns of a distributed matrix,
00275 *  and assume that its process grid has dimension p x q.
00276 *  LOCr( K ) denotes the number of elements of K that a process
00277 *  would receive if K were distributed over the p processes of its
00278 *  process column.
00279 *  Similarly, LOCc( K ) denotes the number of elements of K that a
00280 *  process would receive if K were distributed over the q processes of
00281 *  its process row.
00282 *  The values of LOCr() and LOCc() may be determined via a call to the
00283 *  ScaLAPACK tool function, NUMROC:
00284 *          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
00285 *          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
00286 *  An upper bound for these quantities may be computed by:
00287 *          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
00288 *          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
00289 *
00290 *
00291 *  One-dimensional descriptors:
00292 *
00293 *  One-dimensional descriptors are a new addition to ScaLAPACK since
00294 *    version 1.0. They simplify and shorten the descriptor for 1D
00295 *    arrays.
00296 *
00297 *  Since ScaLAPACK supports two-dimensional arrays as the fundamental
00298 *    object, we allow 1D arrays to be distributed either over the
00299 *    first dimension of the array (as if the grid were P-by-1) or the
00300 *    2nd dimension (as if the grid were 1-by-P). This choice is
00301 *    indicated by the descriptor type (501 or 502)
00302 *    as described below.
00303 *    However, for tridiagonal matrices, since the objects being
00304 *    distributed are the individual vectors storing the diagonals, we
00305 *    have adopted the convention that both the P-by-1 descriptor and
00306 *    the 1-by-P descriptor are allowed and are equivalent for
00307 *    tridiagonal matrices. Thus, for tridiagonal matrices,
00308 *    DTYPE_A = 501 or 502 can be used interchangeably
00309 *    without any other change.
00310 *  We require that the distributed vectors storing the diagonals of a
00311 *    tridiagonal matrix be aligned with each other. Because of this, a
00312 *    single descriptor, DESCA, serves to describe the distribution of
00313 *    of all diagonals simultaneously.
00314 *
00315 *    IMPORTANT NOTE: the actual BLACS grid represented by the
00316 *    CTXT entry in the descriptor may be *either*  P-by-1 or 1-by-P
00317 *    irrespective of which one-dimensional descriptor type
00318 *    (501 or 502) is input.
00319 *    This routine will interpret the grid properly either way.
00320 *    ScaLAPACK routines *do not support intercontext operations* so that
00321 *    the grid passed to a single ScaLAPACK routine *must be the same*
00322 *    for all array descriptors passed to that routine.
00323 *
00324 *    NOTE: In all cases where 1D descriptors are used, 2D descriptors
00325 *    may also be used, since a one-dimensional array is a special case
00326 *    of a two-dimensional array with one dimension of size unity.
00327 *    The two-dimensional array used in this case *must* be of the
00328 *    proper orientation:
00329 *      If the appropriate one-dimensional descriptor is DTYPEA=501
00330 *      (1 by P type), then the two dimensional descriptor must
00331 *      have a CTXT value that refers to a 1 by P BLACS grid;
00332 *      If the appropriate one-dimensional descriptor is DTYPEA=502
00333 *      (P by 1 type), then the two dimensional descriptor must
00334 *      have a CTXT value that refers to a P by 1 BLACS grid.
00335 *
00336 *
00337 *  Summary of allowed descriptors, types, and BLACS grids:
00338 *  DTYPE           501         502         1         1
00339 *  BLACS grid      1xP or Px1  1xP or Px1  1xP       Px1
00340 *  -----------------------------------------------------
00341 *  A               OK          OK          OK        NO
00342 *  B               NO          OK          NO        OK
00343 *
00344 *  Note that a consequence of this chart is that it is not possible
00345 *    for *both* DTYPE_A and DTYPE_B to be 2D_type(1), as these lead
00346 *    to opposite requirements for the orientation of the BLACS grid,
00347 *    and as noted before, the *same* BLACS context must be used in
00348 *    all descriptors in a single ScaLAPACK subroutine call.
00349 *
00350 *  Let A be a generic term for any 1D block cyclicly distributed array.
00351 *  Such a global array has an associated description vector DESCA.
00352 *  In the following comments, the character _ should be read as
00353 *  "of the global array".
00354 *
00355 *  NOTATION        STORED IN  EXPLANATION
00356 *  --------------- ---------- ------------------------------------------
00357 *  DTYPE_A(global) DESCA( 1 ) The descriptor type. For 1D grids,
00358 *                                TYPE_A = 501: 1-by-P grid.
00359 *                                TYPE_A = 502: P-by-1 grid.
00360 *  CTXT_A (global) DESCA( 2 ) The BLACS context handle, indicating
00361 *                                the BLACS process grid A is distribu-
00362 *                                ted over. The context itself is glo-
00363 *                                bal, but the handle (the integer
00364 *                                value) may vary.
00365 *  N_A    (global) DESCA( 3 ) The size of the array dimension being
00366 *                                distributed.
00367 *  NB_A   (global) DESCA( 4 ) The blocking factor used to distribute
00368 *                                the distributed dimension of the array.
00369 *  SRC_A  (global) DESCA( 5 ) The process row or column over which the
00370 *                                first row or column of the array
00371 *                                is distributed.
00372 *  Ignored         DESCA( 6 ) Ignored for tridiagonal matrices.
00373 *  Reserved        DESCA( 7 ) Reserved for future use.
00374 *
00375 *
00376 *
00377 *  =====================================================================
00378 *
00379 *  Code Developer: Andrew J. Cleary, University of Tennessee.
00380 *    Current address: Lawrence Livermore National Labs.
00381 *  This version released: August, 2001.
00382 *
00383 *  =====================================================================
00384 *
00385 *     ..
00386 *     .. Parameters ..
00387       DOUBLE PRECISION   ONE, ZERO
00388       PARAMETER          ( ONE = 1.0D+0 )
00389       PARAMETER          ( ZERO = 0.0D+0 )
00390       COMPLEX*16         CONE, CZERO
00391       PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
00392       PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ) )
00393       INTEGER            INT_ONE
00394       PARAMETER          ( INT_ONE = 1 )
00395       INTEGER            DESCMULT, BIGNUM
00396       PARAMETER          (DESCMULT = 100, BIGNUM = DESCMULT * DESCMULT)
00397       INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
00398      $                   LLD_, MB_, M_, NB_, N_, RSRC_
00399       PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
00400      $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
00401      $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
00402 *     ..
00403 *     .. Local Scalars ..
00404       INTEGER            CSRC, FIRST_PROC, ICTXT, ICTXT_NEW, ICTXT_SAVE,
00405      $                   IDUM1, IDUM2, IDUM3, JA_NEW, LEVEL_DIST, LLDA,
00406      $                   LLDB, MYCOL, MYROW, MY_NUM_COLS, NB, NP, NPCOL,
00407      $                   NPROW, NP_SAVE, ODD_SIZE, PART_OFFSET,
00408      $                   PART_SIZE, RETURN_CODE, STORE_M_B, STORE_N_A,
00409      $                   TEMP, WORK_SIZE_MIN, WORK_U
00410 *     ..
00411 *     .. Local Arrays ..
00412       INTEGER            DESCA_1XP( 7 ), DESCB_PX1( 7 ),
00413      $                   PARAM_CHECK( 16, 3 )
00414 *     ..
00415 *     .. External Subroutines ..
00416       EXTERNAL           BLACS_GET, BLACS_GRIDEXIT, BLACS_GRIDINFO,
00417      $                   DESC_CONVERT, GLOBCHK, PXERBLA, RESHAPE, ZGEMM,
00418      $                   ZGERV2D, ZGESD2D, ZLAMOV, ZMATADD, ZTBTRS,
00419      $                   ZTRMM, ZTRTRS
00420 *     ..
00421 *     .. External Functions ..
00422       LOGICAL            LSAME
00423       INTEGER            NUMROC
00424       COMPLEX*16         ZDOTC
00425       EXTERNAL           LSAME, NUMROC, ZDOTC
00426 *     ..
00427 *     .. Intrinsic Functions ..
00428       INTRINSIC          ICHAR, MIN, MOD
00429 *     ..
00430 *     .. Executable Statements ..
00431 *
00432 *     Test the input parameters
00433 *
00434       INFO = 0
00435 *
00436 *     Convert descriptor into standard form for easy access to
00437 *        parameters, check that grid is of right shape.
00438 *
00439       DESCA_1XP( 1 ) = 501
00440       DESCB_PX1( 1 ) = 502
00441 *
00442       TEMP = DESCA( DTYPE_ )
00443       IF( TEMP .EQ. 502 ) THEN
00444 *        Temporarily set the descriptor type to 1xP type
00445          DESCA( DTYPE_ ) = 501
00446       ENDIF
00447 *
00448       CALL DESC_CONVERT( DESCA, DESCA_1XP, RETURN_CODE )
00449 *
00450       DESCA( DTYPE_ ) = TEMP
00451 *
00452       IF( RETURN_CODE .NE. 0) THEN
00453          INFO = -( 9*100 + 2 )
00454       ENDIF
00455 *
00456       CALL DESC_CONVERT( DESCB, DESCB_PX1, RETURN_CODE )
00457 *
00458       IF( RETURN_CODE .NE. 0) THEN
00459          INFO = -( 12*100 + 2 )
00460       ENDIF
00461 *
00462 *     Consistency checks for DESCA and DESCB.
00463 *
00464 *     Context must be the same
00465       IF( DESCA_1XP( 2 ) .NE. DESCB_PX1( 2 ) ) THEN
00466          INFO = -( 12*100 + 2 )
00467       ENDIF
00468 *
00469 *        These are alignment restrictions that may or may not be removed
00470 *        in future releases. -Andy Cleary, April 14, 1996.
00471 *
00472 *     Block sizes must be the same
00473       IF( DESCA_1XP( 4 ) .NE. DESCB_PX1( 4 ) ) THEN
00474          INFO = -( 12*100 + 4 )
00475       ENDIF
00476 *
00477 *     Source processor must be the same
00478 *
00479       IF( DESCA_1XP( 5 ) .NE. DESCB_PX1( 5 ) ) THEN
00480          INFO = -( 12*100 + 5 )
00481       ENDIF
00482 *
00483 *     Get values out of descriptor for use in code.
00484 *
00485       ICTXT = DESCA_1XP( 2 )
00486       CSRC = DESCA_1XP( 5 )
00487       NB = DESCA_1XP( 4 )
00488       LLDA = DESCA_1XP( 6 )
00489       STORE_N_A = DESCA_1XP( 3 )
00490       LLDB = DESCB_PX1( 6 )
00491       STORE_M_B = DESCB_PX1( 3 )
00492 *
00493 *     Get grid parameters
00494 *
00495 *
00496       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00497       NP = NPROW * NPCOL
00498 *
00499 *
00500 *
00501       IF( LSAME( UPLO, 'U' ) ) THEN
00502          IDUM1 = ICHAR( 'U' )
00503       ELSE IF ( LSAME( UPLO, 'L' ) ) THEN
00504          IDUM1 = ICHAR( 'L' )
00505       ELSE
00506          INFO = -1
00507       END IF
00508 *
00509       IF( LSAME( TRANS, 'N' ) ) THEN
00510          IDUM2 = ICHAR( 'N' )
00511       ELSE IF ( LSAME( TRANS, 'C' ) ) THEN
00512          IDUM2 = ICHAR( 'C' )
00513       ELSE
00514          INFO = -2
00515       END IF
00516 *
00517       IF( LWORK .LT. -1) THEN
00518          INFO = -16
00519       ELSE IF ( LWORK .EQ. -1 ) THEN
00520          IDUM3 = -1
00521       ELSE
00522          IDUM3 = 1
00523       ENDIF
00524 *
00525       IF( N .LT. 0 ) THEN
00526          INFO = -3
00527       ENDIF
00528 *
00529       IF( N+JA-1 .GT. STORE_N_A ) THEN
00530          INFO = -( 9*100 + 6 )
00531       ENDIF
00532 *
00533       IF( N+IB-1 .GT. STORE_M_B ) THEN
00534          INFO = -( 12*100 + 3 )
00535       ENDIF
00536 *
00537       IF( LLDB .LT. NB ) THEN
00538          INFO = -( 12*100 + 6 )
00539       ENDIF
00540 *
00541       IF( NRHS .LT. 0 ) THEN
00542          INFO = -4
00543       ENDIF
00544 *
00545 *     Current alignment restriction
00546 *
00547       IF( JA .NE. IB) THEN
00548          INFO = -8
00549       ENDIF
00550 *
00551 *     Argument checking that is specific to Divide & Conquer routine
00552 *
00553       IF( NPROW .NE. 1 ) THEN
00554          INFO = -( 9*100+2 )
00555       ENDIF
00556 *
00557       IF( N .GT. NP*NB-MOD( JA-1, NB )) THEN
00558          INFO = -( 3 )
00559          CALL PXERBLA( ICTXT,
00560      $      'PZDTTRSV, D&C alg.: only 1 block per proc',
00561      $      -INFO )
00562          RETURN
00563       ENDIF
00564 *
00565       IF((JA+N-1.GT.NB) .AND. ( NB.LT.2*INT_ONE )) THEN
00566          INFO = -( 9*100+4 )
00567          CALL PXERBLA( ICTXT,
00568      $      'PZDTTRSV, D&C alg.: NB too small',
00569      $      -INFO )
00570          RETURN
00571       ENDIF
00572 *
00573 *
00574       WORK_SIZE_MIN =
00575      $           INT_ONE*NRHS
00576 *
00577       WORK( 1 ) = WORK_SIZE_MIN
00578 *
00579       IF( LWORK .LT. WORK_SIZE_MIN ) THEN
00580          IF( LWORK .NE. -1 ) THEN
00581          INFO = -16
00582          CALL PXERBLA( ICTXT,
00583      $      'PZDTTRSV: worksize error',
00584      $      -INFO )
00585          ENDIF
00586          RETURN
00587       ENDIF
00588 *
00589 *     Pack params and positions into arrays for global consistency check
00590 *
00591       PARAM_CHECK( 16, 1 ) = DESCB(5)
00592       PARAM_CHECK( 15, 1 ) = DESCB(4)
00593       PARAM_CHECK( 14, 1 ) = DESCB(3)
00594       PARAM_CHECK( 13, 1 ) = DESCB(2)
00595       PARAM_CHECK( 12, 1 ) = DESCB(1)
00596       PARAM_CHECK( 11, 1 ) = IB
00597       PARAM_CHECK( 10, 1 ) = DESCA(5)
00598       PARAM_CHECK(  9, 1 ) = DESCA(4)
00599       PARAM_CHECK(  8, 1 ) = DESCA(3)
00600       PARAM_CHECK(  7, 1 ) = DESCA(1)
00601       PARAM_CHECK(  6, 1 ) = JA
00602       PARAM_CHECK(  5, 1 ) = NRHS
00603       PARAM_CHECK(  4, 1 ) = N
00604       PARAM_CHECK(  3, 1 ) = IDUM3
00605       PARAM_CHECK(  2, 1 ) = IDUM2
00606       PARAM_CHECK(  1, 1 ) = IDUM1
00607 *
00608       PARAM_CHECK( 16, 2 ) = 1205
00609       PARAM_CHECK( 15, 2 ) = 1204
00610       PARAM_CHECK( 14, 2 ) = 1203
00611       PARAM_CHECK( 13, 2 ) = 1202
00612       PARAM_CHECK( 12, 2 ) = 1201
00613       PARAM_CHECK( 11, 2 ) = 11
00614       PARAM_CHECK( 10, 2 ) = 905
00615       PARAM_CHECK(  9, 2 ) = 904
00616       PARAM_CHECK(  8, 2 ) = 903
00617       PARAM_CHECK(  7, 2 ) = 901
00618       PARAM_CHECK(  6, 2 ) = 8
00619       PARAM_CHECK(  5, 2 ) = 4
00620       PARAM_CHECK(  4, 2 ) = 3
00621       PARAM_CHECK(  3, 2 ) = 16
00622       PARAM_CHECK(  2, 2 ) = 2
00623       PARAM_CHECK(  1, 2 ) = 1
00624 *
00625 *     Want to find errors with MIN( ), so if no error, set it to a big
00626 *     number. If there already is an error, multiply by the the
00627 *     descriptor multiplier.
00628 *
00629       IF( INFO.GE.0 ) THEN
00630          INFO = BIGNUM
00631       ELSE IF( INFO.LT.-DESCMULT ) THEN
00632          INFO = -INFO
00633       ELSE
00634          INFO = -INFO * DESCMULT
00635       END IF
00636 *
00637 *     Check consistency across processors
00638 *
00639       CALL GLOBCHK( ICTXT, 16, PARAM_CHECK, 16,
00640      $              PARAM_CHECK( 1, 3 ), INFO )
00641 *
00642 *     Prepare output: set info = 0 if no error, and divide by DESCMULT
00643 *     if error is not in a descriptor entry.
00644 *
00645       IF( INFO.EQ.BIGNUM ) THEN
00646          INFO = 0
00647       ELSE IF( MOD( INFO, DESCMULT ) .EQ. 0 ) THEN
00648          INFO = -INFO / DESCMULT
00649       ELSE
00650          INFO = -INFO
00651       END IF
00652 *
00653       IF( INFO.LT.0 ) THEN
00654          CALL PXERBLA( ICTXT, 'PZDTTRSV', -INFO )
00655          RETURN
00656       END IF
00657 *
00658 *     Quick return if possible
00659 *
00660       IF( N.EQ.0 )
00661      $   RETURN
00662 *
00663       IF( NRHS.EQ.0 )
00664      $   RETURN
00665 *
00666 *
00667 *     Adjust addressing into matrix space to properly get into
00668 *        the beginning part of the relevant data
00669 *
00670       PART_OFFSET = NB*( (JA-1)/(NPCOL*NB) )
00671 *
00672       IF ( (MYCOL-CSRC) .LT. (JA-PART_OFFSET-1)/NB ) THEN
00673          PART_OFFSET = PART_OFFSET + NB
00674       ENDIF
00675 *
00676       IF ( MYCOL .LT. CSRC ) THEN
00677          PART_OFFSET = PART_OFFSET - NB
00678       ENDIF
00679 *
00680 *     Form a new BLACS grid (the "standard form" grid) with only procs
00681 *        holding part of the matrix, of size 1xNP where NP is adjusted,
00682 *        starting at csrc=0, with JA modified to reflect dropped procs.
00683 *
00684 *     First processor to hold part of the matrix:
00685 *
00686       FIRST_PROC = MOD( ( JA-1 )/NB+CSRC, NPCOL )
00687 *
00688 *     Calculate new JA one while dropping off unused processors.
00689 *
00690       JA_NEW = MOD( JA-1, NB ) + 1
00691 *
00692 *     Save and compute new value of NP
00693 *
00694       NP_SAVE = NP
00695       NP = ( JA_NEW+N-2 )/NB + 1
00696 *
00697 *     Call utility routine that forms "standard-form" grid
00698 *
00699       CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE,
00700      $              FIRST_PROC, INT_ONE, NP )
00701 *
00702 *     Use new context from standard grid as context.
00703 *
00704       ICTXT_SAVE = ICTXT
00705       ICTXT = ICTXT_NEW
00706       DESCA_1XP( 2 ) = ICTXT_NEW
00707       DESCB_PX1( 2 ) = ICTXT_NEW
00708 *
00709 *     Get information about new grid.
00710 *
00711       CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
00712 *
00713 *     Drop out processors that do not have part of the matrix.
00714 *
00715       IF( MYROW .LT. 0 ) THEN
00716          GOTO 1234
00717       ENDIF
00718 *
00719 *     ********************************
00720 *     Values reused throughout routine
00721 *
00722 *     User-input value of partition size
00723 *
00724       PART_SIZE = NB
00725 *
00726 *     Number of columns in each processor
00727 *
00728       MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL )
00729 *
00730 *     Offset in columns to beginning of main partition in each proc
00731 *
00732       IF ( MYCOL .EQ. 0 ) THEN
00733         PART_OFFSET = PART_OFFSET+MOD( JA_NEW-1, PART_SIZE )
00734         MY_NUM_COLS = MY_NUM_COLS - MOD(JA_NEW-1, PART_SIZE )
00735       ENDIF
00736 *
00737 *     Size of main (or odd) partition in each processor
00738 *
00739       ODD_SIZE = MY_NUM_COLS
00740       IF ( MYCOL .LT. NP-1 ) THEN
00741          ODD_SIZE = ODD_SIZE - INT_ONE
00742       ENDIF
00743 *
00744 *     Offset to workspace for Upper triangular factor
00745 *
00746       WORK_U = INT_ONE*ODD_SIZE + 3
00747 *
00748 *
00749 *
00750 *     Begin main code
00751 *
00752       IF ( LSAME( UPLO, 'L' ) ) THEN
00753 *
00754       IF ( LSAME( TRANS, 'N' ) ) THEN
00755 *
00756 *        Frontsolve
00757 *
00758 *
00759 ******************************************
00760 *       Local computation phase
00761 ******************************************
00762 *
00763 *       Use main partition in each processor to solve locally
00764 *
00765         CALL ZDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ),
00766      $                D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ),
00767      $                B( PART_OFFSET+1 ), LLDB, INFO )
00768 *
00769 *
00770         IF ( MYCOL .LT. NP-1 ) THEN
00771 *         Use factorization of odd-even connection block to modify
00772 *           locally stored portion of right hand side(s)
00773 *
00774             CALL ZAXPY( NRHS, -DL( PART_OFFSET+ODD_SIZE+1 ),
00775      $            B( PART_OFFSET+ODD_SIZE ), LLDB,
00776      $            B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
00777 *
00778         ENDIF
00779 *
00780 *
00781         IF ( MYCOL .NE. 0 ) THEN
00782 *         Use the "spike" fillin to calculate contribution to previous
00783 *           processor's righthand-side.
00784 *
00785             CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, ODD_SIZE, -CONE,
00786      $                  AF( 1 ), ODD_SIZE, B( PART_OFFSET+1 ), LLDB,
00787      $                  CZERO, WORK( 1+INT_ONE-INT_ONE ), INT_ONE )
00788         ENDIF
00789 *
00790 *
00791 ************************************************
00792 *       Formation and solution of reduced system
00793 ************************************************
00794 *
00795 *
00796 *       Send modifications to prior processor's right hand sides
00797 *
00798         IF( MYCOL .GT. 0) THEN
00799 *
00800           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
00801      $                     WORK( 1 ), INT_ONE,
00802      $                     0, MYCOL - 1 )
00803 *
00804         ENDIF
00805 *
00806 *       Receive modifications to processor's right hand sides
00807 *
00808         IF( MYCOL .LT. NPCOL-1) THEN
00809 *
00810           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
00811      $                     WORK( 1 ), INT_ONE,
00812      $                     0, MYCOL + 1 )
00813 *
00814 *         Combine contribution to locally stored right hand sides
00815 *
00816           CALL ZMATADD( INT_ONE, NRHS, CONE,
00817      $                WORK( 1 ), INT_ONE, CONE,
00818      $                B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
00819 *
00820         ENDIF
00821 *
00822 *
00823 *       The last processor does not participate in the solution of the
00824 *       reduced system, having sent its contribution already.
00825         IF( MYCOL .EQ. NPCOL-1 ) THEN
00826           GOTO 14
00827         ENDIF
00828 *
00829 *
00830 *       *************************************
00831 *       Modification Loop
00832 *
00833 *       The distance for sending and receiving for each level starts
00834 *         at 1 for the first level.
00835         LEVEL_DIST = 1
00836 *
00837 *       Do until this proc is needed to modify other procs' equations
00838 *
00839    12   CONTINUE
00840         IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 11
00841 *
00842 *         Receive and add contribution to righthand sides from left
00843 *
00844           IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN
00845 *
00846             CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
00847      $                     WORK( 1 ),
00848      $                     INT_ONE, 0, MYCOL-LEVEL_DIST )
00849 *
00850             CALL ZMATADD( INT_ONE, NRHS, CONE,
00851      $                WORK( 1 ), INT_ONE, CONE,
00852      $                B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
00853 *
00854           ENDIF
00855 *
00856 *         Receive and add contribution to righthand sides from right
00857 *
00858           IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN
00859 *
00860             CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
00861      $                     WORK( 1 ),
00862      $                     INT_ONE, 0, MYCOL+LEVEL_DIST )
00863 *
00864             CALL ZMATADD( INT_ONE, NRHS, CONE,
00865      $                WORK( 1 ), INT_ONE, CONE,
00866      $                B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
00867 *
00868           ENDIF
00869 *
00870           LEVEL_DIST = LEVEL_DIST*2
00871 *
00872         GOTO 12
00873    11   CONTINUE
00874 *       [End of GOTO Loop]
00875 *
00876 *
00877 *
00878 *       *********************************
00879 *       Calculate and use this proc's blocks to modify other procs
00880 *
00881 *       Solve with diagonal block
00882 *
00883         CALL ZTBTRS( 'L', 'N', 'U', INT_ONE, MIN( INT_ONE, INT_ONE-1 ),
00884      $               NRHS, AF( ODD_SIZE+2 ), INT_ONE+1,
00885      $               B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
00886 *
00887         IF( INFO.NE.0 ) THEN
00888           GO TO 1000
00889         ENDIF
00890 *
00891 *
00892 *
00893 *       *********
00894         IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN
00895 *
00896 *         Calculate contribution from this block to next diagonal block
00897 *
00898           CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
00899      $                     AF( (ODD_SIZE)*INT_ONE+1 ),
00900      $                     INT_ONE,
00901      $                     B( PART_OFFSET+ODD_SIZE+1 ),
00902      $                     LLDB, CZERO,
00903      $                     WORK( 1 ),
00904      $                     INT_ONE )
00905 *
00906 *         Send contribution to diagonal block's owning processor.
00907 *
00908           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
00909      $                     WORK( 1 ),
00910      $                     INT_ONE, 0, MYCOL+LEVEL_DIST )
00911 *
00912         ENDIF
00913 *       End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
00914 *
00915 *       ************
00916         IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND.
00917      $      ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
00918 *
00919 *
00920 *         Use offdiagonal block to calculate modification to diag block
00921 *           of processor to the left
00922 *
00923           CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
00924      $                     AF( ODD_SIZE*INT_ONE+2+1 ),
00925      $                     INT_ONE,
00926      $                     B( PART_OFFSET+ODD_SIZE+1 ),
00927      $                     LLDB, CZERO,
00928      $                     WORK( 1 ),
00929      $                     INT_ONE )
00930 *
00931 *         Send contribution to diagonal block's owning processor.
00932 *
00933           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
00934      $                     WORK( 1 ),
00935      $                     INT_ONE, 0, MYCOL-LEVEL_DIST )
00936 *
00937         ENDIF
00938 *       End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
00939 *
00940    14  CONTINUE
00941 *
00942       ELSE
00943 *
00944 ******************** BACKSOLVE *************************************
00945 *
00946 ********************************************************************
00947 *     .. Begin reduced system phase of algorithm ..
00948 ********************************************************************
00949 *
00950 *
00951 *
00952 *       The last processor does not participate in the solution of the
00953 *       reduced system and just waits to receive its solution.
00954         IF( MYCOL .EQ. NPCOL-1 ) THEN
00955           GOTO 24
00956         ENDIF
00957 *
00958 *       Determine number of steps in tree loop
00959 *
00960         LEVEL_DIST = 1
00961    27   CONTINUE
00962         IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 26
00963 *
00964           LEVEL_DIST = LEVEL_DIST*2
00965 *
00966         GOTO 27
00967    26   CONTINUE
00968 *
00969 *
00970         IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND.
00971      $      ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
00972 *
00973 *         Receive solution from processor to left
00974 *
00975           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
00976      $                     WORK( 1 ),
00977      $                     INT_ONE, 0, MYCOL-LEVEL_DIST )
00978 *
00979 *
00980 *         Use offdiagonal block to calculate modification to RHS stored
00981 *           on this processor
00982 *
00983           CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
00984      $                     AF( ODD_SIZE*INT_ONE+2+1 ),
00985      $                     INT_ONE,
00986      $                     WORK( 1 ),
00987      $                     INT_ONE, CONE,
00988      $                     B( PART_OFFSET+ODD_SIZE+1 ),
00989      $                     LLDB )
00990         ENDIF
00991 *       End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
00992 *
00993 *
00994         IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN
00995 *
00996 *         Receive solution from processor to right
00997 *
00998           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
00999      $                     WORK( 1 ),
01000      $                     INT_ONE, 0, MYCOL+LEVEL_DIST )
01001 *
01002 *         Calculate contribution from this block to next diagonal block
01003 *
01004           CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
01005      $                     AF( (ODD_SIZE)*INT_ONE+1 ),
01006      $                     INT_ONE,
01007      $                     WORK( 1 ),
01008      $                     INT_ONE, CONE,
01009      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01010      $                     LLDB )
01011 *
01012         ENDIF
01013 *       End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
01014 *
01015 *
01016 *       Solve with diagonal block
01017 *
01018         CALL ZTBTRS( 'L', 'C', 'U', INT_ONE, MIN( INT_ONE, INT_ONE-1 ),
01019      $               NRHS, AF( ODD_SIZE+2 ), INT_ONE+1,
01020      $               B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
01021 *
01022         IF( INFO.NE.0 ) THEN
01023           GO TO 1000
01024         ENDIF
01025 *
01026 *
01027 *
01028 ***Modification Loop *******
01029 *
01030    22   CONTINUE
01031         IF( LEVEL_DIST .EQ. 1 ) GOTO 21
01032 *
01033           LEVEL_DIST = LEVEL_DIST/2
01034 *
01035 *         Send solution to the right
01036 *
01037           IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN
01038 *
01039           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01040      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01041      $                     LLDB, 0, MYCOL+LEVEL_DIST )
01042 *
01043           ENDIF
01044 *
01045 *         Send solution to left
01046 *
01047           IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN
01048 *
01049           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01050      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01051      $                     LLDB, 0, MYCOL-LEVEL_DIST )
01052 *
01053           ENDIF
01054 *
01055         GOTO 22
01056    21   CONTINUE
01057 *       [End of GOTO Loop]
01058 *
01059    24   CONTINUE
01060 *          [Processor npcol - 1 jumped to here to await next stage]
01061 *
01062 *******************************
01063 *       Reduced system has been solved, communicate solutions to nearest
01064 *         neighbors in preparation for local computation phase.
01065 *
01066 *
01067 *       Send elements of solution to next proc
01068 *
01069         IF( MYCOL .LT. NPCOL-1) THEN
01070 *
01071           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01072      $                     B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
01073      $                     0, MYCOL +1 )
01074 *
01075         ENDIF
01076 *
01077 *       Receive modifications to processor's right hand sides
01078 *
01079         IF( MYCOL .GT. 0) THEN
01080 *
01081           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
01082      $                     WORK( 1 ), INT_ONE,
01083      $                     0, MYCOL - 1 )
01084 *
01085         ENDIF
01086 *
01087 *
01088 *
01089 **********************************************
01090 *       Local computation phase
01091 **********************************************
01092 *
01093         IF ( MYCOL .NE. 0 ) THEN
01094 *         Use the "spike" fillin to calculate contribution from previous
01095 *           processor's solution.
01096 *
01097           CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -CONE, AF( 1 ),
01098      $                ODD_SIZE, WORK( 1+INT_ONE-INT_ONE ), INT_ONE,
01099      $                CONE, B( PART_OFFSET+1 ), LLDB )
01100 *
01101         ENDIF
01102 *
01103 *
01104         IF ( MYCOL .LT. NP-1 ) THEN
01105 *         Use factorization of odd-even connection block to modify
01106 *           locally stored portion of right hand side(s)
01107 *
01108             CALL ZAXPY( NRHS, -DCONJG( DL( PART_OFFSET+ODD_SIZE+1 ) ),
01109      $                 B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
01110      $                 B( PART_OFFSET+ODD_SIZE ), LLDB )
01111 *
01112         ENDIF
01113 *
01114 *       Use main partition in each processor to solve locally
01115 *
01116         CALL ZDTTRSV( UPLO, 'C', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ),
01117      $                D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ),
01118      $                B( PART_OFFSET+1 ), LLDB, INFO )
01119 *
01120       ENDIF
01121 *     End of "IF( LSAME( TRANS, 'N' ) )"...
01122 *
01123 *
01124       ELSE
01125 ***************************************************************
01126 *     CASE UPLO = 'U'                                         *
01127 ***************************************************************
01128       IF ( LSAME( TRANS, 'C' ) ) THEN
01129 *
01130 *        Frontsolve
01131 *
01132 *
01133 ******************************************
01134 *       Local computation phase
01135 ******************************************
01136 *
01137 *       Use main partition in each processor to solve locally
01138 *
01139         CALL ZDTTRSV( UPLO, 'C', ODD_SIZE, NRHS, DL( PART_OFFSET+2 ),
01140      $                D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ),
01141      $                B( PART_OFFSET+1 ), LLDB, INFO )
01142 *
01143 *
01144         IF ( MYCOL .LT. NP-1 ) THEN
01145 *         Use factorization of odd-even connection block to modify
01146 *           locally stored portion of right hand side(s)
01147 *
01148             CALL ZAXPY( NRHS, -DCONJG( DU( PART_OFFSET+ODD_SIZE ) ),
01149      $            B( PART_OFFSET+ODD_SIZE ), LLDB,
01150      $            B( PART_OFFSET+ODD_SIZE+1 ), LLDB )
01151 *
01152         ENDIF
01153 *
01154 *
01155         IF ( MYCOL .NE. 0 ) THEN
01156 *         Use the "spike" fillin to calculate contribution to previous
01157 *           processor's righthand-side.
01158 *
01159             CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, ODD_SIZE, -CONE,
01160      $                  AF( WORK_U+1 ), ODD_SIZE, B( PART_OFFSET+1 ),
01161      $                  LLDB, CZERO, WORK( 1+INT_ONE-INT_ONE ),
01162      $                  INT_ONE )
01163         ENDIF
01164 *
01165 *
01166 ************************************************
01167 *       Formation and solution of reduced system
01168 ************************************************
01169 *
01170 *
01171 *       Send modifications to prior processor's right hand sides
01172 *
01173         IF( MYCOL .GT. 0) THEN
01174 *
01175           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01176      $                     WORK( 1 ), INT_ONE,
01177      $                     0, MYCOL - 1 )
01178 *
01179         ENDIF
01180 *
01181 *       Receive modifications to processor's right hand sides
01182 *
01183         IF( MYCOL .LT. NPCOL-1) THEN
01184 *
01185           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
01186      $                     WORK( 1 ), INT_ONE,
01187      $                     0, MYCOL + 1 )
01188 *
01189 *         Combine contribution to locally stored right hand sides
01190 *
01191           CALL ZMATADD( INT_ONE, NRHS, CONE,
01192      $                WORK( 1 ), INT_ONE, CONE,
01193      $                B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
01194 *
01195         ENDIF
01196 *
01197 *
01198 *       The last processor does not participate in the solution of the
01199 *       reduced system, having sent its contribution already.
01200         IF( MYCOL .EQ. NPCOL-1 ) THEN
01201           GOTO 44
01202         ENDIF
01203 *
01204 *
01205 *       *************************************
01206 *       Modification Loop
01207 *
01208 *       The distance for sending and receiving for each level starts
01209 *         at 1 for the first level.
01210         LEVEL_DIST = 1
01211 *
01212 *       Do until this proc is needed to modify other procs' equations
01213 *
01214    42   CONTINUE
01215         IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 41
01216 *
01217 *         Receive and add contribution to righthand sides from left
01218 *
01219           IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN
01220 *
01221             CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
01222      $                     WORK( 1 ),
01223      $                     INT_ONE, 0, MYCOL-LEVEL_DIST )
01224 *
01225             CALL ZMATADD( INT_ONE, NRHS, CONE,
01226      $                WORK( 1 ), INT_ONE, CONE,
01227      $                B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
01228 *
01229           ENDIF
01230 *
01231 *         Receive and add contribution to righthand sides from right
01232 *
01233           IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN
01234 *
01235             CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
01236      $                     WORK( 1 ),
01237      $                     INT_ONE, 0, MYCOL+LEVEL_DIST )
01238 *
01239             CALL ZMATADD( INT_ONE, NRHS, CONE,
01240      $                WORK( 1 ), INT_ONE, CONE,
01241      $                B( PART_OFFSET+ODD_SIZE + 1 ), LLDB )
01242 *
01243           ENDIF
01244 *
01245           LEVEL_DIST = LEVEL_DIST*2
01246 *
01247         GOTO 42
01248    41   CONTINUE
01249 *       [End of GOTO Loop]
01250 *
01251 *
01252 *
01253 *       *********************************
01254 *       Calculate and use this proc's blocks to modify other procs
01255 *
01256 *       Solve with diagonal block
01257 *
01258         CALL ZTBTRS( 'U', 'C', 'N', INT_ONE, MIN( INT_ONE, INT_ONE-1 ),
01259      $               NRHS, AF( ODD_SIZE+2 ), INT_ONE+1,
01260      $               B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
01261 *
01262         IF( INFO.NE.0 ) THEN
01263           GO TO 1000
01264         ENDIF
01265 *
01266 *
01267 *
01268 *       *********
01269         IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN
01270 *
01271 *         Calculate contribution from this block to next diagonal block
01272 *
01273           CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
01274      $                     AF( WORK_U+(ODD_SIZE)*INT_ONE+1 ),
01275      $                     INT_ONE,
01276      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01277      $                     LLDB, CZERO,
01278      $                     WORK( 1 ),
01279      $                     INT_ONE )
01280 *
01281 *         Send contribution to diagonal block's owning processor.
01282 *
01283           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01284      $                     WORK( 1 ),
01285      $                     INT_ONE, 0, MYCOL+LEVEL_DIST )
01286 *
01287         ENDIF
01288 *       End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
01289 *
01290 *       ************
01291         IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND.
01292      $      ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
01293 *
01294 *
01295 *         Use offdiagonal block to calculate modification to diag block
01296 *           of processor to the left
01297 *
01298           CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
01299      $                     AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ),
01300      $                     INT_ONE,
01301      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01302      $                     LLDB, CZERO,
01303      $                     WORK( 1 ),
01304      $                     INT_ONE )
01305 *
01306 *         Send contribution to diagonal block's owning processor.
01307 *
01308           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01309      $                     WORK( 1 ),
01310      $                     INT_ONE, 0, MYCOL-LEVEL_DIST )
01311 *
01312         ENDIF
01313 *       End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
01314 *
01315    44  CONTINUE
01316 *
01317       ELSE
01318 *
01319 ******************** BACKSOLVE *************************************
01320 *
01321 ********************************************************************
01322 *     .. Begin reduced system phase of algorithm ..
01323 ********************************************************************
01324 *
01325 *
01326 *
01327 *       The last processor does not participate in the solution of the
01328 *       reduced system and just waits to receive its solution.
01329         IF( MYCOL .EQ. NPCOL-1 ) THEN
01330           GOTO 54
01331         ENDIF
01332 *
01333 *       Determine number of steps in tree loop
01334 *
01335         LEVEL_DIST = 1
01336    57   CONTINUE
01337         IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) .NE. 0 ) GOTO 56
01338 *
01339           LEVEL_DIST = LEVEL_DIST*2
01340 *
01341         GOTO 57
01342    56   CONTINUE
01343 *
01344 *
01345         IF( (MYCOL/LEVEL_DIST .GT. 0 ).AND.
01346      $      ( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
01347 *
01348 *         Receive solution from processor to left
01349 *
01350           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
01351      $                     WORK( 1 ),
01352      $                     INT_ONE, 0, MYCOL-LEVEL_DIST )
01353 *
01354 *
01355 *         Use offdiagonal block to calculate modification to RHS stored
01356 *           on this processor
01357 *
01358           CALL ZGEMM( 'C', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
01359      $                     AF( WORK_U+ODD_SIZE*INT_ONE+2+1 ),
01360      $                     INT_ONE,
01361      $                     WORK( 1 ),
01362      $                     INT_ONE, CONE,
01363      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01364      $                     LLDB )
01365         ENDIF
01366 *       End of "if( mycol/level_dist.le. (npcol-1)/level_dist -1 )..."
01367 *
01368 *
01369         IF( MYCOL/LEVEL_DIST .LE. (NPCOL-1)/LEVEL_DIST-2 )THEN
01370 *
01371 *         Receive solution from processor to right
01372 *
01373           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
01374      $                     WORK( 1 ),
01375      $                     INT_ONE, 0, MYCOL+LEVEL_DIST )
01376 *
01377 *         Calculate contribution from this block to next diagonal block
01378 *
01379           CALL ZGEMM( 'N', 'N', INT_ONE, NRHS, INT_ONE, -CONE,
01380      $                     AF( WORK_U+(ODD_SIZE)*INT_ONE+1 ),
01381      $                     INT_ONE,
01382      $                     WORK( 1 ),
01383      $                     INT_ONE, CONE,
01384      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01385      $                     LLDB )
01386 *
01387         ENDIF
01388 *       End of "if( mycol/level_dist .le. (npcol-1)/level_dist-2 )..."
01389 *
01390 *
01391 *       Solve with diagonal block
01392 *
01393         CALL ZTBTRS( 'U', 'N', 'N', INT_ONE, MIN( INT_ONE, INT_ONE-1 ),
01394      $               NRHS, AF( ODD_SIZE+2 ), INT_ONE+1,
01395      $               B( PART_OFFSET+ODD_SIZE+1 ), LLDB, INFO )
01396 *
01397         IF( INFO.NE.0 ) THEN
01398           GO TO 1000
01399         ENDIF
01400 *
01401 *
01402 *
01403 ***Modification Loop *******
01404 *
01405    52   CONTINUE
01406         IF( LEVEL_DIST .EQ. 1 ) GOTO 51
01407 *
01408           LEVEL_DIST = LEVEL_DIST/2
01409 *
01410 *         Send solution to the right
01411 *
01412           IF( MYCOL+LEVEL_DIST .LT. NPCOL-1 ) THEN
01413 *
01414           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01415      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01416      $                     LLDB, 0, MYCOL+LEVEL_DIST )
01417 *
01418           ENDIF
01419 *
01420 *         Send solution to left
01421 *
01422           IF( MYCOL-LEVEL_DIST .GE. 0 ) THEN
01423 *
01424           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01425      $                     B( PART_OFFSET+ODD_SIZE+1 ),
01426      $                     LLDB, 0, MYCOL-LEVEL_DIST )
01427 *
01428           ENDIF
01429 *
01430         GOTO 52
01431    51   CONTINUE
01432 *       [End of GOTO Loop]
01433 *
01434    54   CONTINUE
01435 *          [Processor npcol - 1 jumped to here to await next stage]
01436 *
01437 *******************************
01438 *       Reduced system has been solved, communicate solutions to nearest
01439 *         neighbors in preparation for local computation phase.
01440 *
01441 *
01442 *       Send elements of solution to next proc
01443 *
01444         IF( MYCOL .LT. NPCOL-1) THEN
01445 *
01446           CALL ZGESD2D( ICTXT, INT_ONE, NRHS,
01447      $                     B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
01448      $                     0, MYCOL +1 )
01449 *
01450         ENDIF
01451 *
01452 *       Receive modifications to processor's right hand sides
01453 *
01454         IF( MYCOL .GT. 0) THEN
01455 *
01456           CALL ZGERV2D( ICTXT, INT_ONE, NRHS,
01457      $                     WORK( 1 ), INT_ONE,
01458      $                     0, MYCOL - 1 )
01459 *
01460         ENDIF
01461 *
01462 *
01463 *
01464 **********************************************
01465 *       Local computation phase
01466 **********************************************
01467 *
01468         IF ( MYCOL .NE. 0 ) THEN
01469 *         Use the "spike" fillin to calculate contribution from previous
01470 *           processor's solution.
01471 *
01472           CALL ZGEMM( 'N', 'N', ODD_SIZE, NRHS, INT_ONE, -CONE,
01473      $                AF( WORK_U+1 ), ODD_SIZE,
01474      $                WORK( 1+INT_ONE-INT_ONE ), INT_ONE, CONE,
01475      $                B( PART_OFFSET+1 ), LLDB )
01476 *
01477         ENDIF
01478 *
01479 *
01480         IF ( MYCOL .LT. NP-1 ) THEN
01481 *         Use factorization of odd-even connection block to modify
01482 *           locally stored portion of right hand side(s)
01483 *
01484             CALL ZAXPY( NRHS, -( DU( PART_OFFSET+ODD_SIZE ) ),
01485      $                 B( PART_OFFSET+ODD_SIZE+1 ), LLDB,
01486      $                 B( PART_OFFSET+ODD_SIZE ), LLDB )
01487 *
01488         ENDIF
01489 *
01490 *       Use main partition in each processor to solve locally
01491 *
01492         CALL ZDTTRSV( UPLO, 'N', ODD_SIZE, NRHS, DU( PART_OFFSET+2 ),
01493      $                D( PART_OFFSET+1 ), DU( PART_OFFSET+1 ),
01494      $                B( PART_OFFSET+1 ), LLDB, INFO )
01495 *
01496       ENDIF
01497 *     End of "IF( LSAME( TRANS, 'N' ) )"...
01498 *
01499 *
01500       ENDIF
01501 *     End of "IF( LSAME( UPLO, 'L' ) )"...
01502  1000 CONTINUE
01503 *
01504 *
01505 *     Free BLACS space used to hold standard-form grid.
01506 *
01507       IF( ICTXT_SAVE .NE. ICTXT_NEW ) THEN
01508          CALL BLACS_GRIDEXIT( ICTXT_NEW )
01509       ENDIF
01510 *
01511  1234 CONTINUE
01512 *
01513 *     Restore saved input parameters
01514 *
01515       ICTXT = ICTXT_SAVE
01516       NP = NP_SAVE
01517 *
01518 *     Output minimum worksize
01519 *
01520       WORK( 1 ) = WORK_SIZE_MIN
01521 *
01522 *
01523       RETURN
01524 *
01525 *     End of PZDTTRSV
01526 *
01527       END