ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pbdtrnv.f
Go to the documentation of this file.
00001       SUBROUTINE PBDTRNV( ICONTXT, XDIST, TRANS, N, NB, NZ, X, INCX,
00002      $                    BETA, Y, INCY, IXROW, IXCOL, IYROW, IYCOL,
00003      $                    WORK )
00004 *
00005 *  -- PB-BLAS routine (version 2.1) --
00006 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
00007 *     April 28, 1996
00008 *
00009 *     Jaeyoung Choi, Oak Ridge National Laboratory
00010 *     Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
00011 *     David Walker,  Oak Ridge National Laboratory
00012 *
00013 *     .. Scalar Arguments ..
00014       CHARACTER*1        TRANS, XDIST
00015       INTEGER            ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
00016      $                   IYROW, N, NB, NZ
00017       DOUBLE PRECISION   BETA
00018 *     ..
00019 *     .. Array Arguments ..
00020       DOUBLE PRECISION   WORK( * ), X( * ), Y( * )
00021 *     ..
00022 *
00023 *  Purpose
00024 *  =======
00025 *
00026 *  PBDTRNV transposes a column vector to row vector, or a row vector to
00027 *  column vector by reallocating data distribution.
00028 *
00029 *     Y := X'
00030 *
00031 *  where X and Y are N vectors.
00032 *
00033 *  Parameters
00034 *  ==========
00035 *
00036 *  ICONTXT (input) INTEGER
00037 *          ICONTXT is the BLACS mechanism for partitioning communication
00038 *          space.  A defining property of a context is that a message in
00039 *          a context cannot be sent or received in another context.  The
00040 *          BLACS context includes the definition of a grid, and each
00041 *          process' coordinates in it.
00042 *
00043 *  XDIST   (input) CHARACTER*1
00044 *          XDIST specifies whether X is a column vector or a row vector,
00045 *
00046 *            XDIST = 'C',  X is a column vector (distributed columnwise)
00047 *            XDIST = 'R',  X is a row vector    (distributed rowwise)
00048 *
00049 *  TRANS   (input) CHARACTER*1
00050 *          TRANS specifies whether the transposed format is transpose
00051 *          or conjugate transpose.  If the vectors X and Y are real,
00052 *          the argument is ignored.
00053 *
00054 *             TRANS = 'T',  transpose
00055 *             TRANS = 'C',  conjugate transpose
00056 *
00057 *  N       (input) INTEGER
00058 *          N specifies the (global) number of the vector X and the
00059 *          vector Y.  N >= 0.
00060 *
00061 *  NB      (input) INTEGER
00062 *          NB specifies the block size of vectors X and Y.  NB >= 0.
00063 *
00064 *  NZ      (input) INTEGER
00065 *          NZ is the column offset to specify the column distance from
00066 *          the beginning of the block to the first element of the
00067 *          vector X, and the row offset to the first element of the
00068 *          vector Y if XDIST = 'C'.
00069 *          Otherwise, it is row offset to specify the row distance
00070 *          from the beginning of the block to the first element of the
00071 *          vector X, and the column offset to the first element of the
00072 *          vector Y.  0 < NZ <= NB.
00073 *
00074 *  X       (input) DOUBLE PRECISION array of dimension  at least
00075 *          ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
00076 *          ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
00077 *          The incremented array X must contain the vector X.
00078 *
00079 *  INCX    (input) INTEGER
00080 *          INCX specifies the increment for the elements of X.
00081 *          INCX <> 0.
00082 *
00083 *  BETA    (input) DOUBLE PRECISION
00084 *          BETA specifies scaler beta.
00085 *
00086 *  Y       (input/output) DOUBLE PRECISION array of dimension at least
00087 *          ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
00088 *          ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
00089 *          The incremented array Y must contain the vector Y.
00090 *          Y will not be referenced if beta is zero.
00091 *
00092 *  INCY    (input) INTEGER
00093 *          INCY specifies the increment for the elements of Y.
00094 *          INCY <> 0.
00095 *
00096 *  IXROW   (input) INTEGER
00097 *          IXROW specifies a row of the process template, which holds
00098 *          the first element of the vector X. If X is a row vector and
00099 *          all rows of processes have a copy of X, then set IXROW = -1.
00100 *
00101 *  IXCOL   (input) INTEGER
00102 *          IXCOL specifies  a column of the process template,
00103 *          which holds the first element of the vector X.  If  X is  a
00104 *          column block and all columns of processes have a copy of X,
00105 *          then set IXCOL = -1.
00106 *
00107 *  IYROW   (input) INTEGER
00108 *          IYROW specifies the current row process which holds the
00109 *          first element of the vector Y, which is transposed of X.
00110 *          If X  is a column vector and the transposed  row vector Y is
00111 *          distributed all rows of processes, set IYROW = -1.
00112 *
00113 *  IYCOL   (input) INTEGER
00114 *          IYCOL specifies  the current column process  which holds
00115 *          the first element of the vector Y, which is transposed of Y.
00116 *          If X is a row block and the transposed column vector Y is
00117 *          distributed all columns of processes, set IYCOL = -1.
00118 *
00119 *  WORK    (workspace) DOUBLE PRECISION array of dimension Size(WORK).
00120 *          It needs extra working space of x**T or x**H.
00121 *
00122 *  Parameters Details
00123 *  ==================
00124 *
00125 *  Nx      It is a local portion  of N owned by a process, where x is
00126 *          replaced by  either p (=NPROW) or q (=NPCOL)).  The value is
00127 *          determined by N, NB, NZ, x, and MI, where NB is a block size,
00128 *          NZ is a offset from the beginning of the block,  and MI is a
00129 *          row or column position  in a process template. Nx is equal
00130 *          to  or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
00131 *
00132 *  Communication Scheme
00133 *  ====================
00134 *
00135 *  The communication scheme of the routine is set to '1-tree', which is
00136 *  fan-out.  (For details, see BLACS user's guide.)
00137 *
00138 *  Memory Requirement of WORK
00139 *  ==========================
00140 *
00141 *  NN   = N + NZ
00142 *  Npb  = CEIL( NN, NB*NPROW )
00143 *  Nqb  = CEIL( NN, NB*NPCOL )
00144 *  LCMP = LCM / NPROW
00145 *  LCMQ = LCM / NPCOL
00146 *
00147 *   (1) XDIST = 'C'
00148 *     (a) IXCOL != -1
00149 *         Size(WORK) = CEIL(Nqb,LCMQ)*NB
00150 *     (b) IXCOL = -1
00151 *         Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
00152 *
00153 *   (2) XDIST = 'R'
00154 *     (a) IXROW != -1
00155 *         Size(WORK) = CEIL(Npb,LCMP)*NB
00156 *     (b) IXROW = -1
00157 *         Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
00158 *
00159 *  Notes
00160 *  -----
00161 *  More precise space can be computed as
00162 *
00163 *  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
00164 *  CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
00165 *
00166 *  =====================================================================
00167 *
00168 *     .. Parameters ..
00169       DOUBLE PRECISION   ONE, ZERO
00170       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
00171 *     ..
00172 *     .. Local Scalars ..
00173       LOGICAL            COLFORM, ROWFORM
00174       INTEGER            I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
00175      $                   LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW,
00176      $                   MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW,
00177      $                   NQ, NQ0, NQ1
00178       DOUBLE PRECISION   TBETA
00179 *     ..
00180 *     .. External Functions ..
00181       LOGICAL            LSAME
00182       INTEGER            ILCM, ICEIL, NUMROC
00183       EXTERNAL           LSAME, ILCM, ICEIL, NUMROC
00184 *     ..
00185 *     .. External Subroutines ..
00186       EXTERNAL           BLACS_GRIDINFO, DGEBR2D, DGEBS2D, DGERV2D,
00187      $                   DGESD2D, PBDTR2A1, PBDTR2B1, PBDTRGET,
00188      $                   PBDTRST1, PBDVECADD, PXERBLA
00189 *     ..
00190 *     .. Intrinsic Functions ..
00191       INTRINSIC          MAX, MIN, MOD
00192 *     ..
00193 *     .. Executable Statements ..
00194 *
00195 *     Quick return if possible.
00196 *
00197       IF( N.EQ.0 ) RETURN
00198 *
00199       CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL )
00200 *
00201       COLFORM = LSAME( XDIST, 'C' )
00202       ROWFORM = LSAME( XDIST, 'R' )
00203 *
00204 *     Test the input parameters.
00205 *
00206       INFO = 0
00207       IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN
00208          INFO = 2
00209       ELSE IF( N   .LT.0                          ) THEN
00210          INFO = 4
00211       ELSE IF( NB  .LT.1                          ) THEN
00212          INFO = 5
00213       ELSE IF( NZ  .LT.0 .OR. NZ.GE.NB            ) THEN
00214          INFO = 6
00215       ELSE IF( INCX.EQ.0                          ) THEN
00216          INFO = 8
00217       ELSE IF( INCY.EQ.0                          ) THEN
00218          INFO = 11
00219       ELSE IF( IXROW.LT.-1 .OR. IXROW.GE.NPROW .OR.
00220      $       ( IXROW.EQ.-1 .AND. COLFORM )        ) THEN
00221          INFO = 12
00222       ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL .OR.
00223      $       ( IXCOL.EQ.-1 .AND. ROWFORM )        ) THEN
00224          INFO = 13
00225       ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW .OR.
00226      $       ( IYROW.EQ.-1 .AND. ROWFORM )        ) THEN
00227          INFO = 14
00228       ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL .OR.
00229      $       ( IYCOL.EQ.-1 .AND. COLFORM )        ) THEN
00230          INFO = 15
00231       END IF
00232 *
00233    10 CONTINUE
00234       IF( INFO.NE.0 ) THEN
00235          CALL PXERBLA( ICONTXT, 'PBDTRNV ', INFO )
00236          RETURN
00237       END IF
00238 *
00239 *     Start the operations.
00240 *
00241 *     LCM : the least common multiple of NPROW and NPCOL
00242 *
00243       LCM  = ILCM( NPROW, NPCOL )
00244       LCMP = LCM   / NPROW
00245       LCMQ = LCM   / NPCOL
00246       IGD  = NPCOL / LCMP
00247       NN   = N + NZ
00248 *
00249 *     When x is a column vector
00250 *
00251       IF( COLFORM ) THEN
00252 *
00253 *       Form  y <== x'  ( x is a column vector )
00254 *
00255 *                                        ||
00256 *                                        ||
00257 *            _____________               ||
00258 *            -----(y)-----      <==     (x)
00259 *                                        ||
00260 *                                        ||
00261 *                                        ||
00262 *
00263         IF(      IXROW.LT.0  .OR. IXROW.GE.NPROW ) THEN
00264           INFO = 12
00265         ELSE IF( IXCOL.LT.-1 .OR. IXCOL.GE.NPCOL ) THEN
00266           INFO = 13
00267         ELSE IF( IYROW.LT.-1 .OR. IYROW.GE.NPROW ) THEN
00268           INFO = 14
00269         ELSE IF( IYCOL.LT.0  .OR. IYCOL.GE.NPCOL ) THEN
00270           INFO = 15
00271         END IF
00272         IF( INFO.NE.0 ) GO TO 10
00273 *
00274 *       MRROW : row relative position in template from IXROW
00275 *       MRCOL : column relative position in template from IYCOL
00276 *
00277         MRROW = MOD( NPROW+MYROW-IXROW, NPROW )
00278         MRCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL )
00279         JYROW = IYROW
00280         IF( IYROW.EQ.-1 ) JYROW = IXROW
00281 *
00282         NP  = NUMROC( NN, NB, MYROW, IXROW, NPROW )
00283         IF( MRROW.EQ.0 ) NP = NP - NZ
00284         NQ  = NUMROC( NN, NB, MYCOL, IYCOL, NPCOL )
00285         IF( MRCOL.EQ.0 ) NQ = NQ - NZ
00286         NQ0 = NUMROC( NUMROC(NN, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ )
00287 *
00288 *       When a column process of IXCOL has a column block A,
00289 *
00290         IF( IXCOL .GE. 0 ) THEN
00291           TBETA = ZERO
00292           IF( MYROW.EQ.JYROW ) TBETA = BETA
00293           KZ = NZ
00294 *
00295           DO 20 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1
00296             MCROW = MOD( MOD(I, NPROW) + IXROW, NPROW )
00297             MCCOL = MOD( MOD(I, NPCOL) + IYCOL, NPCOL )
00298             IF( LCMQ.EQ.1 )  NQ0 = NUMROC( NN, NB, I, 0, NPCOL )
00299             JDEX  = (I/NPCOL) * NB
00300             IF( MRCOL.EQ.0 ) JDEX = MAX(0, JDEX-NZ)
00301 *
00302 *           A source node copies the blocks to WORK, and send it
00303 *
00304             IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IXCOL ) THEN
00305 *
00306 *             The source node is a destination node
00307 *
00308               IDEX = (I/NPROW) * NB
00309               IF( MRROW.EQ.0 ) IDEX = MAX( 0, IDEX-NZ )
00310               IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN
00311                 CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ,
00312      $                          X(IDEX*INCX+1), INCX, TBETA,
00313      $                          Y(JDEX*INCY+1), INCY, LCMP, LCMQ )
00314 *
00315 *             The source node sends blocks to a destination node
00316 *
00317               ELSE
00318                 CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, KZ,
00319      $                         X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
00320      $                         LCMP, 1 )
00321                 CALL DGESD2D( ICONTXT, 1, NQ0-KZ, WORK, 1,
00322      $                        JYROW, MCCOL )
00323               END IF
00324 *
00325 *           A destination node receives the copied vector
00326 *
00327             ELSE IF( MYROW.EQ.JYROW .AND. MYCOL.EQ.MCCOL ) THEN
00328               IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN
00329                 CALL DGERV2D( ICONTXT, 1, NQ0-KZ, Y, INCY,
00330      $                        MCROW, IXCOL )
00331               ELSE
00332                 CALL DGERV2D( ICONTXT, 1, NQ0-KZ, WORK, 1,
00333      $                        MCROW, IXCOL )
00334                 CALL PBDTR2A1( ICONTXT, NQ-JDEX, NB, KZ, WORK, 1, TBETA,
00335      $                         Y(JDEX*INCY+1), INCY, LCMQ*NB )
00336               END IF
00337             END IF
00338             KZ = 0
00339    20     CONTINUE
00340 *
00341 *         Broadcast a row block of WORK in each column of template
00342 *
00343           IF( IYROW.EQ.-1 ) THEN
00344             IF( MYROW.EQ.JYROW ) THEN
00345               CALL DGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY )
00346             ELSE
00347               CALL DGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY,
00348      $                     JYROW, MYCOL )
00349              END IF
00350           END IF
00351 *
00352 *       When all column procesors have a copy of the column block A,
00353 *
00354         ELSE
00355           IF( LCMQ.EQ.1 ) NQ0 = NQ
00356 *
00357 *         Processors, which have diagonal blocks of X, copy them to
00358 *         WORK array in transposed form
00359 *
00360           KZ = 0
00361           IF( MRROW.EQ.0 ) KZ = NZ
00362           JZ = 0
00363           IF( MRROW.EQ.0 .AND. MYCOL.EQ.IYCOL ) JZ = NZ
00364 *
00365           DO 30 I = 0, LCMP - 1
00366             IF( MRCOL.EQ.MOD(NPROW*I+MRROW, NPCOL) ) THEN
00367               IDEX = MAX( 0, I*NB-KZ )
00368               IF( LCMQ.EQ.1 .AND. (IYROW.EQ.-1.OR.IYROW.EQ.MYROW) ) THEN
00369                  CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ,
00370      $                          X(IDEX*INCX+1), INCX, BETA, Y, INCY,
00371      $                          LCMP, 1 )
00372               ELSE
00373                  CALL PBDTR2B1( ICONTXT, TRANS, NP-IDEX, NB, JZ,
00374      $                          X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
00375      $                          LCMP, 1 )
00376               END IF
00377             END IF
00378    30     CONTINUE
00379 *
00380 *         Get diagonal blocks of A for each column of the template
00381 *
00382           MCROW = MOD( MOD(MRCOL, NPROW) + IXROW, NPROW )
00383           IF( LCMQ.GT.1 ) THEN
00384             MCCOL = MOD( NPCOL+MYCOL-IYCOL, NPCOL )
00385             CALL PBDTRGET( ICONTXT, 'Row', 1, NQ0, ICEIL( NN, NB ),
00386      $                     WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL,
00387      $                     NPROW, NPCOL )
00388           END IF
00389 *
00390 *         Broadcast a row block of WORK in every row of template
00391 *
00392           IF( IYROW.EQ.-1 ) THEN
00393             IF( MYROW.EQ.MCROW ) THEN
00394               IF( LCMQ.GT.1 ) THEN
00395                 KZ = 0
00396                 IF( MYCOL.EQ.IYCOL ) KZ = NZ
00397                 CALL PBDTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1,
00398      $                         BETA, Y, INCY, LCMP, LCMQ, NQ0 )
00399               END IF
00400               CALL DGEBS2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY )
00401             ELSE
00402               CALL DGEBR2D( ICONTXT, 'Col', '1-tree', 1, NQ, Y, INCY,
00403      $                      MCROW, MYCOL )
00404             END IF
00405 *
00406 *         Send a row block of WORK to the destination row
00407 *
00408           ELSE
00409             IF( LCMQ.EQ.1 ) THEN
00410               IF( MYROW.EQ.MCROW ) THEN
00411                 IF( MYROW.NE.IYROW )
00412      $            CALL DGESD2D( ICONTXT, 1, NQ0, WORK, 1, IYROW, MYCOL )
00413               ELSE IF( MYROW.EQ.IYROW ) THEN
00414                 IF( BETA.EQ.ZERO ) THEN
00415                   CALL DGERV2D( ICONTXT, 1, NQ0, Y, INCY, MCROW, MYCOL )
00416                 ELSE
00417                   CALL DGERV2D( ICONTXT, 1, NQ0, WORK, 1, MCROW, MYCOL )
00418                   CALL PBDVECADD( ICONTXT, 'G', NQ0, ONE, WORK, 1,
00419      $                            BETA, Y, INCY )
00420                 END IF
00421               END IF
00422 *
00423             ELSE
00424               NQ1 = NQ0 * MIN( LCMQ, MAX( 0, ICEIL(NN,NB)-MCCOL ) )
00425               IF( MYROW.EQ.MCROW ) THEN
00426                 IF( MYROW.NE.IYROW )
00427      $            CALL DGESD2D( ICONTXT, 1, NQ1, WORK, 1, IYROW, MYCOL )
00428               ELSE IF( MYROW.EQ.IYROW ) THEN
00429                 CALL DGERV2D( ICONTXT, 1, NQ1, WORK, 1, MCROW, MYCOL )
00430               END IF
00431 *
00432               IF( MYROW.EQ.IYROW ) THEN
00433                 KZ = 0
00434                 IF( MYCOL.EQ.IYCOL ) KZ = NZ
00435                 CALL PBDTRST1( ICONTXT, 'Row', NQ, NB, KZ, WORK, 1,
00436      $                         BETA, Y, INCY, LCMP, LCMQ, NQ0 )
00437               END IF
00438             END IF
00439           END IF
00440         END IF
00441 *
00442 *     When x is a row vector
00443 *
00444       ELSE
00445 *
00446 *       Form  y <== x'  ( x is a row block )
00447 *
00448 *           ||
00449 *           ||
00450 *           ||               _____________
00451 *          (y)      <==      -----(x)-----
00452 *           ||
00453 *           ||
00454 *           ||
00455 *
00456         IF(      IXROW.LT.-1 .OR. IXROW.GE.NPROW ) THEN
00457           INFO = 12
00458         ELSE IF( IXCOL.LT.0  .OR. IXCOL.GE.NPCOL ) THEN
00459           INFO = 13
00460         ELSE IF( IYROW.LT.0  .OR. IYROW.GE.NPROW ) THEN
00461           INFO = 14
00462         ELSE IF( IYCOL.LT.-1 .OR. IYCOL.GE.NPCOL ) THEN
00463           INFO = 15
00464         END IF
00465         IF( INFO.NE.0 ) GO TO 10
00466 *
00467 *       MRROW : row relative position in template from IYROW
00468 *       MRCOL : column relative position in template from IXCOL
00469 *
00470         MRROW = MOD( NPROW+MYROW-IYROW, NPROW )
00471         MRCOL = MOD( NPCOL+MYCOL-IXCOL, NPCOL )
00472         JYCOL = IYCOL
00473         IF( IYCOL.EQ.-1 ) JYCOL = IXCOL
00474 *
00475         NP  = NUMROC( NN, NB, MYROW, IYROW, NPROW )
00476         IF( MRROW.EQ.0 ) NP = NP - NZ
00477         NQ  = NUMROC( NN, NB, MYCOL, IXCOL, NPCOL )
00478         IF( MRCOL.EQ.0 ) NQ = NQ - NZ
00479         NP0 = NUMROC( NUMROC(NN, NB, 0, 0, NPROW), NB, 0, 0, LCMP )
00480 *
00481 *       When a row process of IXROW has a row block A,
00482 *
00483         IF( IXROW .GE. 0 ) THEN
00484           TBETA = ZERO
00485           IF( MYCOL.EQ.JYCOL ) TBETA = BETA
00486           KZ = NZ
00487 *
00488           DO 40 I = 0, MIN( LCM, ICEIL(NN,NB) ) - 1
00489             MCROW = MOD( MOD(I, NPROW) + IYROW, NPROW )
00490             MCCOL = MOD( MOD(I, NPCOL) + IXCOL, NPCOL )
00491             IF( LCMP.EQ.1 ) NP0 = NUMROC( NN, NB, I, 0, NPROW )
00492             JDEX  = (I/NPROW) * NB
00493             IF( MRROW.EQ.0 ) JDEX = MAX(0, JDEX-NZ)
00494 *
00495 *           A source node copies the blocks to WORK, and send it
00496 *
00497             IF( MYROW.EQ.IXROW .AND. MYCOL.EQ.MCCOL ) THEN
00498 *
00499 *             The source node is a destination node
00500 *
00501               IDEX = (I/NPCOL) * NB
00502               IF( MRCOL.EQ.0 ) IDEX = MAX( 0, IDEX-NZ )
00503               IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN
00504                 CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ,
00505      $                         X(IDEX*INCX+1), INCX, TBETA,
00506      $                         Y(JDEX*INCY+1), INCY, LCMQ, LCMP )
00507 *
00508 *             The source node sends blocks to a destination node
00509 *
00510               ELSE
00511                 CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, KZ,
00512      $                         X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
00513      $                         LCMQ, 1 )
00514                 CALL DGESD2D( ICONTXT, 1, NP0-KZ, WORK, 1,
00515      $                        MCROW, JYCOL )
00516               END IF
00517 *
00518 *           A destination node receives the copied blocks
00519 *
00520             ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JYCOL ) THEN
00521               IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN
00522                 CALL DGERV2D( ICONTXT, 1, NP0-KZ, Y, INCY,
00523      $                        IXROW, MCCOL )
00524               ELSE
00525                 CALL DGERV2D( ICONTXT, 1, NP0-KZ, WORK, 1,
00526      $                        IXROW, MCCOL )
00527                 CALL PBDTR2A1( ICONTXT, NP-JDEX, NB, KZ, WORK, 1, TBETA,
00528      $                         Y(JDEX*INCY+1), INCY, LCMP*NB )
00529               END IF
00530             END IF
00531             KZ = 0
00532    40     CONTINUE
00533 *
00534 *         Broadcast a column vector Y in each row of template
00535 *
00536           IF( IYCOL.EQ.-1 ) THEN
00537             IF( MYCOL.EQ.JYCOL ) THEN
00538               CALL DGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY )
00539             ELSE
00540               CALL DGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY,
00541      $                      MYROW, JYCOL )
00542             END IF
00543           END IF
00544 *
00545 *       When all row procesors have a copy of the row block A,
00546 *
00547         ELSE
00548           IF( LCMP.EQ.1 ) NP0 = NP
00549 *
00550 *         Processors, which have diagonal blocks of A, copy them to
00551 *         WORK array in transposed form
00552 *
00553           KZ = 0
00554           IF( MRCOL.EQ.0 ) KZ = NZ
00555           JZ = 0
00556           IF( MRCOL.EQ.0 .AND. MYROW.EQ.IYROW ) JZ = NZ
00557 *
00558           DO 50 I = 0, LCMQ-1
00559             IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN
00560               IDEX = MAX( 0, I*NB-KZ )
00561               IF( LCMP.EQ.1 .AND. (IYCOL.EQ.-1.OR.IYCOL.EQ.MYCOL) ) THEN
00562                 CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ,
00563      $                          X(IDEX*INCX+1), INCX, BETA, Y, INCY,
00564      $                          LCMQ, 1 )
00565               ELSE
00566                 CALL PBDTR2B1( ICONTXT, TRANS, NQ-IDEX, NB, JZ,
00567      $                         X(IDEX*INCX+1), INCX, ZERO, WORK, 1,
00568      $                         LCMQ, 1 )
00569               END IF
00570             END IF
00571    50     CONTINUE
00572 *
00573 *         Get diagonal blocks of A for each row of the template
00574 *
00575           MCCOL = MOD( MOD(MRROW, NPCOL) + IXCOL, NPCOL )
00576           IF( LCMP.GT.1 ) THEN
00577             MCROW = MOD( NPROW+MYROW-IYROW, NPROW )
00578             CALL PBDTRGET( ICONTXT, 'Col', 1, NP0, ICEIL( NN, NB ),
00579      $                     WORK, 1, MCROW, MCCOL, IGD, MYROW, MYCOL,
00580      $                     NPROW, NPCOL )
00581           END IF
00582 *
00583 *         Broadcast a column block of WORK in every column of template
00584 *
00585           IF( IYCOL.EQ.-1 ) THEN
00586             IF( MYCOL.EQ.MCCOL ) THEN
00587               IF( LCMP.GT.1 ) THEN
00588                 KZ = 0
00589                 IF( MYROW.EQ.IYROW ) KZ = NZ
00590                 CALL PBDTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1,
00591      $                         BETA, Y, INCY, LCMP, LCMQ, NP0 )
00592               END IF
00593               CALL DGEBS2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY )
00594             ELSE
00595               CALL DGEBR2D( ICONTXT, 'Row', '1-tree', 1, NP, Y, INCY,
00596      $                      MYROW, MCCOL )
00597             END IF
00598 *
00599 *         Send a column block of WORK to the destination column
00600 *
00601           ELSE
00602             IF( LCMP.EQ.1 ) THEN
00603               IF( MYCOL.EQ.MCCOL ) THEN
00604                 IF( MYCOL.NE.IYCOL )
00605      $            CALL DGESD2D( ICONTXT, 1, NP, WORK, 1, MYROW, IYCOL )
00606               ELSE IF( MYCOL.EQ.IYCOL ) THEN
00607                 IF( BETA.EQ.ZERO ) THEN
00608                   CALL DGERV2D( ICONTXT, 1, NP, Y, INCY, MYROW, MCCOL )
00609                 ELSE
00610                   CALL DGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, MCCOL )
00611                   CALL PBDVECADD( ICONTXT, 'G', NP, ONE, WORK, 1, BETA,
00612      $                            Y, INCY )
00613                 END IF
00614               END IF
00615 *
00616             ELSE
00617               NP1 = NP0 * MIN( LCMP, MAX( 0, ICEIL(NN,NB)-MCROW ) )
00618               IF( MYCOL.EQ.MCCOL ) THEN
00619                 IF( MYCOL.NE.IYCOL )
00620      $            CALL DGESD2D( ICONTXT, 1, NP1, WORK, 1, MYROW, IYCOL )
00621               ELSE IF( MYCOL.EQ.IYCOL ) THEN
00622                 CALL DGERV2D( ICONTXT, 1, NP1, WORK, 1, MYROW, MCCOL )
00623               END IF
00624 *
00625               IF( MYCOL.EQ.IYCOL ) THEN
00626                 KZ = 0
00627                 IF( MYROW.EQ.IYROW ) KZ = NZ
00628                 CALL PBDTRST1( ICONTXT, 'Col', NP, NB, KZ, WORK, 1,
00629      $                         BETA, Y, INCY, LCMP, LCMQ, NP0 )
00630               END IF
00631             END IF
00632           END IF
00633         END IF
00634       END IF
00635 *
00636       RETURN
00637 *
00638 *     End of PBDTRNV
00639 *
00640       END
00641 *
00642 *=======================================================================
00643 *     SUBROUTINE PBDTR2A1
00644 *=======================================================================
00645 *
00646       SUBROUTINE PBDTR2A1( ICONTXT, N, NB, NZ, X, INCX, BETA, Y, INCY,
00647      $                     INTV )
00648 *
00649 *  -- PB-BLAS routine (version 2.1) --
00650 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
00651 *     April 28, 1996
00652 *
00653 *     .. Scalar Arguments ..
00654       INTEGER              ICONTXT, N, NB, NZ, INCX, INCY, INTV
00655       DOUBLE PRECISION     BETA
00656 *     ..
00657 *     .. Array Arguments ..
00658       DOUBLE PRECISION     X( * ), Y( * )
00659 *     ..
00660 *
00661 *  Purpose
00662 *  =======
00663 *
00664 *     y <== x
00665 *     y is a scattered vector, copied from a condensed vector x.
00666 *
00667 *     ..
00668 *     .. Intrinsic Functions ..
00669       INTRINSIC            MIN
00670 *     ..
00671 *     .. External Functions ..
00672       INTEGER              ICEIL
00673       EXTERNAL             ICEIL
00674 *     ..
00675 *     .. External Subroutines ..
00676       EXTERNAL             PBDVECADD
00677 *     ..
00678 *     .. Parameters ..
00679       DOUBLE PRECISION     ONE
00680       PARAMETER          ( ONE = 1.0D+0 )
00681 *     ..
00682 *     .. Local Variables ..
00683       INTEGER              IX, IY, JZ, K, ITER
00684 *
00685       IX = 0
00686       IY = 0
00687       JZ = NZ
00688       ITER = ICEIL( N+NZ, INTV )
00689 *
00690       IF( ITER.GT.1 ) THEN
00691          CALL PBDVECADD( ICONTXT, 'G', NB-JZ, ONE, X(IX*INCX+1), INCX,
00692      $                   BETA, Y(IY*INCY+1), INCY )
00693          IX = IX + NB   - JZ
00694          IY = IY + INTV - JZ
00695          JZ = 0
00696 *
00697          DO 10 K = 2, ITER-1
00698             CALL PBDVECADD( ICONTXT, 'G', NB, ONE, X(IX*INCX+1), INCX,
00699      $                      BETA, Y(IY*INCY+1), INCY )
00700             IX = IX + NB
00701             IY = IY + INTV
00702    10    CONTINUE
00703       END IF
00704 *
00705       CALL PBDVECADD( ICONTXT, 'G', MIN( N-IY, NB-JZ ), ONE,
00706      $                X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY )
00707 *
00708       RETURN
00709 *
00710 *     End of PBDTR2A1
00711 *
00712       END
00713 *
00714 *=======================================================================
00715 *     SUBROUTINE PBDTR2B1
00716 *=======================================================================
00717 *
00718       SUBROUTINE PBDTR2B1( ICONTXT, TRANS, N, NB, NZ, X, INCX, BETA, Y,
00719      $                     INCY, JINX, JINY )
00720 *
00721 *  -- PB-BLAS routine (version 2.1) --
00722 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
00723 *     April 28, 1996
00724 *
00725 *     .. Scalar Arguments ..
00726       CHARACTER*1          TRANS
00727       INTEGER              ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
00728       DOUBLE PRECISION     BETA
00729 *     ..
00730 *     .. Array Arguments ..
00731       DOUBLE PRECISION     X( * ), Y( * )
00732 *     ..
00733 *
00734 *  Purpose
00735 *  =======
00736 *
00737 *     y <== x + beta * y
00738 *     y is a condensed vector, copied from a scattered vector x
00739 *
00740 *     ..
00741 *     .. Intrinsic Functions ..
00742       INTRINSIC            MIN
00743 *     ..
00744 *     .. External Functions ..
00745       INTEGER              ICEIL
00746       EXTERNAL             ICEIL
00747 *     ..
00748 *     .. External Subroutines ..
00749       EXTERNAL             PBDVECADD
00750 *     ..
00751 *     .. Parameters ..
00752       DOUBLE PRECISION     ONE
00753       PARAMETER          ( ONE = 1.0D+0 )
00754 *     ..
00755 *     .. Local Variables ..
00756       INTEGER              IX, IY, JZ, K, ITER, LENX, LENY
00757 *
00758       IF( JINX.EQ.1 .AND. JINY.EQ.1 ) THEN
00759          CALL PBDVECADD( ICONTXT, TRANS, N, ONE, X, INCX, BETA,
00760      $                   Y, INCY )
00761 *
00762       ELSE
00763          IX   = 0
00764          IY   = 0
00765          JZ   = NZ
00766          LENX = NB * JINX
00767          LENY = NB * JINY
00768          ITER = ICEIL( N+NZ, LENX )
00769 *
00770          IF( ITER.GT.1 ) THEN
00771             CALL PBDVECADD( ICONTXT, TRANS, NB-JZ, ONE, X(IX*INCX+1),
00772      $                      INCX, BETA, Y(IY*INCY+1), INCY )
00773             IX = IX + LENX - JZ
00774             IY = IY + LENY - JZ
00775             JZ = 0
00776 *
00777             DO 10 K = 2, ITER-1
00778                CALL PBDVECADD( ICONTXT, TRANS, NB, ONE, X(IX*INCX+1),
00779      $                         INCX, BETA, Y(IY*INCY+1), INCY )
00780                IX = IX + LENX
00781                IY = IY + LENY
00782    10       CONTINUE
00783          END IF
00784 *
00785          CALL PBDVECADD( ICONTXT, TRANS, MIN( N-IX, NB-JZ ), ONE,
00786      $                   X(IX*INCX+1), INCX, BETA, Y(IY*INCY+1), INCY )
00787       END IF
00788 *
00789       RETURN
00790 *
00791 *     End of PBDTR2B1
00792 *
00793       END