ScaLAPACK  2.0.2
ScaLAPACK: Scalable Linear Algebra PACKage
pbstran.f
Go to the documentation of this file.
00001       SUBROUTINE PBSTRAN( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA,
00002      $                    C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK )
00003 *
00004 *  -- PB-BLAS routine (version 2.1) --
00005 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
00006 *     April 28, 1996
00007 *
00008 *     Jaeyoung Choi, Oak Ridge National Laboratory
00009 *     Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
00010 *     David Walker,  Oak Ridge National Laboratory
00011 *
00012 *     .. Scalar Arguments ..
00013       CHARACTER*1        ADIST, TRANS
00014       INTEGER            IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
00015      $                   M, N, NB
00016       REAL               BETA
00017 *     ..
00018 *     .. Array Arguments ..
00019       REAL               A( LDA, * ), C( LDC, * ), WORK( * )
00020 *     ..
00021 *
00022 *  Purpose
00023 *  =======
00024 *
00025 *  PBSTRAN  transposes  a column block to row block, or a row block to
00026 *  column block by reallocating data distribution.
00027 *
00028 *     C := A^T + beta*C, or C := A^C + beta*C
00029 *
00030 *  where A is an M-by-N matrix  and C is an N-by-M matrix, and the size
00031 *  of M or N is limited to its block size NB.
00032 *
00033 *  The first elements  of the matrices A, and C  should  be  located  at
00034 *  the beginnings of their first blocks. (not the middle of the blocks.)
00035 *
00036 *  Parameters
00037 *  ==========
00038 *
00039 *  ICONTXT (input) INTEGER
00040 *          ICONTXT is the BLACS mechanism for partitioning communication
00041 *          space.  A defining property of a context is that a message in
00042 *          a context cannot be sent or received in another context.  The
00043 *          BLACS context includes the definition of a grid, and each
00044 *          process' coordinates in it.
00045 *
00046 *  ADIST  - (input) CHARACTER*1
00047 *           ADIST specifies whether A is a column block or a row block.
00048 *
00049 *              ADIST = 'C',  A is a column block
00050 *              ADIST = 'R',  A is a row block
00051 *
00052 *  TRANS  - (input) CHARACTER*1
00053 *           TRANS specifies whether the transposed format is transpose
00054 *           or conjugate transpose.  If the matrices A and C are real,
00055 *           the argument is ignored.
00056 *
00057 *              TRANS = 'T',  transpose
00058 *              TRANS = 'C',  conjugate transpose
00059 *
00060 *  M      - (input) INTEGER
00061 *           M specifies the (global) number of rows of the matrix (block
00062 *           column or block row) A and of columns of the matrix C.
00063 *           M >= 0.
00064 *
00065 *  N      - (input) INTEGER
00066 *           N specifies the (global) number of columns of the matrix
00067 *           (block column or block row) A  and of columns of the matrix
00068 *           C.  N >= 0.
00069 *
00070 *  NB     - (input) INTEGER
00071 *           NB specifies  the column block size of the matrix A and the
00072 *           row block size of the matrix C when ADIST = 'C'.  Otherwise,
00073 *           it specifies  the row block size of the matrix A and the
00074 *           column block size of the matrix C. NB >= 1.
00075 *
00076 *  A       (input) REAL array of DIMENSION ( LDA, Lx ),
00077 *          where Lx is N  when ADIST = 'C', or Nq when ADIST = 'R'.
00078 *          Before entry with  ADIST = 'C',  the leading Mp by N part of
00079 *          the array A must contain the matrix A, otherwise the leading
00080 *          M by Nq part of the array A  must contain the matrix A.  See
00081 *          parameter details for the values of Mp and Nq.
00082 *
00083 *  LDA     (input) INTEGER
00084 *          LDA specifies the leading dimension of (local) A as declared
00085 *          in the calling (sub) program.  LDA >= MAX(1,Mp) when
00086 *          ADIST = 'C', or LDA >= MAX(1,M) otherwise.
00087 *
00088 *  BETA    (input) REAL
00089 *          BETA specifies scaler beta.
00090 *
00091 *  C       (input/output) REAL array of DIMENSION ( LDC, Lx ),
00092 *          where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
00093 *          If ADIST = 'C', the leading N-by-Mq part of the array C
00094 *          contains the (local) matrix C, otherwise the leading
00095 *          Np-by-M part of the array C must contain the (local) matrix
00096 *          C.  C will not be referenced if beta is zero.
00097 *
00098 *  LDC     (input) INTEGER
00099 *          LDC specifies the leading dimension of (local) C as declared
00100 *          in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
00101 *          or LDC >= MAX(1,Np) otherwise.
00102 *
00103 *  IAROW   (input) INTEGER
00104 *          IAROW specifies  a row  of the process  template,
00105 *          which holds the first block  of the matrix  A. If A is a row
00106 *          of blocks (ADIST = 'R') and all rows of processes have a copy
00107 *          of A, then set IAROW = -1.
00108 *
00109 *  IACOL   (input) INTEGER
00110 *          IACOL specifies  a column of the process template,
00111 *          which holds  the first block  of the matrix A.  If  A is  a
00112 *          column of blocks (ADIST = 'C') and all columns of processes
00113 *          have a copy of A, then set IACOL = -1.
00114 *
00115 *  ICROW   (input) INTEGER
00116 *          ICROW specifies the current row process which holds
00117 *          the first block  of the matrix C,  which is transposed of A.
00118 *          If C is a row of blocks (ADIST = 'C') and the transposed
00119 *          row block C is distributed all rows of processes, set
00120 *          ICROW = -1.
00121 *
00122 *  ICCOL   (input) INTEGER
00123 *          ICCOL specifies  the current column process which holds
00124 *          the first block of the matrix C,  which is transposed of A.
00125 *          If C is a column of blocks (ADIST = 'R') and the transposed
00126 *          column block C is distributed all columns of processes,
00127 *          set ICCOL = -1.
00128 *
00129 *  WORK    (workspace) REAL array of dimension Size(WORK).
00130 *          It needs extra working space of A'.
00131 *
00132 *  Parameters Details
00133 *  ==================
00134 *
00135 *  Lx      It is  a local portion  of L  owned  by  a process,  (L is
00136 *          replaced by M, or N,  and x is replaced by either p (=NPROW)
00137 *          or q (=NPCOL)).  The value is  determined by  L, LB, x,  and
00138 *          MI, where  LB is  a block size  and  MI is a  row  or column
00139 *          position  in a process template.  Lx is  equal to  or less
00140 *          than Lx0 = CEIL( L, LB*x ) * LB.
00141 *
00142 *  Communication Scheme
00143 *  ====================
00144 *
00145 *  The communication scheme of the routine is set to '1-tree', which is
00146 *  fan-out.  (For details, see BLACS user's guide.)
00147 *
00148 *  Memory Requirement of WORK
00149 *  ==========================
00150 *
00151 *  Mqb  = CEIL( M, NB*NPCOL )
00152 *  Npb  = CEIL( N, NB*NPROW )
00153 *  LCMQ = LCM / NPCOL
00154 *  LCMP = LCM / NPROW
00155 *
00156 *  (1) ADIST = 'C'
00157 *   (a) IACOL != -1
00158 *       Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
00159 *   (b) IACOL = -1
00160 *       Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
00161 *
00162 *  (2) ADIST = 'R'
00163 *   (a) IAROW != -1
00164 *       Size(WORK) = M * CEIL(Npb,LCMP)*NB
00165 *   (b) IAROW = -1
00166 *       Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
00167 *
00168 *  Notes
00169 *  -----
00170 *  More precise space can be computed as
00171 *
00172 *  CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
00173 *  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
00174 *
00175 *  =====================================================================
00176 *
00177 *     ..
00178 *     .. Parameters ..
00179       REAL               ONE, ZERO
00180       PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
00181 *     ..
00182 *     .. Local Scalars ..
00183       LOGICAL            COLFORM, ROWFORM
00184       INTEGER            I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
00185      $                   LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0,
00186      $                   MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL,
00187      $                   NPROW, NQ
00188       REAL               TBETA
00189 *     ..
00190 *     .. External Functions ..
00191       LOGICAL            LSAME
00192       INTEGER            ILCM, ICEIL, NUMROC
00193       EXTERNAL           ILCM, ICEIL, LSAME, NUMROC
00194 *     ..
00195 *     .. External Subroutines ..
00196       EXTERNAL           BLACS_GRIDINFO, PBSMATADD, PBSTR2AF, PBSTR2AT,
00197      $                   PBSTR2BT, PBSTRGET, PBSTRSRT, PXERBLA, SGEBR2D,
00198      $                   SGEBS2D, SGERV2D, SGESD2D
00199 *     ..
00200 *     .. Intrinsic Functions ..
00201       INTRINSIC          MAX, MIN, MOD
00202 *     ..
00203 *     .. Executable Statements ..
00204 *
00205 *     Quick return if possible.
00206 *
00207       IF( M.EQ.0 .OR. N.EQ.0 ) RETURN
00208 *
00209       CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL )
00210 *
00211       COLFORM = LSAME( ADIST, 'C' )
00212       ROWFORM = LSAME( ADIST, 'R' )
00213 *
00214 *     Test the input parameters.
00215 *
00216       INFO = 0
00217       IF( ( .NOT.COLFORM ) .AND. ( .NOT.ROWFORM ) ) THEN
00218          INFO = 2
00219       ELSE IF( M .LT.0                            ) THEN
00220          INFO = 4
00221       ELSE IF( N .LT.0                            ) THEN
00222          INFO = 5
00223       ELSE IF( NB.LT.1                            ) THEN
00224          INFO = 6
00225       ELSE IF( IAROW.LT.-1 .OR. IAROW.GE.NPROW .OR.
00226      $       ( IAROW.EQ.-1 .AND. COLFORM )        ) THEN
00227          INFO = 12
00228       ELSE IF( IACOL.LT.-1 .OR. IACOL.GE.NPCOL .OR.
00229      $       ( IACOL.EQ.-1 .AND. ROWFORM )        ) THEN
00230          INFO = 13
00231       ELSE IF( ICROW.LT.-1 .OR. ICROW.GE.NPROW .OR.
00232      $       ( ICROW.EQ.-1 .AND. ROWFORM )        ) THEN
00233          INFO = 14
00234       ELSE IF( ICCOL.LT.-1 .OR. ICCOL.GE.NPCOL .OR.
00235      $       ( ICCOL.EQ.-1 .AND. COLFORM )        ) THEN
00236          INFO = 15
00237       END IF
00238 *
00239    10 CONTINUE
00240       IF( INFO .NE. 0 ) THEN
00241          CALL PXERBLA( ICONTXT, 'PBSTRAN ', INFO )
00242          RETURN
00243       END IF
00244 *
00245 *     Start the operations.
00246 *
00247 *     LCM : the least common multiple of NPROW and NPCOL
00248 *
00249       LCM  = ILCM( NPROW, NPCOL )
00250       LCMP = LCM   / NPROW
00251       LCMQ = LCM   / NPCOL
00252       IGD  = NPCOL / LCMP
00253 *
00254 *     When A is a column block
00255 *
00256       IF( COLFORM ) THEN
00257 *
00258 *       Form  C <== A'  ( A is a column block )
00259 *                                         _
00260 *                                        | |
00261 *                                        | |
00262 *            _____________               | |
00263 *           |______C______|     <==      |A|
00264 *                                        | |
00265 *                                        | |
00266 *                                        |_|
00267 *
00268 *       MRROW : row relative position in template from IAROW
00269 *       MRCOL : column relative position in template from ICCOL
00270 *
00271         MRROW = MOD( NPROW+MYROW-IAROW, NPROW )
00272         MRCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL )
00273         JCROW = ICROW
00274         IF( ICROW.EQ.-1 ) JCROW = IAROW
00275 *
00276         MP  = NUMROC( M, NB, MYROW, IAROW, NPROW )
00277         MQ  = NUMROC( M, NB, MYCOL, ICCOL, NPCOL )
00278         MQ0 = NUMROC( NUMROC(M, NB, 0, 0, NPCOL), NB, 0, 0, LCMQ )
00279 *
00280         IF( LDA.LT.MP .AND.
00281      $         ( IACOL.EQ.MYCOL .OR. IACOL.EQ.-1 ) ) THEN
00282            INFO = 8
00283         ELSE IF( LDC.LT.N .AND.
00284      $         ( ICROW.EQ.MYROW .OR. ICROW.EQ.-1 ) ) THEN
00285            INFO = 11
00286         END IF
00287         IF( INFO.NE.0 ) GO TO 10
00288 *
00289 *       When a column process of IACOL has a column block A,
00290 *
00291         IF( IACOL.GE.0 ) THEN
00292           TBETA = ZERO
00293           IF( MYROW.EQ.JCROW ) TBETA = BETA
00294 *
00295           DO 20 I = 0, MIN( LCM, ICEIL(M,NB) ) - 1
00296             MCROW = MOD( MOD(I, NPROW) + IAROW, NPROW )
00297             MCCOL = MOD( MOD(I, NPCOL) + ICCOL, NPCOL )
00298             IF( LCMQ.EQ.1 )  MQ0 = NUMROC( M, NB, I, 0, NPCOL )
00299             JDEX = (I/NPCOL) * NB
00300 *
00301 *           A source node copies the blocks to WORK, and send it
00302 *
00303             IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.IACOL ) THEN
00304 *
00305 *             The source node is a destination node
00306 *
00307               IDEX = (I/NPROW) * NB
00308               IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN
00309                 CALL PBSTR2AT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB,
00310      $                         A(IDEX+1,1), LDA, TBETA, C(1,JDEX+1),
00311      $                         LDC, LCMP, LCMQ )
00312 *
00313 *             The source node sends blocks to a destination node
00314 *
00315               ELSE
00316                 CALL PBSTR2BT( ICONTXT, 'Col', TRANS, MP-IDEX, N, NB,
00317      $                         A(IDEX+1,1), LDA, ZERO, WORK, N,
00318      $                         LCMP*NB )
00319                 CALL SGESD2D( ICONTXT, N, MQ0, WORK, N, JCROW, MCCOL )
00320               END IF
00321 *
00322 *           A destination node receives the copied blocks
00323 *
00324             ELSE IF( MYROW.EQ.JCROW .AND. MYCOL.EQ.MCCOL ) THEN
00325               IF( LCMQ.EQ.1 .AND. TBETA.EQ.ZERO ) THEN
00326                 CALL SGERV2D( ICONTXT, N, MQ0, C, LDC, MCROW, IACOL )
00327               ELSE
00328                 CALL SGERV2D( ICONTXT, N, MQ0, WORK, N, MCROW, IACOL )
00329                 CALL PBSTR2AF( ICONTXT, 'Row', N, MQ-JDEX, NB, WORK, N,
00330      $                         TBETA, C(1,JDEX+1), LDC, LCMP, LCMQ,
00331      $                         MQ0 )
00332               END IF
00333             END IF
00334    20     CONTINUE
00335 *
00336 *         Broadcast a row block of C in each column of template
00337 *
00338           IF( ICROW.EQ.-1 ) THEN
00339             IF( MYROW.EQ.JCROW ) THEN
00340               CALL SGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC )
00341             ELSE
00342               CALL SGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC,
00343      $                      JCROW, MYCOL )
00344             END IF
00345           END IF
00346 *
00347 *       When all column procesors have a copy of the column block A,
00348 *
00349         ELSE
00350           IF( LCMQ.EQ.1 ) MQ0 = MQ
00351 *
00352 *         Processors, which have diagonal blocks of A, copy them to
00353 *         WORK array in transposed form
00354 *
00355           DO 30 I = 0, LCMP-1
00356             IF( MRCOL.EQ.MOD( NPROW*I+MRROW, NPCOL ) ) THEN
00357               IF( LCMQ.EQ.1.AND.(ICROW.EQ.-1.OR.ICROW.EQ.MYROW) ) THEN
00358                  CALL PBSTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB,
00359      $                          A(I*NB+1,1), LDA, BETA, C, LDC,
00360      $                          LCMP*NB )
00361               ELSE
00362                  CALL PBSTR2BT( ICONTXT, 'Col', TRANS, MP-I*NB, N, NB,
00363      $                          A(I*NB+1,1), LDA, ZERO, WORK, N,
00364      $                          LCMP*NB )
00365               END IF
00366             END IF
00367    30     CONTINUE
00368 *
00369 *         Get diagonal blocks of A for each column of the template
00370 *
00371           MCROW = MOD( MOD(MRCOL,NPROW)+IAROW, NPROW )
00372           IF( LCMQ.GT.1 ) THEN
00373             MCCOL = MOD( NPCOL+MYCOL-ICCOL, NPCOL )
00374             CALL PBSTRGET( ICONTXT, 'Row', N, MQ0, ICEIL(M,NB), WORK, N,
00375      $                     MCROW,  MCCOL, IGD, MYROW, MYCOL, NPROW,
00376      $                     NPCOL )
00377           END IF
00378 *
00379 *         Broadcast a row block of WORK in every row of template
00380 *
00381           IF( ICROW.EQ.-1 ) THEN
00382             IF( MYROW.EQ.MCROW ) THEN
00383               IF( LCMQ.GT.1 )
00384      $          CALL PBSTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA,
00385      $                         C, LDC, LCMP, LCMQ, MQ0 )
00386               CALL SGEBS2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC )
00387             ELSE
00388               CALL SGEBR2D( ICONTXT, 'Col', '1-tree', N, MQ, C, LDC,
00389      $                      MCROW, MYCOL )
00390             END IF
00391 *
00392 *         Send a row block of WORK to the destination row
00393 *
00394           ELSE
00395             IF( LCMQ.EQ.1 ) THEN
00396               IF( MYROW.EQ.MCROW ) THEN
00397                 IF( MYROW.NE.ICROW )
00398      $            CALL SGESD2D( ICONTXT, N, MQ, WORK, N, ICROW, MYCOL )
00399               ELSE IF( MYROW.EQ.ICROW ) THEN
00400                 IF( BETA.EQ.ZERO ) THEN
00401                   CALL SGERV2D( ICONTXT, N, MQ, C, LDC, MCROW, MYCOL )
00402                 ELSE
00403                   CALL SGERV2D( ICONTXT, N, MQ, WORK, N, MCROW, MYCOL )
00404                   CALL PBSMATADD( ICONTXT, 'G', N, MQ, ONE, WORK, N,
00405      $                            BETA, C, LDC )
00406                 END IF
00407               END IF
00408 *
00409             ELSE
00410               ML = MQ0 * MIN( LCMQ, MAX(0,ICEIL(M,NB)-MCCOL) )
00411               IF( MYROW.EQ.MCROW ) THEN
00412                 IF( MYROW.NE.ICROW )
00413      $            CALL SGESD2D( ICONTXT, N, ML, WORK, N, ICROW, MYCOL )
00414               ELSE IF( MYROW.EQ.ICROW ) THEN
00415                 CALL SGERV2D( ICONTXT, N, ML, WORK, N, MCROW, MYCOL )
00416               END IF
00417 *
00418               IF( MYROW.EQ.ICROW )
00419      $          CALL PBSTRSRT( ICONTXT, 'Row', N, MQ, NB, WORK, N, BETA,
00420      $                         C, LDC, LCMP, LCMQ, MQ0 )
00421             END IF
00422           END IF
00423 *
00424         END IF
00425 *
00426 *     When A is a row block
00427 *
00428       ELSE
00429 *
00430 *        Form  C <== A'  ( A is a row block )
00431 *            _
00432 *           | |
00433 *           | |
00434 *           | |                _____________
00435 *           |C|      <==      |______A______|
00436 *           | |
00437 *           | |
00438 *           |_|
00439 *
00440 *        MRROW : row relative position in template from ICROW
00441 *        MRCOL : column relative position in template from IACOL
00442 *
00443          MRROW = MOD( NPROW+MYROW-ICROW, NPROW )
00444          MRCOL = MOD( NPCOL+MYCOL-IACOL, NPCOL )
00445          JCCOL = ICCOL
00446          IF( ICCOL.EQ.-1 ) JCCOL = IACOL
00447 *
00448          NP  = NUMROC( N, NB, MYROW, ICROW, NPROW )
00449          NQ  = NUMROC( N, NB, MYCOL, IACOL, NPCOL )
00450          NP0 = NUMROC( NUMROC(N, NB, 0, 0, NPROW), NB, 0, 0, LCMP )
00451 *
00452          IF( LDA.LT.M .AND.
00453      $          ( IAROW.EQ.MYROW .OR. IAROW.EQ.-1 ) ) THEN
00454             INFO = 8
00455          ELSE IF( LDC.LT.NP .AND.
00456      $          ( ICCOL.EQ.MYCOL .OR. ICCOL.EQ.-1 ) ) THEN
00457             INFO = 11
00458          END IF
00459          IF( INFO.NE.0 ) GO TO 10
00460 *
00461 *        When a row process of IAROW has a row block A,
00462 *
00463          IF( IAROW.GE.0 ) THEN
00464            TBETA = ZERO
00465            IF( MYCOL.EQ.JCCOL ) TBETA = BETA
00466 *
00467            DO 40 I = 0, MIN( LCM, ICEIL(N,NB) ) - 1
00468              MCROW = MOD( MOD(I, NPROW) + ICROW, NPROW )
00469              MCCOL = MOD( MOD(I, NPCOL) + IACOL, NPCOL )
00470              IF( LCMP.EQ.1 )  NP0 = NUMROC( N, NB, I, 0, NPROW )
00471              IDEX = (I/NPROW) * NB
00472 *
00473 *            A source node copies the blocks to WORK, and send it
00474 *
00475              IF( MYROW.EQ.IAROW .AND. MYCOL.EQ.MCCOL ) THEN
00476 *
00477 *              The source node is a destination node
00478 *
00479                JDEX = (I/NPCOL) * NB
00480                IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN
00481                  CALL PBSTR2AT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB,
00482      $                          A(1,JDEX+1), LDA, TBETA, C(IDEX+1,1),
00483      $                          LDC, LCMP, LCMQ )
00484 *
00485 *              The source node sends blocks to a destination node
00486 *
00487                ELSE
00488                  CALL PBSTR2BT( ICONTXT, 'Row', TRANS, M, NQ-JDEX, NB,
00489      $                          A(1,JDEX+1), LDA, ZERO, WORK, NP0,
00490      $                          LCMQ*NB )
00491                  CALL SGESD2D( ICONTXT, NP0, M, WORK, NP0,
00492      $                         MCROW, JCCOL )
00493                END IF
00494 *
00495 *           A destination node receives the copied blocks
00496 *
00497             ELSE IF( MYROW.EQ.MCROW .AND. MYCOL.EQ.JCCOL ) THEN
00498               IF( LCMP.EQ.1 .AND. TBETA.EQ.ZERO ) THEN
00499                 CALL SGERV2D( ICONTXT, NP0, M, C, LDC, IAROW, MCCOL )
00500               ELSE
00501                 CALL SGERV2D( ICONTXT, NP0, M, WORK, NP0, IAROW, MCCOL )
00502                 CALL PBSTR2AF( ICONTXT, 'Col', NP-IDEX, M, NB, WORK,
00503      $                         NP0, TBETA, C(IDEX+1,1), LDC, LCMP, LCMQ,
00504      $                         NP0 )
00505               END IF
00506             END IF
00507    40     CONTINUE
00508 *
00509 *         Broadcast a column block of WORK in each row of template
00510 *
00511           IF( ICCOL.EQ.-1 ) THEN
00512             IF( MYCOL.EQ.JCCOL ) THEN
00513               CALL SGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC )
00514             ELSE
00515               CALL SGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC,
00516      $                       MYROW, JCCOL )
00517             END IF
00518           END IF
00519 *
00520 *       When all row procesors have a copy of the row block A,
00521 *
00522         ELSE
00523           IF( LCMP.EQ.1 ) NP0 = NP
00524 *
00525 *         Processors, which have diagonal blocks of A, copy them to
00526 *         WORK array in transposed form
00527 *
00528           DO 50 I = 0, LCMQ-1
00529             IF( MRROW.EQ.MOD(NPCOL*I+MRCOL, NPROW) ) THEN
00530               IF( LCMP.EQ.1.AND.(ICCOL.EQ.-1.OR.ICCOL.EQ.MYCOL) ) THEN
00531                 CALL PBSTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB,
00532      $                         A(1,I*NB+1), LDA, BETA, C, LDC,
00533      $                         LCMQ*NB )
00534               ELSE
00535                 CALL PBSTR2BT( ICONTXT, 'Row', TRANS, M, NQ-I*NB, NB,
00536      $                         A(1,I*NB+1), LDA, ZERO, WORK, NP0,
00537      $                         LCMQ*NB )
00538               END IF
00539             END IF
00540    50     CONTINUE
00541 *
00542 *         Get diagonal blocks of A for each row of the template
00543 *
00544           MCCOL = MOD( MOD(MRROW, NPCOL)+IACOL, NPCOL )
00545           IF( LCMP.GT.1 ) THEN
00546             MCROW = MOD( NPROW+MYROW-ICROW, NPROW )
00547             CALL PBSTRGET( ICONTXT, 'Col', NP0, M, ICEIL(N,NB), WORK,
00548      $                     NP0, MCROW, MCCOL, IGD, MYROW, MYCOL, NPROW,
00549      $                     NPCOL )
00550           END IF
00551 *
00552 *         Broadcast a column block of WORK in every column of template
00553 *
00554           IF( ICCOL.EQ.-1 ) THEN
00555             IF( MYCOL.EQ.MCCOL ) THEN
00556               IF( LCMP.GT.1 )
00557      $          CALL PBSTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0,
00558      $                         BETA, C, LDC, LCMP, LCMQ, NP0 )
00559               CALL SGEBS2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC )
00560             ELSE
00561               CALL SGEBR2D( ICONTXT, 'Row', '1-tree', NP, M, C, LDC,
00562      $                       MYROW, MCCOL )
00563             END IF
00564 *
00565 *         Send a column block of WORK to the destination column
00566 *
00567           ELSE
00568             IF( LCMP.EQ.1 ) THEN
00569               IF( MYCOL.EQ.MCCOL ) THEN
00570                 IF( MYCOL.NE.ICCOL )
00571      $            CALL SGESD2D( ICONTXT, NP, M, WORK, NP, MYROW, ICCOL )
00572               ELSE IF( MYCOL.EQ.ICCOL ) THEN
00573                 IF( BETA.EQ.ZERO ) THEN
00574                   CALL SGERV2D( ICONTXT, NP, M, C, LDC, MYROW, MCCOL )
00575                 ELSE
00576                   CALL SGERV2D( ICONTXT, NP, M, WORK, NP, MYROW, MCCOL )
00577                   CALL PBSMATADD( ICONTXT, 'G', NP, M, ONE, WORK, NP,
00578      $                            BETA, C, LDC )
00579                 END IF
00580               END IF
00581 *
00582             ELSE
00583               ML = M * MIN( LCMP, MAX( 0, ICEIL(N,NB) - MCROW ) )
00584               IF( MYCOL.EQ.MCCOL ) THEN
00585                 IF( MYCOL.NE.ICCOL )
00586      $            CALL SGESD2D( ICONTXT, NP0, ML, WORK, NP0,
00587      $                          MYROW, ICCOL )
00588               ELSE IF( MYCOL.EQ.ICCOL ) THEN
00589                 CALL SGERV2D( ICONTXT, NP0, ML, WORK, NP0,
00590      $                        MYROW, MCCOL )
00591               END IF
00592 *
00593               IF( MYCOL.EQ.ICCOL )
00594      $          CALL PBSTRSRT( ICONTXT, 'Col', NP, M, NB, WORK, NP0,
00595      $                         BETA, C, LDC, LCMP, LCMQ, NP0 )
00596             END IF
00597           END IF
00598 *
00599         END IF
00600       END IF
00601 *
00602       RETURN
00603 *
00604 *     End of PBSTRAN
00605 *
00606       END
00607 *
00608 *=======================================================================
00609 *     SUBROUTINE PBSTR2AT
00610 *=======================================================================
00611 *
00612       SUBROUTINE PBSTR2AT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
00613      $                     BETA, B, LDB, LCMP, LCMQ )
00614 *
00615 *  -- PB-BLAS routine (version 2.1) --
00616 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
00617 *     April 28, 1996
00618 *
00619 *     .. Scalar Arguments ..
00620       CHARACTER*1        ADIST, TRANS
00621       INTEGER            ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
00622       REAL               BETA
00623 *     ..
00624 *     .. Array Arguments ..
00625       REAL               A( LDA, * ), B( LDB, * )
00626 *     ..
00627 *
00628 *  Purpose
00629 *  =======
00630 *
00631 *  PBSTR2AT forms   B <== A^T + beta*B, or A^C + beta*B
00632 *  B is a ((conjugate) transposed) scattered block row (or column),
00633 *  copied from a scattered block column (or row) of A
00634 *
00635 *  =====================================================================
00636 *
00637 *     .. Parameters ..
00638       REAL               ONE
00639       PARAMETER          ( ONE = 1.0E+0 )
00640 *     ..
00641 *     .. Local Scalars ..
00642       INTEGER            IA, IB, K, INTV, JNTV
00643 *     ..
00644 *     .. External Subroutines ..
00645       EXTERNAL           PBSMATADD
00646 *     ..
00647 *     .. External Functions ..
00648       LOGICAL            LSAME
00649       INTEGER            ICEIL
00650       EXTERNAL           LSAME, ICEIL
00651 *     ..
00652 *     .. Intrinsic Functions ..
00653       INTRINSIC          MIN
00654 *     ..
00655 *     .. Excutable Statements ..
00656 *
00657       IF( LCMP.EQ.LCMQ ) THEN
00658          CALL PBSMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B,
00659      $                   LDB )
00660 *
00661       ELSE
00662 *
00663 *        If A is a column block ( ADIST = 'C' ),
00664 *
00665          IF( LSAME( ADIST, 'C' ) ) THEN
00666             INTV = LCMP * NB
00667             JNTV = LCMQ * NB
00668             IA = 1
00669             IB = 1
00670             DO 10 K = 1, ICEIL( M, INTV )
00671                CALL PBSMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ),
00672      $                         ONE, A(IA,1), LDA, BETA, B(1,IB), LDB )
00673                IA = IA + INTV
00674                IB = IB + JNTV
00675    10       CONTINUE
00676 *
00677 *        If A is a row block ( ADIST = 'R' ),
00678 *
00679          ELSE
00680             INTV = LCMP * NB
00681             JNTV = LCMQ * NB
00682             IA = 1
00683             IB = 1
00684             DO 20 K = 1, ICEIL( N, JNTV )
00685                CALL PBSMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M,
00686      $                         ONE, A(1,IA), LDA, BETA, B(IB,1), LDB )
00687                IA = IA + JNTV
00688                IB = IB + INTV
00689    20       CONTINUE
00690          END IF
00691       END IF
00692 *
00693       RETURN
00694 *
00695 *     End of PBSTR2AT
00696 *
00697       END
00698 *
00699 *=======================================================================
00700 *     SUBROUTINE PBSTR2BT
00701 *=======================================================================
00702 *
00703       SUBROUTINE PBSTR2BT( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
00704      $                     BETA, B, LDB, INTV )
00705 *
00706 *  -- PB-BLAS routine (version 2.1) --
00707 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
00708 *     April 28, 1996
00709 *
00710 *     .. Scalar Arguments ..
00711       CHARACTER*1        ADIST, TRANS
00712       INTEGER            ICONTXT, INTV, LDA, LDB, M, N, NB
00713       REAL               BETA
00714 *     ..
00715 *     .. Array Arguments ..
00716       REAL               A( LDA, * ), B( LDB, * )
00717 *     ..
00718 *
00719 *  Purpose
00720 *  =======
00721 *
00722 *  PBSTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
00723 *  ((conjugate) transposed) condensed block row (or column), copied from
00724 *  a scattered block column (or row) of A
00725 *
00726 *  =====================================================================
00727 *
00728 *     .. Parameters ..
00729       REAL               ONE
00730       PARAMETER          ( ONE = 1.0E+0 )
00731 *     ..
00732 *     .. Local Scalars ..
00733       INTEGER            IA, IB, K
00734 *     ..
00735 *     .. External Functions ..
00736       LOGICAL            LSAME
00737       INTEGER            ICEIL
00738       EXTERNAL           LSAME, ICEIL
00739 *     ..
00740 *     .. External Subroutines ..
00741       EXTERNAL           PBSMATADD
00742 *     ..
00743 *     .. Intrinsic Functions ..
00744       INTRINSIC          MIN
00745 *     ..
00746 *     .. Excutable Statements ..
00747 *
00748       IF( INTV.EQ.NB ) THEN
00749          CALL PBSMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B,
00750      $                   LDB )
00751 *
00752       ELSE
00753 *
00754 *        If A is a column block ( ADIST = 'C' ),
00755 *
00756          IF( LSAME( ADIST, 'C' ) ) THEN
00757             IA = 1
00758             IB = 1
00759             DO 10 K = 1, ICEIL( M, INTV )
00760                CALL PBSMATADD( ICONTXT, TRANS, N, MIN( M-IA+1, NB ),
00761      $                         ONE, A(IA,1), LDA, BETA, B(1,IB), LDB )
00762                IA = IA + INTV
00763                IB = IB + NB
00764    10       CONTINUE
00765 *
00766 *        If A is a row block (ADIST = 'R'),
00767 *
00768          ELSE
00769             IA = 1
00770             IB = 1
00771             DO 20 K = 1, ICEIL( N, INTV )
00772                CALL PBSMATADD( ICONTXT, TRANS, MIN( N-IA+1, NB ), M,
00773      $                         ONE, A(1,IA), LDA, BETA, B(IB,1), LDB )
00774                IA = IA + INTV
00775                IB = IB + NB
00776    20       CONTINUE
00777          END IF
00778       END IF
00779 *
00780       RETURN
00781 *
00782 *     End of PBSTR2BT
00783 *
00784       END
00785 *
00786 *=======================================================================
00787 *     SUBROUTINE PBSTR2AF
00788 *=======================================================================
00789 *
00790       SUBROUTINE PBSTR2AF( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B,
00791      $                     LDB, LCMP, LCMQ, NINT )
00792 *
00793 *  -- PB-BLAS routine (version 2.1) --
00794 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
00795 *     April 28, 1996
00796 *
00797 *     .. Scalar Arguments ..
00798       CHARACTER*1          ADIST
00799       INTEGER              ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
00800       REAL                 BETA
00801 *     ..
00802 *     .. Array Arguments ..
00803       REAL                 A( LDA, * ), B( LDB, * )
00804 *     ..
00805 *
00806 *  Purpose
00807 *  =======
00808 *
00809 *  PBSTR2AF forms  T <== A + BETA*T, where T is a scattered block
00810 *  row (or column) copied from a (condensed) block column (or row) of A
00811 *
00812 *  =====================================================================
00813 *
00814 *     .. Parameters ..
00815       REAL               ONE
00816       PARAMETER          ( ONE = 1.0E+0 )
00817 *     ..
00818 *     .. Local Scalars ..
00819       INTEGER            JA, JB, K, INTV
00820 *     ..
00821 *     .. External Functions ..
00822       LOGICAL            LSAME
00823       INTEGER            ICEIL
00824       EXTERNAL           LSAME, ICEIL
00825 *     ..
00826 *     .. Intrinsic Functions ..
00827       INTRINSIC          MIN
00828 *     ..
00829 *     .. Executable Statements ..
00830 *
00831       IF( LSAME( ADIST, 'R' ) ) THEN
00832          INTV = NB * LCMQ
00833          JA = 1
00834          JB = 1
00835          DO 10 K = 1, ICEIL( NINT, NB )
00836             CALL PBSMATADD( ICONTXT, 'G', M, MIN( N-JB+1, NB ), ONE,
00837      $                      A(1,JA), LDA, BETA, B(1,JB), LDB )
00838             JA = JA + NB
00839             JB = JB + INTV
00840    10    CONTINUE
00841 *
00842 *     if( LSAME( ADIST, 'C' ) ) then
00843 *
00844       ELSE
00845          INTV = NB * LCMP
00846          JA = 1
00847          JB = 1
00848          DO 20 K = 1, ICEIL( NINT, NB )
00849             CALL PBSMATADD( ICONTXT, 'G', MIN( M-JB+1, NB ), N, ONE,
00850      $                      A(JA,1), LDA, BETA, B(JB,1), LDB )
00851             JA = JA + NB
00852             JB = JB + INTV
00853    20    CONTINUE
00854       END IF
00855 *
00856       RETURN
00857 *
00858 *     End of PBSTR2AF
00859 *
00860       END