*  Listing extracted from d6/d8b/pbdtran_8f_source.html

      SUBROUTINE pbdtran( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA,
     $                    C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK )
*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     Jaeyoung Choi, Oak Ridge National Laboratory
*     Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
*     David Walker,  Oak Ridge National Laboratory
*
*     .. Scalar Arguments ..
      CHARACTER*1        ADIST, TRANS
      INTEGER            IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
     $                   m, n, nb
      DOUBLE PRECISION   BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PBDTRAN  transposes  a column block to row block, or a row block to
*  column block by reallocating data distribution.
*
*     C := A^T + beta*C, or C := A^C + beta*C
*
*  where A is an M-by-N matrix  and C is an N-by-M matrix, and the size
*  of M or N is limited to its block size NB.
*
*  The first elements  of the matrices A, and C  should  be  located  at
*  the beginnings of their first blocks. (not the middle of the blocks.)
*
*  The routine returns immediately, before any argument is checked,
*  when M = 0 or N = 0.  Otherwise an invalid argument is reported by
*  calling PXERBLA with the routine name 'PBDTRAN ' and the position of
*  the offending argument, after which the routine returns.
*
*  Parameters
*  ==========
*
*  ICONTXT (input) INTEGER
*          ICONTXT is the BLACS mechanism for partitioning communication
*          space.  A defining property of a context is that a message in
*          a context cannot be sent or received in another context.  The
*          BLACS context includes the definition of a grid, and each
*          process' coordinates in it.
*
*  ADIST  - (input) CHARACTER*1
*           ADIST specifies whether A is a column block or a row block.
*
*              ADIST = 'C',  A is a column block
*              ADIST = 'R',  A is a row block
*
*           Any other value is rejected (error code 2).
*
*  TRANS  - (input) CHARACTER*1
*           TRANS specifies whether the transposed format is transpose
*           or conjugate transpose.  If the matrices A and C are real,
*           the argument is ignored.  It is not examined here; it is
*           only passed on to PBDTR2AT and PBDTR2BT.
*
*              TRANS = 'T',  transpose
*              TRANS = 'C',  conjugate transpose
*
*  M      - (input) INTEGER
*           M specifies the (global) number of rows of the matrix (block
*           column or block row) A and of columns of the matrix C.
*           M >= 0.
*
*  N      - (input) INTEGER
*           N specifies the (global) number of columns of the matrix
*           (block column or block row) A  and of rows of the matrix
*           C.  N >= 0.
*
*  NB     - (input) INTEGER
*           NB specifies  the column block size of the matrix A and the
*           row block size of the matrix C when ADIST = 'C'.  Otherwise,
*           it specifies  the row block size of the matrix A and the
*           column block size of the matrix C. NB >= 1.
*
*  A       (input) DOUBLE PRECISION array of DIMENSION ( LDA, Lx ),
*          where Lx is N  when ADIST = 'C', or Nq when ADIST = 'R'.
*          Before entry with  ADIST = 'C',  the leading Mp by N part of
*          the array A must contain the matrix A, otherwise the leading
*          M by Nq part of the array A  must contain the matrix A.  See
*          parameter details for the values of Mp and Nq.
*
*  LDA     (input) INTEGER
*          LDA specifies the leading dimension of (local) A as declared
*          in the calling (sub) program.  LDA >= MAX(1,Mp) when
*          ADIST = 'C', or LDA >= MAX(1,M) otherwise.
*          The check made here is LDA >= Mp (resp. LDA >= M), and only
*          on the processes that hold A.
*
*  BETA    (input) DOUBLE PRECISION
*          BETA specifies the scalar beta.
*
*  C       (input/output) DOUBLE PRECISION array of DIMENSION
*          ( LDC, Lx ),
*          where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
*          If ADIST = 'C', the leading N-by-Mq part of the array C
*          contains the (local) matrix C, otherwise the leading
*          Np-by-M part of the array C must contain the (local) matrix
*          C.  C will not be referenced if beta is zero.
*
*  LDC     (input) INTEGER
*          LDC specifies the leading dimension of (local) C as declared
*          in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
*          or LDC >= MAX(1,Np) otherwise.
*          The check made here is LDC >= N (resp. LDC >= Np), and only
*          on the processes that hold C.
*
*  IAROW   (input) INTEGER
*          IAROW specifies  a row  of the process  template,
*          which holds the first block  of the matrix  A. If A is a row
*          of blocks (ADIST = 'R') and all rows of processes have a copy
*          of A, then set IAROW = -1.
*          IAROW = -1 is rejected when ADIST = 'C'.
*
*  IACOL   (input) INTEGER
*          IACOL specifies  a column of the process template,
*          which holds  the first block  of the matrix A.  If  A is  a
*          column of blocks (ADIST = 'C') and all columns of processes
*          have a copy of A, then set IACOL = -1.
*          IACOL = -1 is rejected when ADIST = 'R'.
*
*  ICROW   (input) INTEGER
*          ICROW specifies the current row process which holds
*          the first block  of the matrix C,  which is transposed of A.
*          If C is a row of blocks (ADIST = 'C') and the transposed
*          row block C is distributed over all rows of processes, set
*          ICROW = -1.
*          ICROW = -1 is rejected when ADIST = 'R'.
*
*  ICCOL   (input) INTEGER
*          ICCOL specifies  the current column process which holds
*          the first block of the matrix C,  which is transposed of A.
*          If C is a column of blocks (ADIST = 'R') and the transposed
*          column block C is distributed over all columns of processes,
*          set ICCOL = -1.
*          ICCOL = -1 is rejected when ADIST = 'C'.
*
*  WORK    (workspace) DOUBLE PRECISION array of dimension Size(WORK).
*          It needs extra working space of A'.
*
*  Parameters Details
*  ==================
*
*  Lx      It is  a local portion  of L  owned  by  a process,  (L is
*          replaced by M, or N,  and x is replaced by either p (=NPROW)
*          or q (=NPCOL)).  The value is  determined by  L, LB, x,  and
*          MI, where  LB is  a block size  and  MI is a  row  or column
*          position  in a process template.  Lx is  equal to  or less
*          than Lx0 = CEIL( L, LB*x ) * LB.
*
*  Communication Scheme
*  ====================
*
*  The communication scheme of the routine is set to '1-tree', which is
*  fan-out.  (For details, see BLACS user's guide.)
*
*  Memory Requirement of WORK
*  ==========================
*
*  Mqb  = CEIL( M, NB*NPCOL )
*  Npb  = CEIL( N, NB*NPROW )
*  LCMQ = LCM / NPCOL
*  LCMP = LCM / NPROW
*
*  (1) ADIST = 'C'
*   (a) IACOL != -1
*       Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
*   (b) IACOL = -1
*       Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
*
*  (2) ADIST = 'R'
*   (a) IAROW != -1
*       Size(WORK) = M * CEIL(Npb,LCMP)*NB
*   (b) IAROW = -1
*       Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
*
*  Notes
*  -----
*  More precise space can be computed as
*
*  CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
*  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
*
*  =====================================================================
*
*     ..
*     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      parameter( one = 1.0d+0, zero = 0.0d+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            COLFORM, ROWFORM
      INTEGER            I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
     $                   lcmp, lcmq, mccol, mcrow, ml, mp, mq, mq0,
     $                   mrcol, mrrow, mycol, myrow, np, np0, npcol,
     $                   nprow, nq
      DOUBLE PRECISION   TBETA
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILCM, ICEIL, NUMROC
      EXTERNAL           ilcm, iceil, lsame, numroc
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
     $                   dgesd2d, pbdmatadd, pbdtr2af, pbdtr2at,
     $                   pbdtr2bt, pbdtrget, pbdtrsrt, pxerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min, mod
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible.  Note that this happens before the
*     arguments are validated.
*
      IF( m.EQ.0 .OR. n.EQ.0 ) RETURN
*
*     Get the grid shape and this process' coordinates
*
      CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
*
      colform = lsame( adist, 'C' )
      rowform = lsame( adist, 'R' )
*
*     Test the input parameters.
*     INFO is set to the position of the first bad argument found.
*
      info = 0
      IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
         info = 2
      ELSE IF( m .LT.0                            ) THEN
         info = 4
      ELSE IF( n .LT.0                            ) THEN
         info = 5
      ELSE IF( nb.LT.1                            ) THEN
         info = 6
      ELSE IF( iarow.LT.-1 .OR. iarow.GE.nprow .OR.
     $       ( iarow.EQ.-1 .AND. colform )        ) THEN
         info = 12
      ELSE IF( iacol.LT.-1 .OR. iacol.GE.npcol .OR.
     $       ( iacol.EQ.-1 .AND. rowform )        ) THEN
         info = 13
      ELSE IF( icrow.LT.-1 .OR. icrow.GE.nprow .OR.
     $       ( icrow.EQ.-1 .AND. rowform )        ) THEN
         info = 14
      ELSE IF( iccol.LT.-1 .OR. iccol.GE.npcol .OR.
     $       ( iccol.EQ.-1 .AND. colform )        ) THEN
         info = 15
      END IF
*
*     Common error exit.  It is also reached by the GO TO 10 statements
*     that follow the LDA / LDC checks further down.
*
   10 CONTINUE
      IF( info .NE. 0 ) THEN
         CALL pxerbla( icontxt, 'PBDTRAN ', info )
         RETURN
      END IF
*
*     Start the operations.
*
*     LCM  : the least common multiple of NPROW and NPCOL
*     LCMP : LCM / NPROW
*     LCMQ : LCM / NPCOL
*     IGD  : NPCOL / LCMP  (only used as an argument of PBDTRGET)
*
      lcm  = ilcm( nprow, npcol )
      lcmp = lcm   / nprow
      lcmq = lcm   / npcol
      igd  = npcol / lcmp
*
*     When A is a column block
*
      IF( colform ) THEN
*
*       Form  C <== A'  ( A is a column block )
*                                         _
*                                        | |
*                                        | |
*            _____________               | |
*           |______C______|     <==      |A|
*                                        | |
*                                        | |
*                                        |_|
*
*       MRROW : row relative position in template from IAROW
*       MRCOL : column relative position in template from ICCOL
*       JCROW : process row that receives C; it is IAROW when
*               ICROW = -1
*
        mrrow = mod( nprow+myrow-iarow, nprow )
        mrcol = mod( npcol+mycol-iccol, npcol )
        jcrow = icrow
        IF( icrow.EQ.-1 ) jcrow = iarow
*
*       MP, MQ : NUMROC results for M over the process rows (from
*                IAROW) and over the process columns (from ICCOL)
*       MQ0    : column count used for the messages and for WORK
*
        mp  = numroc( m, nb, myrow, iarow, nprow )
        mq  = numroc( m, nb, mycol, iccol, npcol )
        mq0 = numroc( numroc(m, nb, 0, 0, npcol), nb, 0, 0, lcmq )
*
*       Leading dimensions are only checked on processes that hold
*       A (resp. C)
*
        IF( lda.LT.mp .AND.
     $         ( iacol.EQ.mycol .OR. iacol.EQ.-1 ) ) THEN
           info = 8
        ELSE IF( ldc.LT.n .AND.
     $         ( icrow.EQ.myrow .OR. icrow.EQ.-1 ) ) THEN
           info = 11
        END IF
        IF( info.NE.0 ) GO TO 10
*
*       When a column process of IACOL has a column block A,
*
        IF( iacol.GE.0 ) THEN
*
*         TBETA is BETA on process row JCROW and zero elsewhere
*
          tbeta = zero
          IF( myrow.EQ.jcrow ) tbeta = beta
*
*         One pass per block index I, at most LCM passes.
*         ( MCROW, IACOL ) is the source of pass I and
*         ( JCROW, MCCOL ) its destination.
*         JDEX is the column offset into C for pass I.
*
          DO 20 i = 0, min( lcm, iceil(m,nb) ) - 1
            mcrow = mod( mod(i, nprow) + iarow, nprow )
            mccol = mod( mod(i, npcol) + iccol, npcol )
            IF( lcmq.EQ.1 )  mq0 = numroc( m, nb, i, 0, npcol )
            jdex = (i/npcol) * nb
*
*           A source node copies the blocks to WORK, and send it
*
            IF( myrow.EQ.mcrow .AND. mycol.EQ.iacol ) THEN
*
*             The source node is a destination node
*             ( IDEX is the row offset into A for pass I )
*
              idex = (i/nprow) * nb
              IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
                CALL pbdtr2at( icontxt, 'Col', trans, mp-idex, n, nb,
     $                         a(idex+1,1), lda, tbeta, c(1,jdex+1),
     $                         ldc, lcmp, lcmq )
*
*             The source node sends blocks to a destination node
*
              ELSE
                CALL pbdtr2bt( icontxt, 'Col', trans, mp-idex, n, nb,
     $                         a(idex+1,1), lda, zero, work, n,
     $                         lcmp*nb )
                CALL dgesd2d( icontxt, n, mq0, work, n, jcrow, mccol )
              END IF
*
*           A destination node receives the copied blocks
*           ( straight into C when LCMQ = 1 and TBETA is zero,
*             otherwise into WORK and then merged into C )
*
            ELSE IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
              IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
                CALL dgerv2d( icontxt, n, mq0, c, ldc, mcrow, iacol )
              ELSE
                CALL dgerv2d( icontxt, n, mq0, work, n, mcrow, iacol )
                CALL pbdtr2af( icontxt, 'Row', n, mq-jdex, nb, work, n,
     $                         tbeta, c(1,jdex+1), ldc, lcmp, lcmq,
     $                         mq0 )
              END IF
            END IF
   20     CONTINUE
*
*         Broadcast a row block of C in each column of template
*
          IF( icrow.EQ.-1 ) THEN
            IF( myrow.EQ.jcrow ) THEN
              CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
            ELSE
              CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
     $                      jcrow, mycol )
            END IF
          END IF
*
*       When all column processors have a copy of the column block A,
*
        ELSE
          IF( lcmq.EQ.1 ) mq0 = mq
*
*         Processors, which have diagonal blocks of A, copy them to
*         WORK array in transposed form
*         ( straight into C, with BETA applied, when LCMQ = 1 and this
*           process row is a destination of C )
*
          DO 30 i = 0, lcmp-1
            IF( mrcol.EQ.mod( nprow*i+mrrow, npcol ) ) THEN
              IF( lcmq.EQ.1.AND.(icrow.EQ.-1.OR.icrow.EQ.myrow) ) THEN
                 CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
     $                          a(i*nb+1,1), lda, beta, c, ldc,
     $                          lcmp*nb )
              ELSE
                 CALL pbdtr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
     $                          a(i*nb+1,1), lda, zero, work, n,
     $                          lcmp*nb )
              END IF
            END IF
   30     CONTINUE
*
*         Get diagonal blocks of A for each column of the template
*
          mcrow = mod( mod(mrcol,nprow)+iarow, nprow )
          IF( lcmq.GT.1 ) THEN
            mccol = mod( npcol+mycol-iccol, npcol )
            CALL pbdtrget( icontxt, 'Row', n, mq0, iceil(m,nb), work, n,
     $                     mcrow,  mccol, igd, myrow, mycol, nprow,
     $                     npcol )
          END IF
*
*         Broadcast a row block of C in each column of template
*         ( sorted from WORK into C first when LCMQ > 1 )
*
          IF( icrow.EQ.-1 ) THEN
            IF( myrow.EQ.mcrow ) THEN
              IF( lcmq.GT.1 )
     $          CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
     $                         c, ldc, lcmp, lcmq, mq0 )
              CALL dgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
            ELSE
              CALL dgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
     $                      mcrow, mycol )
            END IF
*
*         Send a row block of WORK to the destination row
*
          ELSE
            IF( lcmq.EQ.1 ) THEN
              IF( myrow.EQ.mcrow ) THEN
                IF( myrow.NE.icrow )
     $            CALL dgesd2d( icontxt, n, mq, work, n, icrow, mycol )
              ELSE IF( myrow.EQ.icrow ) THEN
                IF( beta.EQ.zero ) THEN
                  CALL dgerv2d( icontxt, n, mq, c, ldc, mcrow, mycol )
                ELSE
                  CALL dgerv2d( icontxt, n, mq, work, n, mcrow, mycol )
                  CALL pbdmatadd( icontxt, 'G', n, mq, one, work, n,
     $                            beta, c, ldc )
                END IF
              END IF
*
            ELSE
*
*             ML : number of columns of WORK that are transferred
*
              ml = mq0 * min( lcmq, max(0,iceil(m,nb)-mccol) )
              IF( myrow.EQ.mcrow ) THEN
                IF( myrow.NE.icrow )
     $            CALL dgesd2d( icontxt, n, ml, work, n, icrow, mycol )
              ELSE IF( myrow.EQ.icrow ) THEN
                CALL dgerv2d( icontxt, n, ml, work, n, mcrow, mycol )
              END IF
*
              IF( myrow.EQ.icrow )
     $          CALL pbdtrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
     $                         c, ldc, lcmp, lcmq, mq0 )
            END IF
          END IF
*
        END IF
*
*     When A is a row block
*
      ELSE
*
*        Form  C <== A'  ( A is a row block )
*            _
*           | |
*           | |
*           | |                _____________
*           |C|      <==      |______A______|
*           | |
*           | |
*           |_|
*
*        MRROW : row relative position in template from ICROW
*        MRCOL : column relative position in template from IACOL
*        JCCOL : process column that receives C; it is IACOL when
*                ICCOL = -1
*
         mrrow = mod( nprow+myrow-icrow, nprow )
         mrcol = mod( npcol+mycol-iacol, npcol )
         jccol = iccol
         IF( iccol.EQ.-1 ) jccol = iacol
*
*        NP, NQ : NUMROC results for N over the process rows (from
*                 ICROW) and over the process columns (from IACOL)
*        NP0    : row count used for the messages and for WORK
*
         np  = numroc( n, nb, myrow, icrow, nprow )
         nq  = numroc( n, nb, mycol, iacol, npcol )
         np0 = numroc( numroc(n, nb, 0, 0, nprow), nb, 0, 0, lcmp )
*
*        Leading dimensions are only checked on processes that hold
*        A (resp. C)
*
         IF( lda.LT.m .AND.
     $          ( iarow.EQ.myrow .OR. iarow.EQ.-1 ) ) THEN
            info = 8
         ELSE IF( ldc.LT.np .AND.
     $          ( iccol.EQ.mycol .OR. iccol.EQ.-1 ) ) THEN
            info = 11
         END IF
         IF( info.NE.0 ) GO TO 10
*
*        When a row process of IAROW has a row block A,
*
         IF( iarow.GE.0 ) THEN
*
*          TBETA is BETA on process column JCCOL and zero elsewhere
*
           tbeta = zero
           IF( mycol.EQ.jccol ) tbeta = beta
*
*          One pass per block index I, at most LCM passes.
*          ( IAROW, MCCOL ) is the source of pass I and
*          ( MCROW, JCCOL ) its destination.
*          IDEX is the row offset into C for pass I.
*
           DO 40 i = 0, min( lcm, iceil(n,nb) ) - 1
             mcrow = mod( mod(i, nprow) + icrow, nprow )
             mccol = mod( mod(i, npcol) + iacol, npcol )
             IF( lcmp.EQ.1 )  np0 = numroc( n, nb, i, 0, nprow )
             idex = (i/nprow) * nb
*
*            A source node copies the blocks to WORK, and send it
*
             IF( myrow.EQ.iarow .AND. mycol.EQ.mccol ) THEN
*
*              The source node is a destination node
*              ( JDEX is the column offset into A for pass I )
*
               jdex = (i/npcol) * nb
               IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
                 CALL pbdtr2at( icontxt, 'Row', trans, m, nq-jdex, nb,
     $                          a(1,jdex+1), lda, tbeta, c(idex+1,1),
     $                          ldc, lcmp, lcmq )
*
*              The source node sends blocks to a destination node
*
               ELSE
                 CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-jdex, nb,
     $                          a(1,jdex+1), lda, zero, work, np0,
     $                          lcmq*nb )
                 CALL dgesd2d( icontxt, np0, m, work, np0,
     $                         mcrow, jccol )
               END IF
*
*           A destination node receives the copied blocks
*           ( straight into C when LCMP = 1 and TBETA is zero,
*             otherwise into WORK and then merged into C )
*
            ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
              IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
                CALL dgerv2d( icontxt, np0, m, c, ldc, iarow, mccol )
              ELSE
                CALL dgerv2d( icontxt, np0, m, work, np0, iarow, mccol )
                CALL pbdtr2af( icontxt, 'Col', np-idex, m, nb, work,
     $                         np0, tbeta, c(idex+1,1), ldc, lcmp, lcmq,
     $                         np0 )
              END IF
            END IF
   40     CONTINUE
*
*         Broadcast a column block of C in each row of template
*
          IF( iccol.EQ.-1 ) THEN
            IF( mycol.EQ.jccol ) THEN
              CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
            ELSE
              CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
     $                       myrow, jccol )
            END IF
          END IF
*
*       When all row processors have a copy of the row block A,
*
        ELSE
          IF( lcmp.EQ.1 ) np0 = np
*
*         Processors, which have diagonal blocks of A, copy them to
*         WORK array in transposed form
*         ( straight into C, with BETA applied, when LCMP = 1 and this
*           process column is a destination of C )
*
          DO 50 i = 0, lcmq-1
            IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
              IF( lcmp.EQ.1.AND.(iccol.EQ.-1.OR.iccol.EQ.mycol) ) THEN
                CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
     $                         a(1,i*nb+1), lda, beta, c, ldc,
     $                         lcmq*nb )
              ELSE
                CALL pbdtr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
     $                         a(1,i*nb+1), lda, zero, work, np0,
     $                         lcmq*nb )
              END IF
            END IF
   50     CONTINUE
*
*         Get diagonal blocks of A for each row of the template
*
          mccol = mod( mod(mrrow, npcol)+iacol, npcol )
          IF( lcmp.GT.1 ) THEN
            mcrow = mod( nprow+myrow-icrow, nprow )
            CALL pbdtrget( icontxt, 'Col', np0, m, iceil(n,nb), work,
     $                     np0, mcrow, mccol, igd, myrow, mycol, nprow,
     $                     npcol )
          END IF
*
*         Broadcast a column block of C in each row of template
*         ( sorted from WORK into C first when LCMP > 1 )
*
          IF( iccol.EQ.-1 ) THEN
            IF( mycol.EQ.mccol ) THEN
              IF( lcmp.GT.1 )
     $          CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
     $                         beta, c, ldc, lcmp, lcmq, np0 )
              CALL dgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
            ELSE
              CALL dgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
     $                       myrow, mccol )
            END IF
*
*         Send a column block of WORK to the destination column
*
          ELSE
            IF( lcmp.EQ.1 ) THEN
              IF( mycol.EQ.mccol ) THEN
                IF( mycol.NE.iccol )
     $            CALL dgesd2d( icontxt, np, m, work, np, myrow, iccol )
              ELSE IF( mycol.EQ.iccol ) THEN
                IF( beta.EQ.zero ) THEN
                  CALL dgerv2d( icontxt, np, m, c, ldc, myrow, mccol )
                ELSE
                  CALL dgerv2d( icontxt, np, m, work, np, myrow, mccol )
                  CALL pbdmatadd( icontxt, 'G', np, m, one, work, np,
     $                            beta, c, ldc )
                END IF
              END IF
*
            ELSE
*
*             ML : number of columns of WORK that are transferred
*
              ml = m * min( lcmp, max( 0, iceil(n,nb) - mcrow ) )
              IF( mycol.EQ.mccol ) THEN
                IF( mycol.NE.iccol )
     $            CALL dgesd2d( icontxt, np0, ml, work, np0,
     $                          myrow, iccol )
              ELSE IF( mycol.EQ.iccol ) THEN
                CALL dgerv2d( icontxt, np0, ml, work, np0,
     $                        myrow, mccol )
              END IF
*
              IF( mycol.EQ.iccol )
     $          CALL pbdtrsrt( icontxt, 'Col', np, m, nb, work, np0,
     $                         beta, c, ldc, lcmp, lcmq, np0 )
            END IF
          END IF
*
        END IF
      END IF
*
      RETURN
*
*     End of PBDTRAN
*
      END

*

*=======================================================================

*     SUBROUTINE PBDTR2AT

*=======================================================================

*

      SUBROUTINE pbdtr2at( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
     $                     BETA, B, LDB, LCMP, LCMQ )
*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     .. Scalar Arguments ..
      CHARACTER*1        ADIST, TRANS
      INTEGER            ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
      DOUBLE PRECISION   BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
*     ..
*
*  Purpose
*  =======
*
*  PBDTR2AT forms   B <== A^T + beta*B, or A^C + beta*B
*  B is a ((conjugate) transposed) scattered block row (or column),
*  copied from a scattered block column (or row) of A.
*
*  All the arithmetic is delegated to PBDMATADD, to which TRANS, ONE
*  and BETA are handed unchanged.
*
*  When LCMP = LCMQ a single PBDMATADD call handles the whole of A.
*  Otherwise A and B are walked block by block:
*
*     ADIST = 'C' : blocks of at most NB rows of A, LCMP*NB rows
*                   apart, go to column blocks of B that are LCMQ*NB
*                   columns apart.
*     otherwise   : blocks of at most NB columns of A, LCMQ*NB columns
*                   apart, go to row blocks of B that are LCMP*NB rows
*                   apart.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            IBLK, IOFFA, IOFFB, NBLKS, STEPP, STEPQ
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL
      EXTERNAL           ICEIL, LSAME
*     ..
*     .. External Subroutines ..
      EXTERNAL           PBDMATADD
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN
*     ..
*     .. Executable Statements ..
*
*     Equal strides on both sides: one call does it all
*
      IF( LCMP.EQ.LCMQ ) THEN
         CALL PBDMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B,
     $                   LDB )
         RETURN
      END IF
*
*     STEPP, STEPQ : block strides derived from LCMP and LCMQ
*     IOFFA, IOFFB : first row/column of the current block in A and B
*
      STEPP = LCMP*NB
      STEPQ = LCMQ*NB
      IOFFA = 1
      IOFFB = 1
*
      IF( LSAME( ADIST, 'C' ) ) THEN
*
*        A is a column block
*
         NBLKS = ICEIL( M, STEPP )
         DO 30 IBLK = 1, NBLKS
            CALL PBDMATADD( ICONTXT, TRANS, N, MIN( M-IOFFA+1, NB ),
     $                      ONE, A( IOFFA, 1 ), LDA, BETA,
     $                      B( 1, IOFFB ), LDB )
            IOFFA = IOFFA + STEPP
            IOFFB = IOFFB + STEPQ
   30    CONTINUE
*
      ELSE
*
*        A is a row block
*
         NBLKS = ICEIL( N, STEPQ )
         DO 40 IBLK = 1, NBLKS
            CALL PBDMATADD( ICONTXT, TRANS, MIN( N-IOFFA+1, NB ), M,
     $                      ONE, A( 1, IOFFA ), LDA, BETA,
     $                      B( IOFFB, 1 ), LDB )
            IOFFA = IOFFA + STEPQ
            IOFFB = IOFFB + STEPP
   40    CONTINUE
      END IF
*
      RETURN
*
*     End of PBDTR2AT
*
      END

*

*=======================================================================

*     SUBROUTINE PBDTR2BT

*=======================================================================

*

      SUBROUTINE pbdtr2bt( ICONTXT, ADIST, TRANS, M, N, NB, A, LDA,
     $                     BETA, B, LDB, INTV )
*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     .. Scalar Arguments ..
      CHARACTER*1        ADIST, TRANS
      INTEGER            ICONTXT, INTV, LDA, LDB, M, N, NB
      DOUBLE PRECISION   BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
*     ..
*
*  Purpose
*  =======
*
*  PBDTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
*  ((conjugate) transposed) condensed block row (or column), copied from
*  a scattered block column (or row) of A.  T is the array argument B.
*
*  All the arithmetic is delegated to PBDMATADD, to which TRANS, ONE
*  and BETA are handed unchanged.
*
*  When INTV = NB a single PBDMATADD call handles the whole of A.
*  Otherwise blocks of at most NB rows (ADIST = 'C') or columns (any
*  other ADIST) are picked from A every INTV rows/columns and stored
*  into B in consecutive blocks of NB.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            IBLK, IDST, ISRC
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL
      EXTERNAL           ICEIL, LSAME
*     ..
*     .. External Subroutines ..
      EXTERNAL           PBDMATADD
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN
*     ..
*     .. Executable Statements ..
*
*     The source blocks are already adjacent: one call does it all
*
      IF( INTV.EQ.NB ) THEN
         CALL PBDMATADD( ICONTXT, TRANS, N, M, ONE, A, LDA, BETA, B,
     $                   LDB )
         RETURN
      END IF
*
*     ISRC : first row/column of the current block in A
*     IDST : first column/row of the current block in B
*
      ISRC = 1
      IDST = 1
*
      IF( LSAME( ADIST, 'C' ) ) THEN
*
*        A is a column block
*
         DO 30 IBLK = 1, ICEIL( M, INTV )
            CALL PBDMATADD( ICONTXT, TRANS, N, MIN( M-ISRC+1, NB ),
     $                      ONE, A( ISRC, 1 ), LDA, BETA,
     $                      B( 1, IDST ), LDB )
            ISRC = ISRC + INTV
            IDST = IDST + NB
   30    CONTINUE
*
      ELSE
*
*        A is a row block
*
         DO 40 IBLK = 1, ICEIL( N, INTV )
            CALL PBDMATADD( ICONTXT, TRANS, MIN( N-ISRC+1, NB ), M,
     $                      ONE, A( 1, ISRC ), LDA, BETA,
     $                      B( IDST, 1 ), LDB )
            ISRC = ISRC + INTV
            IDST = IDST + NB
   40    CONTINUE
      END IF
*
      RETURN
*
*     End of PBDTR2BT
*
      END

*

*=======================================================================

*     SUBROUTINE PBDTR2AF

*=======================================================================

*

      SUBROUTINE pbdtr2af( ICONTXT, ADIST, M, N, NB, A, LDA, BETA, B,
     $                     LDB, LCMP, LCMQ, NINT )
*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     .. Scalar Arguments ..
      CHARACTER*1          ADIST
      INTEGER              ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
      DOUBLE PRECISION     BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION     A( LDA, * ), B( LDB, * )
*     ..
*
*  Purpose
*  =======
*
*  PBDTR2AF forms  T <== A + BETA*T, where T is a scattered block
*  row (or column) copied from a (condensed) block column (or row) of A.
*  T is the array argument B.
*
*  ICEIL( NINT, NB ) blocks are processed.  Each one is handed to
*  PBDMATADD with the option 'G' and the scalars ONE and BETA.
*
*     ADIST = 'R' : column blocks of A, NB columns apart, go to column
*                   blocks of B that are NB*LCMQ columns apart; the
*                   block width is MIN( N-JDST+1, NB ).
*     otherwise   : row blocks of A, NB rows apart, go to row blocks
*                   of B that are NB*LCMP rows apart; the block height
*                   is MIN( M-JDST+1, NB ).
*
*  NINT is the dummy argument declared above; the intrinsic function
*  of the same name is not used in this routine.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE
      PARAMETER          ( ONE = 1.0D+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            ROWBLK
      INTEGER            IBLK, JDST, JSRC, JSTEP, NBLKS
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL
      EXTERNAL           ICEIL, LSAME
*     ..
*     .. External Subroutines ..
      EXTERNAL           PBDMATADD
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN
*     ..
*     .. Executable Statements ..
*
*     JSRC : start of the current block in A (blocks are adjacent)
*     JDST : start of the current block in B (blocks JSTEP apart)
*
      ROWBLK = LSAME( ADIST, 'R' )
      NBLKS = ICEIL( NINT, NB )
      JSRC = 1
      JDST = 1
*
      IF( ROWBLK ) THEN
*
*        Blocks are laid out along the columns
*
         JSTEP = NB*LCMQ
         DO 30 IBLK = 1, NBLKS
            CALL PBDMATADD( ICONTXT, 'G', M, MIN( N-JDST+1, NB ), ONE,
     $                      A( 1, JSRC ), LDA, BETA, B( 1, JDST ),
     $                      LDB )
            JSRC = JSRC + NB
            JDST = JDST + JSTEP
   30    CONTINUE
*
      ELSE
*
*        Blocks are laid out along the rows
*
         JSTEP = NB*LCMP
         DO 40 IBLK = 1, NBLKS
            CALL PBDMATADD( ICONTXT, 'G', MIN( M-JDST+1, NB ), N, ONE,
     $                      A( JSRC, 1 ), LDA, BETA, B( JDST, 1 ),
     $                      LDB )
            JSRC = JSRC + NB
            JDST = JDST + JSTEP
   40    CONTINUE
      END IF
*
      RETURN
*
*     End of PBDTR2AF
*
      END