*     Extracted from the source listing da/d58/pdlarf_8f_source.html

      SUBROUTINE PDLARF( SIDE, M, N, V, IV, JV, DESCV, INCV, TAU,
     $                   C, IC, JC, DESCC, WORK )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 25, 2001
*
*     .. Scalar Arguments ..
      CHARACTER          SIDE
      INTEGER            IC, INCV, IV, JC, JV, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCC( * ), DESCV( * )
      DOUBLE PRECISION   C( * ), TAU( * ), V( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLARF applies a real elementary reflector Q (or Q**T) to a real
*  M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
*  either the left or the right. Q is represented in the form
*
*        Q = I - tau * v * v'
*
*  where tau is a real scalar and v is a real vector.
*
*  If tau = 0, then Q is taken to be the unit matrix.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be viewed as a subclass of matrices, a
*  distributed vector is considered to be a distributed matrix.
*
*  Restrictions
*  ============
*
*  If SIDE = 'Left' and INCV = 1, then the row process having the first
*  entry V(IV,JV) must also have the first row of sub( C ). Moreover,
*  MOD(IV-1,MB_V) must be equal to MOD(IC-1,MB_C), if INCV=M_V, only
*  the last equality must be satisfied.
*
*  If SIDE = 'Right' and INCV = M_V then the column process having the
*  first entry V(IV,JV) must also have the first column of sub( C ) and
*  MOD(JV-1,NB_V) must be equal to MOD(JC-1,NB_C), if INCV = 1 only the
*  last equality must be satisfied.
*
*  Arguments
*  =========
*
*  SIDE    (global input) CHARACTER
*          = 'L': form  Q * sub( C ),
*          = 'R': form  sub( C ) * Q, Q = Q**T.
*
*  M       (global input) INTEGER
*          The number of rows to be operated on i.e. the number of rows
*          of the distributed submatrix sub( C ). M >= 0.
*
*  N       (global input) INTEGER
*          The number of columns to be operated on i.e. the number of
*          columns of the distributed submatrix sub( C ). N >= 0.
*
*  V       (local input) DOUBLE PRECISION pointer into the local memory
*          to an array of dimension (LLD_V,*) containing the local
*          pieces of the distributed vectors V representing the
*          Householder transformation Q,
*             V(IV:IV+M-1,JV) if SIDE = 'L' and INCV = 1,
*             V(IV,JV:JV+M-1) if SIDE = 'L' and INCV = M_V,
*             V(IV:IV+N-1,JV) if SIDE = 'R' and INCV = 1,
*             V(IV,JV:JV+N-1) if SIDE = 'R' and INCV = M_V,
*
*          The vector v in the representation of Q. V is not used if
*          TAU = 0.
*
*  IV      (global input) INTEGER
*          The row index in the global array V indicating the first
*          row of sub( V ).
*
*  JV      (global input) INTEGER
*          The column index in the global array V indicating the
*          first column of sub( V ).
*
*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix V.
*
*  INCV    (global input) INTEGER
*          The global increment for the elements of V. Only two values
*          of INCV are supported in this version, namely 1 and M_V.
*          INCV must not be zero.
*
*  TAU     (local input) DOUBLE PRECISION array, dimension  LOCc(JV) if
*          INCV = 1, and LOCr(IV) otherwise. This array contains the
*          Householder scalars related to the Householder vectors.
*          TAU is tied to the distributed matrix V.
*
*  C       (local input/local output) DOUBLE PRECISION pointer into the
*          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
*          containing the local pieces of sub( C ). On exit, sub( C )
*          is overwritten by the Q * sub( C ) if SIDE = 'L', or
*          sub( C ) * Q if SIDE = 'R'.
*
*  IC      (global input) INTEGER
*          The row index in the global array C indicating the first
*          row of sub( C ).
*
*  JC      (global input) INTEGER
*          The column index in the global array C indicating the
*          first column of sub( C ).
*
*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix C.
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
*          If INCV = 1,
*            if SIDE = 'L',
*              if IVCOL = ICCOL,
*                LWORK >= NqC0
*              else
*                LWORK >= MpC0 + MAX( 1, NqC0 )
*              end if
*            else if SIDE = 'R',
*              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
*                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
*            end if
*          else if INCV = M_V,
*            if SIDE = 'L',
*              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
*                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
*            else if SIDE = 'R',
*              if IVROW = ICROW,
*                LWORK >= MpC0
*              else
*                LWORK >= NqC0 + MAX( 1, MpC0 )
*              end if
*            end if
*          end if
*
*          where LCM is the least common multiple of NPROW and NPCOL and
*          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
*          LCMQ = LCM / NPCOL,
*
*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
*
*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
*          the subroutine BLACS_GRIDINFO.
*
*  Alignment requirements
*  ======================
*
*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
*  must verify some alignment properties, namely the following
*  expressions should be true:
*
*  MB_V = NB_V,
*
*  If INCV = 1,
*    If SIDE = 'Left',
*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
*    If SIDE = 'Right',
*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
*  else if INCV = M_V,
*    If SIDE = 'Left',
*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
*    If SIDE = 'Right',
*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
*  end if
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   ONE, ZERO
      PARAMETER          ( ONE  = 1.0D+0, ZERO = 0.0D+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            CCBLCK, CRBLCK
      CHARACTER          COLBTOP, ROWBTOP
      INTEGER            ICCOL, ICOFF, ICROW, ICTXT, IIC, IIV, IOFFC,
     $                   IOFFV, IPW, IROFF, IVCOL, IVROW, JJC, JJV, LDC,
     $                   LDV, MYCOL, MYROW, MP, NCC, NCV, NPCOL, NPROW,
     $                   NQ, RDEST
      DOUBLE PRECISION   TAULOC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DCOPY, DGEBR2D, DGEBS2D,
     $                   DGEMV, DGER, DGERV2D, DGESD2D,
     $                   DGSUM2D, DLASET, INFOG2L, PB_TOPGET,
     $                   PBDTRNV
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            NUMROC
      EXTERNAL           LSAME, NUMROC
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible
*
      IF( M.LE.0 .OR. N.LE.0 )
     $   RETURN
*
*     Get grid parameters.
*
      ICTXT = DESCC( CTXT_ )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Figure local indexes
*
      CALL INFOG2L( IC, JC, DESCC, NPROW, NPCOL, MYROW, MYCOL, IIC, JJC,
     $              ICROW, ICCOL )
      CALL INFOG2L( IV, JV, DESCV, NPROW, NPCOL, MYROW, MYCOL, IIV, JJV,
     $              IVROW, IVCOL )
      NCC = NUMROC( DESCC( N_ ), DESCC( NB_ ), MYCOL, DESCC( CSRC_ ),
     $              NPCOL )
      NCV = NUMROC( DESCV( N_ ), DESCV( NB_ ), MYCOL, DESCV( CSRC_ ),
     $              NPCOL )
      LDC = DESCC( LLD_ )
      LDV = DESCV( LLD_ )
*
*     Clamp the local indexes to the local array extents.  When this
*     process owns no local column of C (NCC = 0) or of V (NCV = 0),
*     JJC or JJV becomes 0 and the offset IOFFC or IOFFV below is
*     non-positive.  C( IOFFC ) and V( IOFFV ) are therefore only
*     referenced under an IOFFC.GT.0 / IOFFV.GT.0 test wherever the
*     enclosing branch does not already imply local ownership.
*
      IIC = MIN( IIC, LDC )
      IIV = MIN( IIV, LDV )
      JJC = MIN( JJC, NCC )
      JJV = MIN( JJV, NCV )
      IOFFC = IIC+(JJC-1)*LDC
      IOFFV = IIV+(JJV-1)*LDV
*
*     MP and NQ are the local row and column counts of sub( C )
*
      IROFF = MOD( IC-1, DESCC( MB_ ) )
      ICOFF = MOD( JC-1, DESCC( NB_ ) )
      MP = NUMROC( M+IROFF, DESCC( MB_ ), MYROW, ICROW, NPROW )
      NQ = NUMROC( N+ICOFF, DESCC( NB_ ), MYCOL, ICCOL, NPCOL )
      IF( MYROW.EQ.ICROW )
     $   MP = MP - IROFF
      IF( MYCOL.EQ.ICCOL )
     $   NQ = NQ - ICOFF
*
*     Is sub( C ) only distributed over a process row ?
*
      CRBLCK = ( M.LE.(DESCC( MB_ )-IROFF) )
*
*     Is sub( C ) only distributed over a process column ?
*
      CCBLCK = ( N.LE.(DESCC( NB_ )-ICOFF) )
*
      IF( LSAME( SIDE, 'L' ) ) THEN
*
*        Form Q * sub( C ).  RDEST is the destination process row of
*        the column-wise sums below (-1: result left on all processes
*        of the scope).
*
         IF( CRBLCK ) THEN
            RDEST = ICROW
         ELSE
            RDEST = -1
         END IF
*
         IF( CCBLCK ) THEN
*
*           sub( C ) is distributed over a process column
*
            IF( DESCV( M_ ).EQ.INCV ) THEN
*
*              Transpose row vector V
*
               IPW = MP+1
               CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M,
     $                       DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO,
     $                       WORK, 1, IVROW, IVCOL, ICROW, ICCOL,
     $                       WORK( IPW ) )
*
*              Perform the local computation within a process column
*
               IF( MYCOL.EQ.ICCOL ) THEN
*
                  IF( MYROW.EQ.IVROW ) THEN
*
                     CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
     $                             TAU( IIV ), 1 )
                     TAULOC = TAU( IIV )
*
                  ELSE
*
                     CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1,
     $                             TAULOC, 1, IVROW, MYCOL )
*
                  END IF
*
                  IF( TAULOC.NE.ZERO ) THEN
*
*                    w := sub( C )' * v
*
                     IF( MP.GT.0 ) THEN
                        CALL DGEMV( 'Transpose', MP, NQ, ONE,
     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
     $                              WORK( IPW ), 1 )
                     ELSE
                        CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
     $                               WORK( IPW ), MAX( 1, NQ ) )
                     END IF
                     CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
     $                             WORK( IPW ), MAX( 1, NQ ), RDEST,
     $                             MYCOL )
*
*                    sub( C ) := sub( C ) - v * w'
*
                     CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ),
     $                          1, C( IOFFC ), LDC )
                  END IF
*
               END IF
*
            ELSE
*
*              V is a column vector
*
               IF( IVCOL.EQ.ICCOL ) THEN
*
*                 Perform the local computation within a process column
*
                  IF( MYCOL.EQ.ICCOL ) THEN
*
                     TAULOC = TAU( JJV )
*
                     IF( TAULOC.NE.ZERO ) THEN
*
*                       w := sub( C )' * v
*
                        IF( MP.GT.0 ) THEN
                           CALL DGEMV( 'Transpose', MP, NQ, ONE,
     $                                 C( IOFFC ), LDC, V( IOFFV ), 1,
     $                                 ZERO, WORK, 1 )
                        ELSE
                           CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
     $                                  WORK, MAX( 1, NQ ) )
                        END IF
                        CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
     $                                WORK, MAX( 1, NQ ), RDEST, MYCOL )
*
*                       sub( C ) := sub( C ) - v * w'
*
                        CALL DGER( MP, NQ, -TAULOC, V( IOFFV ), 1, WORK,
     $                             1, C( IOFFC ), LDC )
                     END IF
*
                  END IF
*
               ELSE
*
*                 Send V and TAU to the process column ICCOL.  TAU is
*                 appended to the MP local entries of V so that both
*                 travel in a single message of length IPW = MP+1.
*
                  IF( MYCOL.EQ.IVCOL ) THEN
*
                     IPW = MP+1
                     CALL DCOPY( MP, V( IOFFV ), 1, WORK, 1 )
                     WORK( IPW ) = TAU( JJV )
                     CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
     $                             ICCOL )
*
                  ELSE IF( MYCOL.EQ.ICCOL ) THEN
*
                     IPW = MP+1
                     CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, MYROW,
     $                             IVCOL )
                     TAULOC = WORK( IPW )
*
                     IF( TAULOC.NE.ZERO ) THEN
*
*                       w := sub( C )' * v
*
                        IF( MP.GT.0 ) THEN
                           CALL DGEMV( 'Transpose', MP, NQ, ONE,
     $                                 C( IOFFC ), LDC, WORK, 1, ZERO,
     $                                 WORK( IPW ), 1 )
                        ELSE
                           CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
     $                                  WORK( IPW ), MAX( 1, NQ ) )
                        END IF
                        CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
     $                                WORK( IPW ), MAX( 1, NQ ), RDEST,
     $                                MYCOL )
*
*                       sub( C ) := sub( C ) - v * w'
*
                        CALL DGER( MP, NQ, -TAULOC, WORK, 1,
     $                             WORK( IPW ), 1, C( IOFFC ), LDC )
                     END IF
*
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           sub( C ) is a proper distributed matrix
*
            IF( DESCV( M_ ).EQ.INCV ) THEN
*
*              Transpose and broadcast row vector V
*
               IPW = MP+1
               CALL PBDTRNV( ICTXT, 'Rowwise', 'Transpose', M,
     $                       DESCV( NB_ ), IROFF, V( IOFFV ), LDV, ZERO,
     $                       WORK, 1, IVROW, IVCOL, ICROW, -1,
     $                       WORK( IPW ) )
*
*              Broadcast TAU from process row IVROW within each
*              process column
*
               IF( MYROW.EQ.IVROW ) THEN
*
                  CALL DGEBS2D( ICTXT, 'Columnwise', ' ', 1, 1,
     $                          TAU( IIV ), 1 )
                  TAULOC = TAU( IIV )
*
               ELSE
*
                  CALL DGEBR2D( ICTXT, 'Columnwise', ' ', 1, 1, TAULOC,
     $                          1, IVROW, MYCOL )
*
               END IF
*
               IF( TAULOC.NE.ZERO ) THEN
*
*                 w := sub( C )' * v
*
                  IF( MP.GT.0 ) THEN
                     IF( IOFFC.GT.0 )
     $                  CALL DGEMV( 'Transpose', MP, NQ, ONE,
     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
     $                              WORK( IPW ), 1 )
                  ELSE
                     CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
     $                            WORK( IPW ), MAX( 1, NQ ) )
                  END IF
                  CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
     $                          WORK( IPW ), MAX( 1, NQ ), RDEST,
     $                          MYCOL )
*
*                 sub( C ) := sub( C ) - v * w'
*
                  IF( IOFFC.GT.0 )
     $               CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ),
     $                          1, C( IOFFC ), LDC )
               END IF
*
            ELSE
*
*              Broadcast column vector V (TAU is appended to the MP
*              local entries of V, message length IPW = MP+1)
*
               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Rowwise', ROWBTOP )
               IF( MYCOL.EQ.IVCOL ) THEN
*
                  IPW = MP+1
                  CALL DCOPY( MP, V( IOFFV ), 1, WORK, 1 )
                  WORK( IPW ) = TAU( JJV )
                  CALL DGEBS2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1,
     $                          WORK, IPW )
                  TAULOC = TAU( JJV )
*
               ELSE
*
                  IPW = MP+1
                  CALL DGEBR2D( ICTXT, 'Rowwise', ROWBTOP, IPW, 1, WORK,
     $                          IPW, MYROW, IVCOL )
                  TAULOC = WORK( IPW )
*
               END IF
*
               IF( TAULOC.NE.ZERO ) THEN
*
*                 w := sub( C )' * v
*
                  IF( MP.GT.0 ) THEN
                     IF( IOFFC.GT.0 )
     $                  CALL DGEMV( 'Transpose', MP, NQ, ONE,
     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
     $                              WORK( IPW ), 1 )
                  ELSE
                     CALL DLASET( 'All', NQ, 1, ZERO, ZERO,
     $                            WORK( IPW ), MAX( 1, NQ ) )
                  END IF
                  CALL DGSUM2D( ICTXT, 'Columnwise', ' ', NQ, 1,
     $                          WORK( IPW ), MAX( 1, NQ ), RDEST,
     $                          MYCOL )
*
*                 sub( C ) := sub( C ) - v * w'
*
                  IF( IOFFC.GT.0 )
     $               CALL DGER( MP, NQ, -TAULOC, WORK, 1, WORK( IPW ),
     $                          1, C( IOFFC ), LDC )
               END IF
*
            END IF
*
         END IF
*
      ELSE
*
*        Form sub( C ) * Q.  RDEST is the destination process row of
*        the row-wise sums below (-1: result left on all processes of
*        the scope).
*
         IF( CCBLCK ) THEN
            RDEST = MYROW
         ELSE
            RDEST = -1
         END IF
*
         IF( CRBLCK ) THEN
*
*           sub( C ) is distributed over a process row
*
            IF( DESCV( M_ ).EQ.INCV ) THEN
*
*              V is a row vector
*
               IF( IVROW.EQ.ICROW ) THEN
*
*                 Perform the local computation within a process row
*
                  IF( MYROW.EQ.ICROW ) THEN
*
                     TAULOC = TAU( IIV )
*
                     IF( TAULOC.NE.ZERO ) THEN
*
*                       w := sub( C ) * v
*
                        IF( NQ.GT.0 ) THEN
                           CALL DGEMV( 'No transpose', MP, NQ, ONE,
     $                                 C( IOFFC ), LDC, V( IOFFV ), LDV,
     $                                 ZERO, WORK, 1 )
                        ELSE
                           CALL DLASET( 'All', MP, 1, ZERO, ZERO,
     $                                  WORK, MAX( 1, MP ) )
                        END IF
                        CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
     $                                WORK, MAX( 1, MP ), RDEST, ICCOL )
*
*                       sub( C ) := sub( C ) - w * v'
*
                        IF( IOFFV.GT.0 .AND. IOFFC.GT.0 )
     $                     CALL DGER( MP, NQ, -TAULOC, WORK, 1,
     $                                V( IOFFV ), LDV, C( IOFFC ), LDC )
                     END IF
*
                  END IF
*
               ELSE
*
*                 Send V and TAU to the process row ICROW.  TAU is
*                 appended to the NQ local entries of V so that both
*                 travel in a single message of length IPW = NQ+1.
*
                  IF( MYROW.EQ.IVROW ) THEN
*
*                    V( IOFFV ) is only referenced when this process
*                    holds local columns of V (IOFFV > 0)
*
                     IPW = NQ+1
                     IF( IOFFV.GT.0 )
     $                  CALL DCOPY( NQ, V( IOFFV ), LDV, WORK, 1 )
                     WORK( IPW ) = TAU( IIV )
                     CALL DGESD2D( ICTXT, IPW, 1, WORK, IPW, ICROW,
     $                             MYCOL )
*
                  ELSE IF( MYROW.EQ.ICROW ) THEN
*
                     IPW = NQ+1
                     CALL DGERV2D( ICTXT, IPW, 1, WORK, IPW, IVROW,
     $                             MYCOL )
                     TAULOC = WORK( IPW )
*
                     IF( TAULOC.NE.ZERO ) THEN
*
*                       w := sub( C ) * v
*
                        IF( NQ.GT.0 ) THEN
                           CALL DGEMV( 'No transpose', MP, NQ, ONE,
     $                                 C( IOFFC ), LDC, WORK, 1, ZERO,
     $                                 WORK( IPW ), 1 )
                        ELSE
                           CALL DLASET( 'All', MP, 1, ZERO, ZERO,
     $                                  WORK( IPW ), MAX( 1, MP ) )
                        END IF
                        CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
     $                                WORK( IPW ), MAX( 1, MP ), RDEST,
     $                                ICCOL )
*
*                       sub( C ) := sub( C ) - w * v'
*                       (skipped when this process owns no local
*                       column of C, i.e. IOFFC <= 0 and NQ = 0)
*
                        IF( IOFFC.GT.0 )
     $                     CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1,
     $                                WORK, 1, C( IOFFC ), LDC )
                     END IF
*
                  END IF
*
               END IF
*
            ELSE
*
*              Transpose column vector V
*
               IPW = NQ+1
               CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N,
     $                       DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO,
     $                       WORK, 1, IVROW, IVCOL, ICROW, ICCOL,
     $                       WORK( IPW ) )
*
*              Perform the local computation within a process row
*
               IF( MYROW.EQ.ICROW ) THEN
*
                  IF( MYCOL.EQ.IVCOL ) THEN
*
                     CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1,
     $                             TAU( JJV ), 1 )
                     TAULOC = TAU( JJV )
*
                  ELSE
*
                     CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC,
     $                             1, MYROW, IVCOL )
*
                  END IF
*
                  IF( TAULOC.NE.ZERO ) THEN
*
*                    w := sub( C ) * v
*
                     IF( NQ.GT.0 ) THEN
                        CALL DGEMV( 'No transpose', MP, NQ, ONE,
     $                              C( IOFFC ), LDC, WORK, 1, ZERO,
     $                              WORK( IPW ), 1 )
                     ELSE
                        CALL DLASET( 'All', MP, 1, ZERO, ZERO,
     $                               WORK( IPW ), MAX( 1, MP ) )
                     END IF
                     CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
     $                             WORK( IPW ), MAX( 1, MP ), RDEST,
     $                             ICCOL )
*
*                    sub( C ) := sub( C ) - w * v'
*                    (skipped when this process owns no local column
*                    of C, i.e. IOFFC <= 0 and NQ = 0)
*
                     IF( IOFFC.GT.0 )
     $                  CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1,
     $                             WORK, 1, C( IOFFC ), LDC )
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           sub( C ) is a proper distributed matrix
*
            IF( DESCV( M_ ).EQ.INCV ) THEN
*
*              Broadcast row vector V (TAU is appended to the NQ local
*              entries of V, message length IPW = NQ+1)
*
               CALL PB_TOPGET( ICTXT, 'Broadcast', 'Columnwise',
     $                         COLBTOP )
               IF( MYROW.EQ.IVROW ) THEN
*
                  IPW = NQ+1
                  IF( IOFFV.GT.0 )
     $               CALL DCOPY( NQ, V( IOFFV ), LDV, WORK, 1 )
                  WORK( IPW ) = TAU( IIV )
                  CALL DGEBS2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
     $                          WORK, IPW )
                  TAULOC = TAU( IIV )
*
               ELSE
*
                  IPW = NQ+1
                  CALL DGEBR2D( ICTXT, 'Columnwise', COLBTOP, IPW, 1,
     $                          WORK, IPW, IVROW, MYCOL )
                  TAULOC = WORK( IPW )
*
               END IF
*
               IF( TAULOC.NE.ZERO ) THEN
*
*                 w := sub( C ) * v
*
                  IF( NQ.GT.0 ) THEN
                     CALL DGEMV( 'No Transpose', MP, NQ, ONE,
     $                           C( IOFFC ), LDC, WORK, 1, ZERO,
     $                           WORK( IPW ), 1 )
                  ELSE
                     CALL DLASET( 'All', MP, 1, ZERO, ZERO,
     $                            WORK( IPW ), MAX( 1, MP ) )
                  END IF
                  CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
     $                          WORK( IPW ), MAX( 1, MP ), RDEST,
     $                          ICCOL )
*
*                 sub( C ) := sub( C ) - w * v'
*
                  IF( IOFFC.GT.0 )
     $               CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK,
     $                          1, C( IOFFC ), LDC )
               END IF
*
            ELSE
*
*              Transpose and broadcast column vector V
*
               IPW = NQ+1
               CALL PBDTRNV( ICTXT, 'Columnwise', 'Transpose', N,
     $                       DESCV( MB_ ), ICOFF, V( IOFFV ), 1, ZERO,
     $                       WORK, 1, IVROW, IVCOL, -1, ICCOL,
     $                       WORK( IPW ) )
*
*              Broadcast TAU from process column IVCOL within each
*              process row
*
               IF( MYCOL.EQ.IVCOL ) THEN
*
                  CALL DGEBS2D( ICTXT, 'Rowwise', ' ', 1, 1, TAU( JJV ),
     $                          1 )
                  TAULOC = TAU( JJV )
*
               ELSE
*
                  CALL DGEBR2D( ICTXT, 'Rowwise', ' ', 1, 1, TAULOC, 1,
     $                          MYROW, IVCOL )
*
               END IF
*
               IF( TAULOC.NE.ZERO ) THEN
*
*                 w := sub( C ) * v
*
                  IF( NQ.GT.0 ) THEN
                     CALL DGEMV( 'No transpose', MP, NQ, ONE,
     $                           C( IOFFC ), LDC, WORK, 1, ZERO,
     $                           WORK( IPW ), 1 )
                  ELSE
                     CALL DLASET( 'All', MP, 1, ZERO, ZERO, WORK( IPW ),
     $                            MAX( 1, MP ) )
                  END IF
                  CALL DGSUM2D( ICTXT, 'Rowwise', ' ', MP, 1,
     $                          WORK( IPW ), MAX( 1, MP ), RDEST,
     $                          ICCOL )
*
*                 sub( C ) := sub( C ) - w * v'
*                 (skipped when this process owns no local column of
*                 C, i.e. IOFFC <= 0 and NQ = 0)
*
                  IF( IOFFC.GT.0 )
     $               CALL DGER( MP, NQ, -TAULOC, WORK( IPW ), 1, WORK,
     $                          1, C( IOFFC ), LDC )
               END IF
*
            END IF
*
         END IF
*
      END IF
*
      RETURN
*
*     End of PDLARF
*
      END