*     Extracted from d2/d1b/pdlarz_8f_source.html

      SUBROUTINE pdlarz( SIDE, M, N, L, V, IV, JV, DESCV, INCV, TAU, C,
     $                   IC, JC, DESCC, WORK )
*
*  -- ScaLAPACK auxiliary routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 25, 2001
*
*     .. Scalar Arguments ..
      CHARACTER          SIDE
      INTEGER            IC, INCV, IV, JC, JV, L, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCC( * ), DESCV( * )
      DOUBLE PRECISION   C( * ), TAU( * ), V( * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  PDLARZ applies a real elementary reflector Q (or Q**T) to a real
*  M-by-N distributed matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1), from
*  either the left or the right. Q is represented in the form
*
*        Q = I - tau * v * v'
*
*  where tau is a real scalar and v is a real vector.
*
*  If tau = 0, then Q is taken to be the unit matrix.
*
*  Q is a product of k elementary reflectors as returned by PDTZRZF.
*
*  Only the last L entries of v are stored in V.  The routine combines
*  them with the first row (SIDE = 'L') or first column (SIDE = 'R')
*  of sub( C ), taken with an implicit coefficient of one, and with
*  the last L rows (resp. columns) of sub( C ); no other part of
*  sub( C ) is referenced.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be viewed as a subclass of matrices, a
*  distributed vector is considered to be a distributed matrix.
*
*  Restrictions
*  ============
*
*  If SIDE = 'Left' and INCV = 1, then the row process having the first
*  entry V(IV,JV) must also own C(IC+M-L,JC:JC+N-1). Moreover,
*  MOD(IV-1,MB_V) must be equal to MOD(IC+M-L-1,MB_C), if INCV=M_V, only
*  the last equality must be satisfied.
*
*  If SIDE = 'Right' and INCV = M_V then the column process having the
*  first entry V(IV,JV) must also own C(IC:IC+M-1,JC+N-L) and
*  MOD(JV-1,NB_V) must be equal to MOD(JC+N-L-1,NB_C), if INCV = 1 only
*  the last equality must be satisfied.
*
*  Arguments
*  =========
*
*  SIDE    (global input) CHARACTER
*          = 'L': form  Q * sub( C ),
*          = 'R': form  sub( C ) * Q, Q = Q**T.
*
*  M       (global input) INTEGER
*          The number of rows to be operated on, i.e. the number of
*          rows of the distributed submatrix sub( C ). M >= 0.
*
*  N       (global input) INTEGER
*          The number of columns to be operated on, i.e. the number of
*          columns of the distributed submatrix sub( C ). N >= 0.
*
*  L       (global input) INTEGER
*          The number of entries of the vector v containing the
*          meaningful part of the Householder reflector.
*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
*
*  V       (local input) DOUBLE PRECISION pointer into the local memory
*          to an array of dimension (LLD_V,*) containing the local
*          pieces of the distributed vectors V representing the
*          Householder transformation Q,
*             V(IV:IV+L-1,JV) if SIDE = 'L' and INCV = 1,
*             V(IV,JV:JV+L-1) if SIDE = 'L' and INCV = M_V,
*             V(IV:IV+L-1,JV) if SIDE = 'R' and INCV = 1,
*             V(IV,JV:JV+L-1) if SIDE = 'R' and INCV = M_V,
*
*          The vector v in the representation of Q. V is not used if
*          TAU = 0.
*
*  IV      (global input) INTEGER
*          The row index in the global array V indicating the first
*          row of sub( V ).
*
*  JV      (global input) INTEGER
*          The column index in the global array V indicating the
*          first column of sub( V ).
*
*  DESCV   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix V.
*
*  INCV    (global input) INTEGER
*          The global increment for the elements of V. Only two values
*          of INCV are supported in this version, namely 1 and M_V.
*          INCV must not be zero.
*
*  TAU     (local input) DOUBLE PRECISION array, dimension  LOCc(JV) if
*          INCV = 1, and LOCr(IV) otherwise. This array contains the
*          Householder scalars related to the Householder vectors.
*          TAU is tied to the distributed matrix V.
*
*  C       (local input/local output) DOUBLE PRECISION pointer into the
*          local memory to an array of dimension (LLD_C, LOCc(JC+N-1) ),
*          containing the local pieces of sub( C ). On exit, sub( C )
*          is overwritten by the Q * sub( C ) if SIDE = 'L', or
*          sub( C ) * Q if SIDE = 'R'.
*
*  IC      (global input) INTEGER
*          The row index in the global array C indicating the first
*          row of sub( C ).
*
*  JC      (global input) INTEGER
*          The column index in the global array C indicating the
*          first column of sub( C ).
*
*  DESCC   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix C.
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
*          If INCV = 1,
*            if SIDE = 'L',
*              if IVCOL = ICCOL,
*                LWORK >= NqC0
*              else
*                LWORK >= MpC0 + MAX( 1, NqC0 )
*              end if
*            else if SIDE = 'R',
*              LWORK >= NqC0 + MAX( MAX( 1, MpC0 ), NUMROC( NUMROC(
*                       N+ICOFFC,NB_V,0,0,NPCOL ),NB_V,0,0,LCMQ ) )
*            end if
*          else if INCV = M_V,
*            if SIDE = 'L',
*              LWORK >= MpC0 + MAX( MAX( 1, NqC0 ), NUMROC( NUMROC(
*                       M+IROFFC,MB_V,0,0,NPROW ),MB_V,0,0,LCMP ) )
*            else if SIDE = 'R',
*              if IVROW = ICROW,
*                LWORK >= MpC0
*              else
*                LWORK >= NqC0 + MAX( 1, MpC0 )
*              end if
*            end if
*          end if
*
*          where LCM is the least common multiple of NPROW and NPCOL and
*          LCM = ILCM( NPROW, NPCOL ), LCMP = LCM / NPROW,
*          LCMQ = LCM / NPCOL,
*
*          IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
*          ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
*          ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
*          MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
*          NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
*
*          ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
*          MYROW, MYCOL, NPROW and NPCOL can be determined by calling
*          the subroutine BLACS_GRIDINFO.
*
*  Alignment requirements
*  ======================
*
*  The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
*  must verify some alignment properties, namely the following
*  expressions should be true:
*
*  MB_V = NB_V,
*
*  If INCV = 1,
*    If SIDE = 'Left',
*      ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
*    If SIDE = 'Right',
*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
*  else if INCV = M_V,
*    If SIDE = 'Left',
*      ( MB_V.EQ.NB_V .AND. MB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
*    If SIDE = 'Right',
*      ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
*  end if
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   lld_, mb_, m_, nb_, n_, rsrc_
      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )
      DOUBLE PRECISION   ONE, ZERO
      parameter( one  = 1.0d+0, zero = 0.0d+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            CCBLCK, CRBLCK, LEFT
      CHARACTER          COLBTOP, ROWBTOP
      INTEGER            ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
     $                   icrow1, icrow2, ictxt, iic1, iic2, iiv, ioffc1,
     $                   ioffc2, ioffv, ipw, iroffc1, iroffc2, iroffv,
     $                   ivcol, ivrow, jjc1, jjc2, jjv, ldc, ldv, mpc2,
     $                   mpv, mycol, myrow, ncc, ncv, npcol, nprow,
     $                   nqc2, nqv, rdest
      DOUBLE PRECISION   TAULOC
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, daxpy, dcopy, dgebr2d,
     $                   dgebs2d, dgemv, dger, dgerv2d,
     $                   dgesd2d, dgsum2d, dlaset, infog2l,
     $                   pb_topget, pbdtrnv
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            NUMROC
      EXTERNAL           lsame, numroc
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min, mod
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible
*
      IF( m.LE.0 .OR. n.LE.0 )
     $   RETURN
*
*     Get grid parameters.
*
      ictxt = descc( ctxt_ )
      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
*
*     Figure local indexes
*
      left = lsame( side, 'L' )
*
*     Local position, owner and local extents of the L stored entries
*     of v.  MPV / NQV are the numbers of local rows / columns of V
*     holding entries of v.
*
      CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
     $              ivrow, ivcol )
      iroffv = mod( iv-1, descv( mb_ ) )
      mpv = numroc( l+iroffv, descv( mb_ ), myrow, ivrow, nprow )
      IF( myrow.EQ.ivrow )
     $   mpv = mpv - iroffv
      icoffv = mod( jv-1, descv( nb_ ) )
      nqv = numroc( l+icoffv, descv( nb_ ), mycol, ivcol, npcol )
      IF( mycol.EQ.ivcol )
     $   nqv = nqv - icoffv
      ldv = descv( lld_ )
      ncv = numroc( descv( n_ ), descv( nb_ ), mycol, descv( csrc_ ),
     $              npcol )
*
*     Clamp the local indexes so that the offsets passed to the BLAS
*     stay positive even on processes owning no local column.
*
      iiv = min( iiv, ldv )
      jjv = min( jjv, max( 1, ncv ) )
      ioffv = iiv+(jjv-1)*ldv
*
*     (IIC1,JJC1) locates C(IC,JC), the first entry of sub( C ).
*
      ncc = numroc( descc( n_ ), descc( nb_ ), mycol, descc( csrc_ ),
     $              npcol )
      CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol,
     $              iic1, jjc1, icrow1, iccol1 )
      iroffc1 = mod( ic-1, descc( mb_ ) )
      icoffc1 = mod( jc-1, descc( nb_ ) )
      ldc = descc( lld_ )
      iic1 = min( iic1, ldc )
      jjc1 = min( jjc1, max( 1, ncc ) )
      ioffc1 = iic1 + ( jjc1-1 ) * ldc
*
*     (IIC2,JJC2) locates the first of the last L rows (SIDE = 'L') or
*     of the last L columns (SIDE = 'R') of sub( C ).
*
      IF( left ) THEN
         CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
     $                 iic2, jjc2, icrow2, iccol2 )
         iroffc2 = mod( ic+m-l-1, descc( mb_ ) )
         icoffc2 = mod( jc-1, descc( nb_ ) )
         nqc2 = numroc( n+icoffc2, descc( nb_ ), mycol, iccol2, npcol )
         IF( mycol.EQ.iccol2 )
     $      nqc2 = nqc2 - icoffc2
      ELSE
         CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
     $                 iic2, jjc2, icrow2, iccol2 )
         iroffc2 = mod( ic-1, descc( mb_ ) )
         mpc2 = numroc( m+iroffc2, descc( mb_ ), myrow, icrow2, nprow )
         IF( myrow.EQ.icrow2 )
     $      mpc2 = mpc2 - iroffc2
         icoffc2 = mod( jc+n-l-1, descc( nb_ ) )
      END IF
      iic2 = min( iic2, ldc )
      jjc2 = min( jjc2, max( 1, ncc ) )
      ioffc2 = iic2 + ( jjc2-1 ) * ldc
*
*     Is sub( C ) only distributed over a process row ?
*
      crblck = ( m.LE.(descc( mb_ )-iroffc1) )
*
*     Is sub( C ) only distributed over a process column ?
*
      ccblck = ( n.LE.(descc( nb_ )-icoffc1) )
*
      IF( left ) THEN
*
*        Form Q * sub( C ).  The partial products w are summed over
*        each process column; RDEST is the process row receiving the
*        sum, or -1 when every process row needs it.
*
         IF( crblck ) THEN
            rdest = icrow2
         ELSE
            rdest = -1
         END IF
*
         IF( ccblck ) THEN
*
*           sub( C ) is distributed over a process column
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              Transpose row vector V (ICOFFV = IROFFC2)
*
               ipw = mpv+1
               CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
     $                       descv( nb_ ), iroffc2, v( ioffv ), ldv,
     $                       zero,
     $                       work, 1, ivrow, ivcol, icrow2, iccol2,
     $                       work( ipw ) )
*
*              Perform the local computation within a process column
*
               IF( mycol.EQ.iccol2 ) THEN
*
                  IF( myrow.EQ.ivrow ) THEN
*
                     CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
     $                             tau( iiv ), 1 )
                     tauloc = tau( iiv )
*
                  ELSE
*
                     CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1,
     $                             tauloc, 1, ivrow, mycol )
*
                  END IF
*
                  IF( tauloc.NE.zero ) THEN
*
*                    w := sub( C )' * v
*
                     IF( mpv.GT.0 ) THEN
                        CALL dgemv( 'Transpose', mpv, nqc2, one,
     $                              c( ioffc2 ), ldc, work, 1, zero,
     $                              work( ipw ), 1 )
                     ELSE
                        CALL dlaset( 'All', nqc2, 1, zero, zero,
     $                               work( ipw ), max( 1, nqc2 ) )
                     END IF
*
*                    Add row IC of sub( C ) to w (w has stride 1)
*
                     IF( myrow.EQ.icrow1 )
     $                  CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
     $                              work( ipw ), 1 )
*
                     CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
     $                             work( ipw ), max( 1, nqc2 ), rdest,
     $                             mycol )
*
*                    sub( C ) := sub( C ) - v * w'
*
                     IF( myrow.EQ.icrow1 )
     $                  CALL daxpy( nqc2, -tauloc, work( ipw ),
     $                              1, c( ioffc1 ), ldc )
                     CALL dger( mpv, nqc2, -tauloc, work, 1,
     $                          work( ipw ), 1, c( ioffc2 ), ldc )
                  END IF
*
               END IF
*
            ELSE
*
*              V is a column vector
*
               IF( ivcol.EQ.iccol2 ) THEN
*
*                 Perform the local computation within a process column
*
                  IF( mycol.EQ.iccol2 ) THEN
*
                     tauloc = tau( jjv )
*
                     IF( tauloc.NE.zero ) THEN
*
*                       w := sub( C )' * v
*
                        IF( mpv.GT.0 ) THEN
                           CALL dgemv( 'Transpose', mpv, nqc2, one,
     $                                 c( ioffc2 ), ldc, v( ioffv ), 1,
     $                                 zero, work, 1 )
                        ELSE
                           CALL dlaset( 'All', nqc2, 1, zero, zero,
     $                                  work, max( 1, nqc2 ) )
                        END IF
*
*                       Add row IC of sub( C ) to w (w has stride 1)
*
                        IF( myrow.EQ.icrow1 )
     $                     CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
     $                                 work, 1 )
*
                        CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
     $                                work, max( 1, nqc2 ), rdest,
     $                                mycol )
*
*                       sub( C ) := sub( C ) - v * w'
*
                        IF( myrow.EQ.icrow1 )
     $                     CALL daxpy( nqc2, -tauloc, work,
     $                                 1, c( ioffc1 ),
     $                                 ldc )
                        CALL dger( mpv, nqc2, -tauloc, v( ioffv ), 1,
     $                             work, 1, c( ioffc2 ), ldc )
                     END IF
*
                  END IF
*
               ELSE
*
*                 Send V and TAU to the process column ICCOL2
*
                  IF( mycol.EQ.ivcol ) THEN
*
                     ipw = mpv+1
                     CALL dcopy( mpv, v( ioffv ), 1, work, 1 )
                     work( ipw ) = tau( jjv )
                     CALL dgesd2d( ictxt, ipw, 1, work, ipw, myrow,
     $                             iccol2 )
*
                  ELSE IF( mycol.EQ.iccol2 ) THEN
*
                     ipw = mpv+1
                     CALL dgerv2d( ictxt, ipw, 1, work, ipw, myrow,
     $                             ivcol )
                     tauloc = work( ipw )
*
                     IF( tauloc.NE.zero ) THEN
*
*                       w := sub( C )' * v
*
                        IF( mpv.GT.0 ) THEN
                           CALL dgemv( 'Transpose', mpv, nqc2, one,
     $                                 c( ioffc2 ), ldc, work, 1, zero,
     $                                 work( ipw ), 1 )
                        ELSE
                           CALL dlaset( 'All', nqc2, 1, zero, zero,
     $                                  work( ipw ), max( 1, nqc2 ) )
                        END IF
*
*                       Add row IC of sub( C ) to w (w has stride 1)
*
                        IF( myrow.EQ.icrow1 )
     $                     CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
     $                                 work( ipw ), 1 )
*
                        CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
     $                                work( ipw ), max( 1, nqc2 ),
     $                                rdest, mycol )
*
*                       sub( C ) := sub( C ) - v * w'
*
                        IF( myrow.EQ.icrow1 )
     $                     CALL daxpy( nqc2, -tauloc, work( ipw ),
     $                                 1, c( ioffc1 ),
     $                                 ldc )
                        CALL dger( mpv, nqc2, -tauloc, work, 1,
     $                             work( ipw ), 1, c( ioffc2 ), ldc )
                     END IF
*
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           sub( C ) is a proper distributed matrix
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              Transpose and broadcast row vector V (ICOFFV=IROFFC2)
*
               ipw = mpv+1
               CALL pbdtrnv( ictxt, 'Rowwise', 'Transpose', m,
     $                       descv( nb_ ), iroffc2, v( ioffv ), ldv,
     $                       zero,
     $                       work, 1, ivrow, ivcol, icrow2, -1,
     $                       work( ipw ) )
*
*              Perform the local computation within a process column
*
               IF( myrow.EQ.ivrow ) THEN
*
                  CALL dgebs2d( ictxt, 'Columnwise', ' ', 1, 1,
     $                          tau( iiv ), 1 )
                  tauloc = tau( iiv )
*
               ELSE
*
                  CALL dgebr2d( ictxt, 'Columnwise', ' ', 1, 1, tauloc,
     $                          1, ivrow, mycol )
*
               END IF
*
               IF( tauloc.NE.zero ) THEN
*
*                 w := sub( C )' * v
*
                  IF( mpv.GT.0 ) THEN
                     CALL dgemv( 'Transpose', mpv, nqc2, one,
     $                           c( ioffc2 ), ldc, work, 1, zero,
     $                           work( ipw ), 1 )
                  ELSE
                     CALL dlaset( 'All', nqc2, 1, zero, zero,
     $                            work( ipw ), max( 1, nqc2 ) )
                  END IF
*
*                 Add row IC of sub( C ) to w (w has stride 1)
*
                  IF( myrow.EQ.icrow1 )
     $               CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
     $                           work( ipw ), 1 )
*
                  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
     $                          work( ipw ), max( 1, nqc2 ), rdest,
     $                          mycol )
*
*                 sub( C ) := sub( C ) - v * w'
*
                  IF( myrow.EQ.icrow1 )
     $               CALL daxpy( nqc2, -tauloc, work( ipw ),
     $                           1, c( ioffc1 ), ldc )
                  CALL dger( mpv, nqc2, -tauloc, work, 1, work( ipw ),
     $                       1, c( ioffc2 ), ldc )
               END IF
*
            ELSE
*
*              Broadcast column vector V
*
               CALL pb_topget( ictxt, 'Broadcast', 'Rowwise', rowbtop )
               IF( mycol.EQ.ivcol ) THEN
*
                  ipw = mpv+1
                  CALL dcopy( mpv, v( ioffv ), 1, work, 1 )
                  work( ipw ) = tau( jjv )
                  CALL dgebs2d( ictxt, 'Rowwise', rowbtop, ipw, 1,
     $                          work, ipw )
                  tauloc = tau( jjv )
*
               ELSE
*
                  ipw = mpv+1
                  CALL dgebr2d( ictxt, 'Rowwise', rowbtop, ipw, 1, work,
     $                          ipw, myrow, ivcol )
                  tauloc = work( ipw )
*
               END IF
*
               IF( tauloc.NE.zero ) THEN
*
*                 w := sub( C )' * v
*
                  IF( mpv.GT.0 ) THEN
                     CALL dgemv( 'Transpose', mpv, nqc2, one,
     $                           c( ioffc2 ), ldc, work, 1, zero,
     $                           work( ipw ), 1 )
                  ELSE
                     CALL dlaset( 'All', nqc2, 1, zero, zero,
     $                            work( ipw ), max( 1, nqc2 ) )
                  END IF
*
*                 Add row IC of sub( C ) to w (w has stride 1)
*
                  IF( myrow.EQ.icrow1 )
     $               CALL daxpy( nqc2, one, c( ioffc1 ), ldc,
     $                           work( ipw ), 1 )
*
                  CALL dgsum2d( ictxt, 'Columnwise', ' ', nqc2, 1,
     $                          work( ipw ), max( 1, nqc2 ), rdest,
     $                          mycol )
*
*                 sub( C ) := sub( C ) - v * w'
*
                  IF( myrow.EQ.icrow1 )
     $               CALL daxpy( nqc2, -tauloc, work( ipw ),
     $                           1, c( ioffc1 ), ldc )
                  CALL dger( mpv, nqc2, -tauloc, work, 1, work( ipw ),
     $                       1, c( ioffc2 ), ldc )
               END IF
*
            END IF
*
         END IF
*
      ELSE
*
*        Form sub( C ) * Q.  The partial products w are summed over
*        each process row; when sub( C ) lies in a single process
*        column the sum is only needed by process column ICCOL2.
*
         IF( ccblck ) THEN
            rdest = myrow
         ELSE
            rdest = -1
         END IF
*
         IF( crblck ) THEN
*
*           sub( C ) is distributed over a process row
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              V is a row vector
*
               IF( ivrow.EQ.icrow2 ) THEN
*
*                 Perform the local computation within a process row
*
                  IF( myrow.EQ.icrow2 ) THEN
*
                     tauloc = tau( iiv )
*
                     IF( tauloc.NE.zero ) THEN
*
*                       w := sub( C ) * v
*
                        IF( nqv.GT.0 ) THEN
                           CALL dgemv( 'No transpose', mpc2, nqv, one,
     $                                 c( ioffc2 ), ldc, v( ioffv ),
     $                                 ldv, zero, work, 1 )
                        ELSE
                           CALL dlaset( 'All', mpc2, 1, zero, zero,
     $                                  work, max( 1, mpc2 ) )
                        END IF
                        IF( mycol.EQ.iccol1 )
     $                     CALL daxpy( mpc2, one, c( ioffc1 ), 1,
     $                                   work, 1 )
*
                        CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
     $                                work, max( 1, mpc2 ), rdest,
     $                               iccol2 )
*
                        IF( mycol.EQ.iccol1 )
     $                     CALL daxpy( mpc2, -tauloc, work, 1,
     $                                 c( ioffc1 ), 1 )
*
*                       sub( C ) := sub( C ) - w * v'
*
                        IF( mpc2.GT.0 .AND. nqv.GT.0 )
     $                     CALL dger( mpc2, nqv, -tauloc, work, 1,
     $                                v( ioffv ), ldv, c( ioffc2 ),
     $                                ldc )
                     END IF
*
                  END IF
*
               ELSE
*
*                 Send V and TAU to the process row ICROW2
*
                  IF( myrow.EQ.ivrow ) THEN
*
                     ipw = nqv+1
                     CALL dcopy( nqv, v( ioffv ), ldv, work, 1 )
                     work( ipw ) = tau( iiv )
                     CALL dgesd2d( ictxt, ipw, 1, work, ipw, icrow2,
     $                             mycol )
*
                  ELSE IF( myrow.EQ.icrow2 ) THEN
*
                     ipw = nqv+1
                     CALL dgerv2d( ictxt, ipw, 1, work, ipw, ivrow,
     $                             mycol )
                     tauloc = work( ipw )
*
                     IF( tauloc.NE.zero ) THEN
*
*                       w := sub( C ) * v
*
                        IF( nqv.GT.0 ) THEN
                           CALL dgemv( 'No transpose', mpc2, nqv, one,
     $                                 c( ioffc2 ), ldc, work, 1, zero,
     $                                 work( ipw ), 1 )
                        ELSE
                           CALL dlaset( 'All', mpc2, 1, zero, zero,
     $                                  work( ipw ), max( 1, mpc2 ) )
                        END IF
                        IF( mycol.EQ.iccol1 )
     $                     CALL daxpy( mpc2, one, c( ioffc1 ), 1,
     $                                   work( ipw ), 1 )
                        CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
     $                                work( ipw ), max( 1, mpc2 ),
     $                                rdest, iccol2 )
                        IF( mycol.EQ.iccol1 )
     $                     CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
     $                                 c( ioffc1 ), 1 )
*
*                       sub( C ) := sub( C ) - w * v'
*
                        CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1,
     $                             work, 1, c( ioffc2 ), ldc )
                     END IF
*
                  END IF
*
               END IF
*
            ELSE
*
*              Transpose column vector V (IROFFV = ICOFFC2)
*
               ipw = nqv+1
               CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
     $                       descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
     $                       work, 1, ivrow, ivcol, icrow2, iccol2,
     $                       work( ipw ) )
*
*              Perform the local computation within a process row
*
               IF( myrow.EQ.icrow2 ) THEN
*
                  IF( mycol.EQ.ivcol ) THEN
*
                     CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1,
     $                             tau( jjv ), 1 )
                     tauloc = tau( jjv )
*
                  ELSE
*
                     CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc,
     $                             1, myrow, ivcol )
*
                  END IF
*
                  IF( tauloc.NE.zero ) THEN
*
*                    w := sub( C ) * v
*
                     IF( nqv.GT.0 ) THEN
                        CALL dgemv( 'No transpose', mpc2, nqv, one,
     $                              c( ioffc2 ), ldc, work, 1, zero,
     $                              work( ipw ), 1 )
                     ELSE
                        CALL dlaset( 'All', mpc2, 1, zero, zero,
     $                               work( ipw ), max( 1, mpc2 ) )
                     END IF
                     IF( mycol.EQ.iccol1 )
     $                  CALL daxpy( mpc2, one, c( ioffc1 ), 1,
     $                              work( ipw ), 1 )
                     CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
     $                             work( ipw ), max( 1, mpc2 ), rdest,
     $                             iccol2 )
                     IF( mycol.EQ.iccol1 )
     $                  CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
     $                              c( ioffc1 ), 1 )
*
*                    sub( C ) := sub( C ) - w * v'
*
                     CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1,
     $                          work, 1, c( ioffc2 ), ldc )
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           sub( C ) is a proper distributed matrix
*
            IF( descv( m_ ).EQ.incv ) THEN
*
*              Broadcast row vector V
*
               CALL pb_topget( ictxt, 'Broadcast', 'Columnwise',
     $                         colbtop )
               IF( myrow.EQ.ivrow ) THEN
*
                  ipw = nqv+1
                  CALL dcopy( nqv, v( ioffv ), ldv, work, 1 )
                  work( ipw ) = tau( iiv )
                  CALL dgebs2d( ictxt, 'Columnwise', colbtop, ipw, 1,
     $                          work, ipw )
                  tauloc = tau( iiv )
*
               ELSE
*
                  ipw = nqv+1
                  CALL dgebr2d( ictxt, 'Columnwise', colbtop, ipw, 1,
     $                          work, ipw, ivrow, mycol )
                  tauloc = work( ipw )
*
               END IF
*
               IF( tauloc.NE.zero ) THEN
*
*                 w := sub( C ) * v
*
                  IF( nqv.GT.0 ) THEN
                     CALL dgemv( 'No Transpose', mpc2, nqv, one,
     $                           c( ioffc2 ), ldc, work, 1, zero,
     $                           work( ipw ), 1 )
                  ELSE
                     CALL dlaset( 'All', mpc2, 1, zero, zero,
     $                            work( ipw ), max( 1, mpc2 ) )
                  END IF
                  IF( mycol.EQ.iccol1 )
     $               CALL daxpy( mpc2, one, c( ioffc1 ), 1,
     $                           work( ipw ), 1 )
*
                  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
     $                          work( ipw ), max( 1, mpc2 ), rdest,
     $                          iccol2 )
                  IF( mycol.EQ.iccol1 )
     $               CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
     $                           c( ioffc1 ), 1 )
*
*                 sub( C ) := sub( C ) - w * v'
*
                  CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1, work,
     $                       1, c( ioffc2 ), ldc )
               END IF
*
            ELSE
*
*              Transpose and broadcast column vector V (ICOFFC2=IROFFV)
*
               ipw = nqv+1
               CALL pbdtrnv( ictxt, 'Columnwise', 'Transpose', n,
     $                       descv( mb_ ), icoffc2, v( ioffv ), 1, zero,
     $                       work, 1, ivrow, ivcol, -1, iccol2,
     $                       work( ipw ) )
*
*              Perform the local computation within a process column
*
               IF( mycol.EQ.ivcol ) THEN
*
                  CALL dgebs2d( ictxt, 'Rowwise', ' ', 1, 1, tau( jjv ),
     $                          1 )
                  tauloc = tau( jjv )
*
               ELSE
*
                  CALL dgebr2d( ictxt, 'Rowwise', ' ', 1, 1, tauloc, 1,
     $                          myrow, ivcol )
*
               END IF
*
               IF( tauloc.NE.zero ) THEN
*
*                 w := sub( C ) * v
*
                  IF( nqv.GT.0 ) THEN
                     CALL dgemv( 'No transpose', mpc2, nqv, one,
     $                           c( ioffc2 ), ldc, work, 1, zero,
     $                           work( ipw ), 1 )
                  ELSE
                     CALL dlaset( 'All', mpc2, 1, zero, zero,
     $                            work( ipw ), max( 1, mpc2 ) )
                  END IF
                  IF( mycol.EQ.iccol1 )
     $               CALL daxpy( mpc2, one, c( ioffc1 ), 1,
     $                           work( ipw ), 1 )
                  CALL dgsum2d( ictxt, 'Rowwise', ' ', mpc2, 1,
     $                          work( ipw ), max( 1, mpc2 ), rdest,
     $                          iccol2 )
                  IF( mycol.EQ.iccol1 )
     $               CALL daxpy( mpc2, -tauloc, work( ipw ), 1,
     $                           c( ioffc1 ), 1 )
*
*                 sub( C ) := sub( C ) - w * v'
*
                  CALL dger( mpc2, nqv, -tauloc, work( ipw ), 1, work,
     $                       1, c( ioffc2 ), ldc )
               END IF
*
            END IF
*
         END IF
*
      END IF
*
      RETURN
*
*     End of PDLARZ
*
      END