      SUBROUTINE PZHPMVF( UPLO, N, ALPHA, A, IA, JA, DESCA, X, IX, JX,
     $                    DESCX, INCX, BETA, Y, IY, JY, DESCY, INCY,
     $                    WORK, LWORK )
*
*
*  -- ScaLAPACK auxiliary routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     Oct 10, 1996
*
*
*  Purpose
*  =======
*
*  PSPMVF performs the distributed matrix-vector operation
*     sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ),
*
*  where sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1),
*
*        sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
*                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
*
*        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
*                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
*
*  alpha and beta are scalars, sub( X ) and sub( Y ) are N element
*  distributed vectors and sub( A ) is an N-by-N symmetric distributed
*  matrix.
*
*  Matrix sub(A) is stored in packed storage.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector descA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
*                                 DT_A = 1.
*  CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) descA[ M_ ]    The number of rows in the global
*                                 array A.
*  N_A    (global) descA[ N_ ]    The number of columns in the global
*                                 array A.
*  MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
*                                 te the rows of the array.
*  NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
*                                 te the columns of the array.
*  RSRC_A (global) descA[ RSRC_ ] The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) descA[ CSRC_ ] The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, myprow, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, mypcol, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be seen as particular matrices, a distributed
*  vector is considered to be a distributed matrix.
*
*  The distributed vectors X and Y must be distributed along the same
*  axis, i.e if INCX = M_X then INCY must be equal to M_Y, similarly if
*  INCX = 1 and INCX <> M_X, then INCY must be equal to 1. Moreover,
*  MB_A = NB_A is required as well as MOD(IA-1,MB_A) = MOD(JA-1,NB_A).
*
*  If INCY = M_Y, the process column having the first entries of
*  sub( Y ) must also contain the first block of sub( A ), similarly,
*  if INCX = 1 and INCX <> M_X, the process row having the first
*  entries of sub( X ) must also contain the first block of sub( A ).
*
*  If INCX = M_X (resp. (INCX = 1 and INCX <> 1) ), the column (resp.
*  row) offset of sub( X ) must be equal to the row offset of sub( A ),
*  i.e MOD(JX-1,NB_X) (resp. MOD(IX-1,MB_X)) = MOD(JA-1,NB_A). Moreover,
*  the column (resp. row) blocksize of X must be equal to the column
*  blocksize of A, i.e NB_X = NB_A (resp. MB_X = NB_A).
*
*  If INCY = M_Y (resp. (INCY = 1 and INCY <> M_Y)), the column (resp.
*  row) offset of sub( Y ) must be equal to the row offset of sub( A ),
*  i.e MOD(JY-1,NB_Y) (resp. MOD(IY-1,MB_Y)) = MOD(IA-1,MB_A). Moreover,
*  the column (resp. row) blocksize of Y must be equal to the row
*  blocksize of A, i.e NB_Y = MB_A (resp. MB_Y = MB_A).
*
*  Parameters
*  ==========
*
*  UPLO    (global input) CHARACTER
*          On entry, UPLO specifies whether the upper or lower
*          triangular part of the distributed matrix sub( A ) is to be
*          referenced as follows:
*
*             UPLO = 'U' or 'u'   Only the upper triangular part of
*                                 sub( A ) is to be referenced.
*
*             UPLO = 'L' or 'l'   Only the lower triangular part of
*                                 sub( A ) is to be referenced.
*
*  N       (global input) INTEGER
*          The order of the distributed matrix sub( A ). N >= 0.
*
*  ALPHA   (global input) DTYPE
*          On entry, ALPHA specifies the scalar alpha.
*
*  A       (local input) DTYPE
*          an array of dimension (LLD_A,LOCc(JA+N-1) containing the
*          local pieces of the distributed matrix sub( A ). Before
*          entry with  UPLO = 'U' or 'u', the leading N-by-N upper
*          triangular part of the distributed matrix sub( A ) must
*          contain the upper triangular part of the symmetric matrix
*          contain the upper triangular part of the hermitian matrix
*          must contain the lower triangular part of the symmetric
*          matrix and the strictly upper triangular part of sub( A )
*          is not referenced.
*
*  IA      (global input) INTEGER
*          The global row index of the submatrix of the distributed
*          matrix A to operate on.
*
*  JA      (global input) INTEGER
*          The global column index of the submatrix of the distributed
*          matrix A to operate on.
*
*  DESCA   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix A.
*
*  X       (local input) DTYPE array containing the local
*          pieces of a distributed matrix of dimension of at least
*                  ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
*          This array contains the entries of the distributed vector
*          sub( X ).
*
*  IX      (global input) INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix X.
*
*  INCX    (global input) INTEGER
*          The global increment for the elements of X. Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
*  BETA    (global input) DTYPE
*          On entry,  BETA  specifies the scalar  beta.  When  BETA  is
*          supplied as zero then sub( Y ) need not be set on input.
*
*  Y       (local input/local output) DTYPE array
*          containing the local pieces of a distributed matrix of
*          dimension of at least
*                ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
*          This array contains the entries of the distributed vector
*          sub( Y ).  On exit, sub( Y ) is overwritten by the updated
*          distributed vector sub( Y ).
*
*  IY      (global input) INTEGER
*          The global row index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  JY      (global input) INTEGER
*          The global column index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  DESCY   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix Y.
*
*  INCY    (global input) INTEGER
*          The global increment for the elements of Y. Only two values
*          of INCY are supported in this version, namely 1 and M_Y.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            DLEN_
      PARAMETER          ( DLEN_ = 9 )
      INTEGER            CTXT_, M_, N_, MB_, NB_
      PARAMETER          ( CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6 )
      INTEGER            RSRC_, CSRC_, LLD_
      PARAMETER          ( RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            IA, INCX, INCY, IX, IY, JA, JX, JY, LWORK, N
      COMPLEX*16         ALPHA, BETA
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCX( * ), DESCY( * )
      COMPLEX*16         A( * ), WORK( * ), X( * ), Y( * )
*     ..
*     .. Local Scalars ..
      LOGICAL            FOUND, HASDIAGONAL, HASWORK, ISLOWER, ISUPPER,
     $                   ISVALID, LWORKQUERY
      CHARACTER*3        SCOPE
      CHARACTER*10       TRANS1, TRANS2
      INTEGER            CDEST, COFF, CSRC, GNNZ, IA0, IA1, IA2, IAEND,
     $                   IAPOS, IAROW, IASTART, ICOLA, ICONTXT, ICPTR,
     $                   IFREE, IIA, INEED, INFO, IOFF, IPOS, IPOSDIAG,
     $                   IPWORK, IP_XC, IP_XR, IP_YC, IP_YR, IROWA,
     $                   IRPTR, IXROW, IYROW, JA0, JA1, JA2, JACOL,
     $                   JAEND, JASIZE, JASTART, JJA, JXCOL, JYCOL,
     $                   LCINDXA, LDD, LLDA, LNN, LOCP, LOFFSET1,
     $                   LRINDXA, M, MB, MM, MYPCOL, MYPROW, NB, NN,
     $                   NNZ, NPCOL, NPROW, P0, Q0, RDEST, ROFF, RSRC,
     $                   XCNEED, XRNEED, YCNEED, YRNEED
      COMPLEX*16         BBETA, ONE, ZERO
*     ..
*     .. Local Arrays ..
      INTEGER            DESC1( DLEN_ ), DESCXC( DLEN_ ),
     $                   DESCXR( DLEN_ ), DESCYC( DLEN_ ),
     $                   DESCYR( DLEN_ )
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            INDXFIRST, NUMROC, NUMROC2
      EXTERNAL           LSAME, INDXFIRST, NUMROC, NUMROC2
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DESCINITT, DESCSET, INFOG1L,
     $                   INFOG2L, INFOT, PXERBLA, PZAXPY, PZCOPY,
     $                   PZSCAL, PZTRANU, ZCOPY, ZGEMV, ZGSUM2D, ZHEMV
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          DBLE, DCMPLX, MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
      ONE = DCMPLX( DBLE( 1 ) )
      ZERO = DCMPLX( DBLE( 0 ) )
      BBETA = ONE
      M = N
      IF( N.LE.0 ) THEN
         RETURN
      ENDIF
      ISLOWER = LSAME( UPLO, 'Lower' )
      ISUPPER = LSAME( UPLO, 'Upper' )
      ISVALID = ( ISLOWER .OR. ISUPPER )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 1 )
         RETURN
      ENDIF
      ISVALID = ( INCX.EQ.1 ) .OR. ( INCX.EQ.DESCX( M_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 12 )
         RETURN
      ENDIF
      ISVALID = ( INCY.EQ.1 ) .OR. ( INCY.EQ.DESCY( M_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 18 )
         RETURN
      ENDIF
      ISVALID = ( 1.LE.IA ) .AND. ( IA+N-1.LE.DESCA( M_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 5 )
         RETURN
      ENDIF
      ISVALID = ( 1.LE.JA ) .AND. ( JA+N-1.LE.DESCA( N_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 6 )
         RETURN
      ENDIF
      IF( INCX.EQ.1 ) THEN
         ISVALID = ( 1.LE.IX ) .AND. ( IX+N-1.LE.DESCX( M_ ) )
      ELSE
         ISVALID = ( 1.LE.IX ) .AND. ( IX.LE.DESCX( M_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 9 )
         RETURN
      ENDIF
      IF( INCX.EQ.1 ) THEN
         ISVALID = ( 1.LE.JX ) .AND. ( JX.LE.DESCX( N_ ) )
      ELSE
         ISVALID = ( 1.LE.JX ) .AND. ( JX+N-1.LE.DESCX( N_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 10 )
         RETURN
      ENDIF
      IF( INCY.EQ.1 ) THEN
         ISVALID = ( 1.LE.IY ) .AND. ( IY+N-1.LE.DESCY( M_ ) )
      ELSE
         ISVALID = ( 1.LE.IY ) .AND. ( IY.LE.DESCY( M_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 15 )
         RETURN
      ENDIF
      IF( INCY.EQ.1 ) THEN
         ISVALID = ( 1.LE.JY ) .AND. ( JY.LE.DESCY( N_ ) )
      ELSE
         ISVALID = ( 1.LE.JY ) .AND. ( JY+N-1.LE.DESCY( N_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 16 )
         RETURN
      ENDIF
*
*
*  XR,YR are nx1 replicated vectors
*  that matches the rows of A
*
*  XC,YC are 1xn replicated vectors
*  that matches the column of A
*
*
*  The vectors are aligned to require no communication
*  during actual computation.
*
*
*
* Check storage requirements.
*
      CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYPROW,
     $                     MYPCOL )
      MB = DESCA( MB_ )
      NB = DESCA( NB_ )
      ICONTXT = DESCA( CTXT_ )
      IAROW = MOD( DESCA( RSRC_ )+( IA-1 ) / MB, NPROW )
      JACOL = MOD( DESCA( CSRC_ )+( JA-1 ) / NB, NPCOL )
      IXROW = MOD( DESCX( RSRC_ )+( IX-1 ) / DESCX( MB_ ), NPROW )
      JXCOL = MOD( DESCX( CSRC_ )+( JX-1 ) / DESCX( NB_ ), NPCOL )
      IYROW = MOD( DESCY( RSRC_ )+( IY-1 ) / DESCY( MB_ ), NPROW )
      JYCOL = MOD( DESCY( CSRC_ )+( JY-1 ) / DESCY( NB_ ), NPCOL )
*
*  if ia (ja) is aligned to block boundary, then
*  roff (coff) equals 0.
*
      ROFF = MOD( MB+IA-1, MB )
      IIA = IA - MOD( MB+IA-1, MB )
      MM = ( IA+N-1 ) - IIA + 1
      COFF = MOD( NB+JA-1, NB )
      JJA = JA - MOD( NB+JA-1, NB )
      NN = ( JA+N-1 ) - JJA + 1
      P0 = MYPROW
      Q0 = MYPCOL
      XRNEED = MAX( 1, NUMROC( MM, MB, MYPROW, P0, NPROW ) )
      XCNEED = MAX( 1, NUMROC( NN, NB, MYPCOL, Q0, NPCOL ) )
      YRNEED = XRNEED
      YCNEED = XCNEED
      INEED = XCNEED + XRNEED + YCNEED + YRNEED
      LWORKQUERY = ( LWORK.EQ.-1 )
      IF( LWORK.LT.INEED ) THEN
         IF( .NOT.LWORKQUERY ) THEN
            CALL PXERBLA( DESCA( CTXT_ ), 'PxSPMVF', 20 )
         ENDIF
         WORK( 1 ) = INEED
         RETURN
      ENDIF
      IFREE = 1
      IP_XR = IFREE
      IFREE = IFREE + XRNEED
      IP_YC = IFREE
      IFREE = IFREE + YCNEED
      IP_XC = IFREE
      IFREE = IFREE + XCNEED
      IP_YR = IFREE
      IFREE = IFREE + YRNEED
*
* Zero out work space.
*
      CALL ZCOPY( INEED, ZERO, 0, WORK, 1 )
*
*  Prescale y vector.
*
      IF( BETA.NE.ONE ) THEN
         CALL PZSCAL( N, BETA, Y, IY, JY, DESCY, INCY )
      ENDIF
*
* Copy vectors into xr, xc.
*
      INFO = 0
      LDD = XRNEED
      P0 = IAROW
      Q0 = JXCOL
      CALL DESCSET( DESCXR, MM, 1, MB, NB, P0, Q0, ICONTXT, LDD )
      INFO = 0
      P0 = IXROW
      Q0 = JACOL
      LDD = 1
      CALL DESCSET( DESCXC, 1, NN, MB, NB, P0, Q0, ICONTXT, LDD )
*
*  New PBLAS V2 capability in replicated matrices.
*
      DESCXR( CSRC_ ) = -1
      DESCXC( RSRC_ ) = -1
*
*  XC is 1 by nn
*  XR is mm by 1
*
      IF( ( INCX.EQ.1 ) .AND. ( INCX.NE.DESCX( M_ ) ) ) THEN
         CALL PZCOPY( N, X, IX, JX, DESCX, INCX, WORK( IP_XR ), 1+ROFF,
     $                1, DESCXR, 1 )
         CALL PZTRANU( 1, N, ONE, WORK( IP_XR ), 1+ROFF, 1, DESCXR,
     $                 ZERO, WORK( IP_XC ), 1, 1+COFF, DESCXC )
      ELSE
         CALL PZCOPY( N, X, IX, JX, DESCX, INCX, WORK( IP_XC ), 1,
     $                1+COFF, DESCXC, DESCXC( M_ ) )
         CALL PZTRANU( N, 1, ONE, WORK( IP_XC ), 1, 1+COFF, DESCXC,
     $                 ZERO, WORK( IP_XR ), 1+ROFF, 1, DESCXR )
      ENDIF
*
*   Treat as local vector.
*
      P0 = MYPROW
      Q0 = JACOL
      INFO = 0
      LDD = 1
      CALL DESCSET( DESCXC, 1, NN, MB, NB, P0, Q0, ICONTXT, LDD )
      CALL DESCSET( DESCYC, 1, NN, MB, NB, P0, Q0, ICONTXT, LDD )
      P0 = IAROW
      Q0 = MYPCOL
      INFO = 0
      LDD = XRNEED
      CALL DESCSET( DESCXR, MM, 1, MB, NB, P0, Q0, ICONTXT, LDD )
      CALL DESCSET( DESCYR, MM, 1, MB, NB, P0, Q0, ICONTXT, LDD )
*
*  Main computation loop.
*
*  No need for further communication within computation loop.
*
*  iapos points to beginning of entire panel
*  gnnz is number of nonzeros of entire panel
*  nnz is number of nonzeros in submatrix within a column
*
*
      JASTART = INDXFIRST( N, JA, DESCA( NB_ ), MYPCOL, DESCA( CSRC_ ),
     $          NPCOL )
      IASTART = IA + ( JASTART-JA )
      ISVALID = ( 1.LE.IASTART ) .AND. ( IASTART.LE.DESCA( M_ ) ) .AND.
     $          ( 1.LE.JASTART ) .AND. ( JASTART.LE.DESCA( N_ ) )
      IF( ISVALID ) THEN
         CALL INFOT( UPLO, IASTART, JASTART, DESCA, IA1, JA1, IA2, JA2 )
         CALL DESCINITT( UPLO, IA1, JA1, DESCA, IA0, JA0, LOFFSET1,
     $                   DESC1 )
         CALL INFOG2L( IA0, JA0, DESC1, NPROW, NPCOL, MYPROW, MYPCOL,
     $                 LRINDXA, LCINDXA, IROWA, ICOLA )
         IAPOS = ( LOFFSET1-1 ) + LRINDXA + ( LCINDXA-1 )*DESC1( LLD_ )
         LLDA = DESC1( LLD_ )
*
* while loop.
*
   10    CONTINUE
         IF( JASTART.LE.JA+N-1 ) THEN
*
*         Align jaend to end of block boundary.
*
            JAEND = JASTART - MOD( NB+JASTART-1, NB ) + ( NB-1 )
            JAEND = MAX( JASTART, MIN( JA+N-1, JAEND ) )
            JASIZE = JAEND - JASTART + 1
            IASTART = IA + ( JASTART-JA )
            IAEND = IA + ( JAEND-JA )
            CALL INFOT( UPLO, IASTART, JASTART, DESCA, IA1, JA1, IA2,
     $                  JA2 )
            GNNZ = NUMROC2( IA2-IA1+1, IA1, DESCA( MB_ ), MYPROW,
     $             DESCA( RSRC_ ), NPROW )
            LLDA = MAX( 1, GNNZ )
            IF( ISLOWER ) THEN
               IOFF = NUMROC2( IASTART-IA1+1, IA1, DESCA( MB_ ), MYPROW,
     $                DESCA( RSRC_ ), NPROW )
               LNN = ( IA+N-1 ) - IASTART + 1
               NNZ = NUMROC2( LNN, IASTART, DESCA( MB_ ), MYPROW,
     $               DESCA( RSRC_ ), NPROW )
            ELSE
               IOFF = NUMROC2( IA-IA1+1, IA1, DESCA( MB_ ), MYPROW,
     $                DESCA( RSRC_ ), NPROW )
               LNN = ( IAEND-IA+1 )
               NNZ = NUMROC2( LNN, IA, MB, MYPROW, DESCA( RSRC_ ),
     $               NPROW )
            ENDIF
            IPWORK = IAPOS + MOD( JASTART-1, DESCA( NB_ ) )*LLDA +
     $               MAX( 0, ( IOFF-1 ) )
            Q0 = MOD( DESCA( CSRC_ )+( JASTART-1 ) / NB, NPCOL )
            IIA = JASTART
            P0 = MOD( DESCA( RSRC_ )+( IIA-1 ) / MB, NPROW )
            HASDIAGONAL = ( P0.EQ.MYPROW ) .AND. ( Q0.EQ.MYPCOL )
            IF( HASDIAGONAL ) THEN
*
*               Perform serial SYMV operation.
*
               IF( ISLOWER ) THEN
                  IPOSDIAG = IPWORK
                  IPWORK = IPOSDIAG + JASIZE
               ELSE
                  IPOSDIAG = IPWORK + NNZ - JASIZE
               ENDIF
               CALL INFOG1L( ROFF+( JASTART-JA )+1, MB, NPROW, MYPROW,
     $                       DESCXR( RSRC_ ), IRPTR, RSRC )
               CALL INFOG1L( COFF+( JASTART-JA )+1, NB, NPCOL, MYPCOL,
     $                       DESCYC( CSRC_ ), ICPTR, CSRC )
*
*              Extra check.
*
               Q0 = MOD( DESCA( CSRC_ )+( JASTART+JASIZE-1-1 ) / NB,
     $              NPCOL )
               P0 = MOD( DESCA( RSRC_ )+( IIA+JASIZE-1-1 ) / MB, NPROW )
               ISVALID = ( P0.EQ.MYPROW ) .AND. ( Q0.EQ.MYPCOL )
               CALL ZHEMV( UPLO, JASIZE, ALPHA, A( IPOSDIAG ), LLDA,
     $                     WORK( ( IP_XR-1 )+IRPTR ), 1, BBETA,
     $                     WORK( ( IP_YC-1 )+ICPTR ), 1 )
            ENDIF
            IF( ISLOWER ) THEN
*
*            Use lower triangular part of A.
*
               LOCP = NNZ
               IF( HASDIAGONAL ) THEN
                  LOCP = NNZ - JASIZE
               ENDIF
               IIA = IA + ( JASTART-JA ) + JASIZE
               LNN = ( IA+N-1 ) - IIA + 1
               HASWORK = ( LOCP.GE.1 )
               IF( HASWORK ) THEN
                  IPOS = IPWORK
                  LDD = MAX( 1, LLDA )
                  CALL INFOG1L( ( JASTART-JA )+1+COFF, NB, NPCOL,
     $                          MYPCOL, DESCXC( CSRC_ ), ICPTR, CSRC )
                  IASTART = IA + ( JASTART-JA )
                  IIA = IASTART + JASIZE
                  LNN = ( IA+N-1 ) - IIA + 1
                  IA0 = INDXFIRST( LNN, IIA, MB, MYPROW, DESCA( RSRC_ ),
     $                  NPROW )
                  FOUND = ( IIA.LE.IA0 ) .AND. ( IA0.LE.IIA+LNN-1 )
                  IIA = IA0
                  CALL INFOG1L( ( IIA-IA )+1+ROFF, MB, NPROW, MYPROW,
     $                          DESCYR( RSRC_ ), IRPTR, RSRC )
*
* Key computation with off-diagonal block.
*
                  TRANS1 = 'Notrans'
                  TRANS2 = 'Conjugate Transpose'
                  CALL ZGEMV( TRANS1, LOCP, JASIZE, ALPHA, A( IPOS ),
     $                        LDD, WORK( ( IP_XC-1 )+ICPTR ), 1, BBETA,
     $                        WORK( ( IP_YR-1 )+IRPTR ), 1 )
                  CALL ZGEMV( TRANS2, LOCP, JASIZE, ALPHA, A( IPOS ),
     $                        LDD, WORK( ( IP_XR-1 )+IRPTR ), 1, BBETA,
     $                        WORK( ( IP_YC-1 )+ICPTR ), 1 )
               ENDIF
            ELSE
*
*            Use upper triangular part of A.
*
               LOCP = NNZ
               IF( HASDIAGONAL ) THEN
                  LOCP = NNZ - JASIZE
               ENDIF
               LNN = ( JASTART-JA )
               HASWORK = ( LOCP.GE.1 )
               IF( HASWORK ) THEN
                  IPOS = IPWORK
                  LDD = MAX( 1, LLDA )
*
* Key computation with off-diagonal block.
*
                  IRPTR = 1 + ROFF
                  CALL INFOG1L( ( JASTART-JA )+1+COFF, NB, NPCOL,
     $                          MYPCOL, DESCXC( CSRC_ ), ICPTR, CSRC )
                  TRANS1 = 'Notrans'
                  TRANS2 = 'Conjugate Transpose'
                  CALL ZGEMV( TRANS1, LOCP, JASIZE, ALPHA, A( IPOS ),
     $                        LDD, WORK( ( IP_XC-1 )+ICPTR ), 1, BBETA,
     $                        WORK( ( IP_YR-1 )+IRPTR ), 1 )
                  CALL ZGEMV( TRANS2, LOCP, JASIZE, ALPHA, A( IPOS ),
     $                        LDD, WORK( ( IP_XR-1 )+IRPTR ), 1, BBETA,
     $                        WORK( ( IP_YC-1 )+ICPTR ), 1 )
               ENDIF
            ENDIF
            JASTART = MAX( JAEND+1, ( JAEND-( NB-1 ) )+NB*NPCOL )
            IAPOS = IAPOS + GNNZ*DESCA( NB_ )
            GOTO 10
         ENDIF
   20    CONTINUE
*
* End while loop.
*
      ENDIF
*
*   Sum results.
*
      SCOPE = 'Row'
      LDD = YRNEED
*
*  rdest = myprow; cdest = jycol;
*
      RDEST = MYPROW
      CDEST = JYCOL
      CALL ZGSUM2D( DESCYR( CTXT_ ), SCOPE, ' ', YRNEED, 1,
     $              WORK( IP_YR ), LDD, RDEST, CDEST )
      P0 = IAROW
      Q0 = JYCOL
      LDD = YRNEED
      CALL DESCSET( DESCYR, MM, 1, MB, NB, P0, Q0, ICONTXT, LDD )
      CALL PZAXPY( N, ONE, WORK( ( IP_YR-1 )+1 ), 1+ROFF, 1, DESCYR, 1,
     $             Y, IY, JY, DESCY, INCY )
      SCOPE = 'Col'
      LDD = YCNEED
*
*  rdest = iyrow; cdest = mypcol;
*
      RDEST = IYROW
      CDEST = MYPCOL
      CALL ZGSUM2D( DESCYC( CTXT_ ), SCOPE, ' ', YCNEED, 1,
     $              WORK( IP_YC ), LDD, RDEST, CDEST )
      P0 = IYROW
      Q0 = JACOL
      LDD = 1
      CALL DESCSET( DESCYC, 1, MM, MB, NB, P0, Q0, ICONTXT, LDD )
      CALL PZAXPY( N, ONE, WORK( ( IP_YC-1 )+1 ), 1, 1+COFF, DESCYC,
     $             DESCYC( M_ ), Y, IY, JY, DESCY, INCY )
*
*   All done.
*
      WORK( 1 ) = INEED
      RETURN
      END
