      SUBROUTINE PSLAMVTF( UPLO, TRANS, M, N, ALPHA, A, IA, JA, DESCA,
     $                     X, IX, JX, DESCX, INCX, BETA, Y, IY, JY,
     $                     DESCY, INCY, WORK, LWORK )
*
*
*  -- ScaLAPACK auxiliary routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     Oct 10, 1996
*
*
*  Purpose
*  =======
*
*  PLAMVTF performs the distributed matrix-vector operation
*
*
*      sub( Y ) := alpha*sub( A )  * sub( X )  + beta*sub( Y ),  or
*      sub( Y ) := alpha*sub( A )' * sub( X )  + beta*sub( Y ),  or
*      sub( Y ) := alpha*conjg( sub( A )' )* sub( X ) + beta*sub( Y ),
*     sub( Y ) := alpha*sub( A ) * sub( X ) + beta*sub( Y ),
*
*  where sub( A ) denotes A(IA:IA+M-1,JA:JA+N-1),
*
*        sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
*                         X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
*
*        sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
*                         Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
*
*  alpha and beta are scalars, sub( X ) and sub( Y ) are N element
*  distributed vectors and sub( A ) is an N-by-N symmetric distributed
*  matrix stored in compressed triangular form.
*
*  Moreover, sub(A) is stored in strict upper part if uplo = 'U'
*            sub(A) is stored in strict lower part if uplo = 'L'
*
*  Matrix sub(A) is stored in packed storage.
*
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector descA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DT_A   (global) descA[ DT_ ]   The descriptor type.  In this case,
*                                 DT_A = 1.
*  CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) descA[ M_ ]    The number of rows in the global
*                                 array A.
*  N_A    (global) descA[ N_ ]    The number of columns in the global
*                                 array A.
*  MB_A   (global) descA[ MB_ ]   The blocking factor used to distribu-
*                                 te the rows of the array.
*  NB_A   (global) descA[ NB_ ]   The blocking factor used to distribu-
*                                 te the columns of the array.
*  RSRC_A (global) descA[ RSRC_ ] The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) descA[ CSRC_ ] The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  descA[ LLD_ ]  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, myprow, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, mypcol, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Because vectors may be seen as particular matrices, a distributed
*  vector is considered to be a distributed matrix.
*
*  The distributed vectors X and Y must be distributed along the same
*  axis, i.e if INCX = M_X then INCY must be equal to M_Y, similarly if
*  INCX = 1 and INCX <> M_X, then INCY must be equal to 1. Moreover,
*  MB_A = NB_A is required as well as MOD(IA-1,MB_A) = MOD(JA-1,NB_A).
*
*  If INCY = M_Y, the process column having the first entries of
*  sub( Y ) must also contain the first block of sub( A ), similarly,
*  if INCX = 1 and INCX <> M_X, the process row having the first
*  entries of sub( X ) must also contain the first block of sub( A ).
*
*  If INCX = M_X (resp. (INCX = 1 and INCX <> 1) ), the column (resp.
*  row) offset of sub( X ) must be equal to the row offset of sub( A ),
*  i.e MOD(JX-1,NB_X) (resp. MOD(IX-1,MB_X)) = MOD(JA-1,NB_A). Moreover,
*  the column (resp. row) blocksize of X must be equal to the column
*  blocksize of A, i.e NB_X = NB_A (resp. MB_X = NB_A).
*
*  If INCY = M_Y (resp. (INCY = 1 and INCY <> M_Y)), the column (resp.
*  row) offset of sub( Y ) must be equal to the row offset of sub( A ),
*  i.e MOD(JY-1,NB_Y) (resp. MOD(IY-1,MB_Y)) = MOD(IA-1,MB_A). Moreover,
*  the column (resp. row) blocksize of Y must be equal to the row
*  blocksize of A, i.e NB_Y = MB_A (resp. MB_Y = MB_A).
*
*  Parameters
*  ==========
*
*  UPLO    (global input) CHARACTER
*          On entry, UPLO specifies whether the upper or lower
*          triangular part of the distributed matrix sub( A ) is to be
*          referenced as follows:
*
*             UPLO = 'U' or 'u'   Only the upper triangular part of
*                                 sub( A ) is to be referenced.
*
*             UPLO = 'L' or 'l'   Only the lower triangular part of
*                                 sub( A ) is to be referenced.
*
*  N       (global input) INTEGER
*          The order of the distributed matrix sub( A ). N >= 0.
*
*  ALPHA   (global input) DTYPE
*          On entry, ALPHA specifies the scalar alpha.
*
*  A       (local input) DTYPE
*          an array of dimension (LLD_A,LOCc(JA+N-1) containing the
*          local pieces of the distributed matrix sub( A ). Before
*          entry with  UPLO = 'U' or 'u', the leading N-by-N upper
*          triangular part of the distributed matrix sub( A ) must
*          contain the upper triangular part of the symmetric matrix
*          contain the upper triangular part of the hermitian matrix
*          must contain the lower triangular part of the symmetric
*          matrix and the strictly upper triangular part of sub( A )
*          is not referenced.
*
*  IA      (global input) INTEGER
*          The global row index of the submatrix of the distributed
*          matrix A to operate on.
*
*  JA      (global input) INTEGER
*          The global column index of the submatrix of the distributed
*          matrix A to operate on.
*
*  DESCA   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix A.
*
*  X       (local input) DTYPE array containing the local
*          pieces of a distributed matrix of dimension of at least
*                  ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
*          This array contains the entries of the distributed vector
*          sub( X ).
*
*  IX      (global input) INTEGER
*          The global row index of the submatrix of the distributed
*          matrix X to operate on.
*
*  JX      (global input) INTEGER
*          The global column index of the submatrix of the distributed
*          matrix X to operate on.
*
*  DESCX   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix X.
*
*  INCX    (global input) INTEGER
*          The global increment for the elements of X. Only two values
*          of INCX are supported in this version, namely 1 and M_X.
*
*  BETA    (global input) DTYPE
*          On entry,  BETA  specifies the scalar  beta.  When  BETA  is
*          supplied as zero then sub( Y ) need not be set on input.
*
*  Y       (local input/local output) DTYPE array
*          containing the local pieces of a distributed matrix of
*          dimension of at least
*                ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
*          This array contains the entries of the distributed vector
*          sub( Y ).  On exit, sub( Y ) is overwritten by the updated
*          distributed vector sub( Y ).
*
*  IY      (global input) INTEGER
*          The global row index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  JY      (global input) INTEGER
*          The global column index of the submatrix of the distributed
*          matrix Y to operate on.
*
*  DESCY   (global and local input) INTEGER array of dimension 8.
*          The array descriptor of the distributed matrix Y.
*
*  INCY    (global input) INTEGER
*          The global increment for the elements of Y. Only two values
*          of INCY are supported in this version, namely 1 and M_Y.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            DLEN_
      PARAMETER          ( DLEN_ = 9 )
      INTEGER            CTXT_, M_, N_, MB_, NB_
      PARAMETER          ( CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6 )
      INTEGER            RSRC_, CSRC_, LLD_
      PARAMETER          ( RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Scalar Arguments ..
      CHARACTER          TRANS, UPLO
      INTEGER            IA, INCX, INCY, IX, IY, JA, JX, JY, LWORK, M, N
      REAL               ALPHA, BETA
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCX( * ), DESCY( * )
      REAL               A( * ), WORK( * ), X( * ), Y( * )
*     ..
*     .. Local Scalars ..
      LOGICAL            HASWORK, ISLOWER, ISMINE, ISTRANSA, ISUPPER,
     $                   ISVALID, LWORKQUERY
      CHARACTER*3        SCOPE
      INTEGER            CDEST, COFF, CSRC, IA0, IAEND, IAROW, IASTART,
     $                   ICOLA, ICONTXT, ICPTR, IFREE, IIA, INEED, INFO,
     $                   IPOS, IP_XC, IP_XR, IP_XX, IP_YC, IP_YR, IP_YY,
     $                   IROWA, IRPTR, IXPTR, IXROW, IYPTR, IYROW, JA0,
     $                   JACOL, JAEND, JASIZE, JASTART, JJA, JXCOL,
     $                   JYCOL, LCINDXA, LDD, LLDA, LOCP, LOCQ,
     $                   LOFFSET1, LRINDXA, MB, MM, MYPCOL, MYPROW, NB,
     $                   NN, NPCOL, NPROW, P0, Q0, RDEST, ROFF, RSRC,
     $                   XCNEED, XCSIZE, XRNEED, XRSIZE, XSIZE, XXNEED,
     $                   XXSIZE, YCNEED, YCSIZE, YRNEED, YRSIZE, YSIZE,
     $                   YYNEED, YYSIZE
      REAL               BBETA, ONE, ZERO
*     ..
*     .. Local Arrays ..
      INTEGER            DESC1( DLEN_ ), DESCXC( DLEN_ ),
     $                   DESCXR( DLEN_ ), DESCXX( DLEN_ ),
     $                   DESCYC( DLEN_ ), DESCYR( DLEN_ ),
     $                   DESCYY( DLEN_ )
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            INDXFIRST, NUMROC, NUMROC2
      EXTERNAL           LSAME, INDXFIRST, NUMROC, NUMROC2
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DESCINITT, DESCSET, ICOPY,
     $                   INFOG1L, INFOG2L, PSAXPY, PSCOPY, PSSCAL,
     $                   PXERBLA, SCOPY, SGEBR2D, SGEBS2D, SGEMV,
     $                   SGSUM2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD, REAL
*     ..
*     .. Executable Statements ..
      ONE = REAL( 1 )
      ZERO = REAL( 0 )
      BBETA = ONE
      IF( ( N.LE.0 ) .OR. ( M.LE.0 ) ) THEN
         WORK( 1 ) = 0
         RETURN
      ENDIF
      ISTRANSA = LSAME( TRANS, 'Trans' ) .OR.
     $           LSAME( TRANS, 'Conjugate' )
      IF( ISTRANSA ) THEN
         XSIZE = M
         YSIZE = N
      ELSE
         XSIZE = N
         YSIZE = M
      ENDIF
      ISLOWER = LSAME( UPLO, 'Lower' )
      ISUPPER = LSAME( UPLO, 'Upper' )
      ISVALID = ( ISLOWER .OR. ISUPPER )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 1 )
         RETURN
      ENDIF
      ISVALID = ( INCX.EQ.1 ) .OR. ( INCX.EQ.DESCX( M_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 14 )
         RETURN
      ENDIF
      ISVALID = ( INCY.EQ.1 ) .OR. ( INCY.EQ.DESCY( M_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 20 )
         RETURN
      ENDIF
      ISVALID = ( 1.LE.IA ) .AND. ( IA+M-1.LE.DESCA( M_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 7 )
         RETURN
      ENDIF
      ISVALID = ( 1.LE.JA ) .AND. ( JA+N-1.LE.DESCA( N_ ) )
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 8 )
         RETURN
      ENDIF
      IF( INCX.EQ.DESCX( M_ ) ) THEN
         ISVALID = ( 1.LE.IX ) .AND. ( IX.LE.DESCX( M_ ) )
      ELSE
         ISVALID = ( 1.LE.IX ) .AND. ( IX+XSIZE-1.LE.DESCX( M_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 11 )
         RETURN
      ENDIF
      IF( INCX.EQ.DESCX( M_ ) ) THEN
         ISVALID = ( 1.LE.JX ) .AND. ( JX+XSIZE-1.LE.DESCX( N_ ) )
      ELSE
         ISVALID = ( 1.LE.JX ) .AND. ( JX.LE.DESCX( N_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 12 )
         RETURN
      ENDIF
      IF( INCY.EQ.DESCY( M_ ) ) THEN
         ISVALID = ( 1.LE.IY ) .AND. ( IY.LE.DESCY( M_ ) )
      ELSE
         ISVALID = ( 1.LE.IY ) .AND. ( IY+YSIZE-1.LE.DESCY( M_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 17 )
         RETURN
      ENDIF
      IF( INCY.EQ.DESCY( M_ ) ) THEN
         ISVALID = ( 1.LE.JY ) .AND. ( JY+YSIZE-1.LE.DESCY( N_ ) )
      ELSE
         ISVALID = ( 1.LE.JY ) .AND. ( JY.LE.DESCY( N_ ) )
      ENDIF
      IF( .NOT.ISVALID ) THEN
         CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 18 )
         RETURN
      ENDIF
      IF( .NOT.ISTRANSA ) THEN
         MM = M
         NN = N
      ELSE
         MM = N
         NN = M
      ENDIF
      IF( ISLOWER ) THEN
         ISVALID = ( IA.GE.JA ) .AND. ( IA+M-1.GE.JA ) .AND.
     $             ( IA.GE.JA+N-1 ) .AND. ( IA+M-1.GE.JA+N-1 )
         IF( .NOT.ISVALID ) THEN
            CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 7 )
            RETURN
         ENDIF
      ELSE
         ISVALID = ( JA.GE.IA ) .AND. ( JA.GE.IA+M-1 ) .AND.
     $             ( JA+N-1.GE.IA ) .AND. ( JA+N-1.GE.IA+M-1 )
         IF( .NOT.ISVALID ) THEN
            CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 8 )
            RETURN
         ENDIF
      ENDIF
      CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYPROW,
     $                     MYPCOL )
*
*
*  XR,YR are nx1 replicated vectors
*  that matches the rows of A
*
*  XC,YC are 1xn replicated vectors
*  that matches the column of A
*
*
*  The vectors are aligned to require no communication
*  during actual computation.
*
*
*
* Check storage requirements.
*
      MB = DESCA( MB_ )
      NB = DESCA( NB_ )
      ICONTXT = DESCA( CTXT_ )
      IAROW = MOD( DESCA( RSRC_ )+( IA-1 ) / MB, NPROW )
      JACOL = MOD( DESCA( CSRC_ )+( JA-1 ) / NB, NPCOL )
      IXROW = MOD( DESCX( RSRC_ )+( IX-1 ) / DESCX( MB_ ), NPROW )
      JXCOL = MOD( DESCX( CSRC_ )+( JX-1 ) / DESCX( NB_ ), NPCOL )
      IYROW = MOD( DESCY( RSRC_ )+( IY-1 ) / DESCY( MB_ ), NPROW )
      JYCOL = MOD( DESCY( CSRC_ )+( JY-1 ) / DESCY( NB_ ), NPCOL )
*
*  if ia (ja) is aligned to block boundary, then
*  roff (coff) equals 0.
*
      ROFF = MOD( MB+IA-1, MB )
      IIA = IA - MOD( MB+IA-1, MB )
      MM = ( IA+M-1 ) - IIA + 1
      COFF = MOD( NB+JA-1, NB )
      JJA = JA - MOD( NB+JA-1, NB )
      NN = ( JA+N-1 ) - JJA + 1
      P0 = MYPROW
      Q0 = MYPCOL
      XRNEED = MAX( 1, NUMROC( MM, MB, MYPROW, P0, NPROW ) )
      XCNEED = MAX( 1, NUMROC( NN, NB, MYPCOL, Q0, NPCOL ) )
      XRSIZE = MM
      YRSIZE = MM
      XCSIZE = NN
      YCSIZE = NN
      YRNEED = XRNEED
      YCNEED = XCNEED
      IF( ISTRANSA ) THEN
         XSIZE = M
         YSIZE = N
         XXSIZE = XRSIZE
         YYSIZE = YCSIZE
         XXNEED = XRNEED
         YYNEED = YCNEED
      ELSE
         XSIZE = N
         YSIZE = M
         XXSIZE = XCSIZE
         YYSIZE = YRSIZE
         XXNEED = XCNEED
         YYNEED = YRNEED
      ENDIF
      INEED = XXNEED + YYNEED
      LWORKQUERY = ( LWORK.EQ.-1 )
      IF( LWORK.LT.INEED ) THEN
         IF( .NOT.LWORKQUERY ) THEN
            CALL PXERBLA( DESCA( CTXT_ ), 'PxLAMVTF', 22 )
         ENDIF
         WORK( 1 ) = INEED
         RETURN
      ENDIF
      IFREE = 1
      IP_XX = IFREE
      IFREE = IFREE + XXNEED
      IP_YY = IFREE
      IFREE = IFREE + YYNEED
      IF( ISTRANSA ) THEN
         IP_XR = IP_XX
         IP_XC = -1
         IP_YC = IP_YY
         IP_YR = -1
      ELSE
         IP_XC = IP_XX
         IP_XR = -1
         IP_YR = IP_YY
         IP_YC = -1
      ENDIF
*
* Zero out work space.
*
      CALL SCOPY( INEED, ZERO, 0, WORK, 1 )
*
*  Prescale y vector.
*
      IF( BETA.NE.ONE ) THEN
         CALL PSSCAL( YSIZE, BETA, Y, IY, JY, DESCY, INCY )
      ENDIF
*
* Copy vectors into xr, xc.
*
      IF( ISTRANSA ) THEN
         INFO = 0
         P0 = IAROW
         Q0 = JXCOL
         LDD = XRNEED
         CALL DESCSET( DESCXR, XRSIZE, 1, MB, NB, P0, Q0, ICONTXT, LDD )
         CALL PSCOPY( XSIZE, X, IX, JX, DESCX, INCX, WORK( IP_XR ),
     $                1+ROFF, 1, DESCXR, 1 )
      ELSE
         INFO = 0
         P0 = IXROW
         Q0 = JACOL
         LDD = 1
         CALL DESCSET( DESCXC, 1, XCSIZE, MB, NB, P0, Q0, ICONTXT, LDD )
         CALL PSCOPY( XSIZE, X, IX, JX, DESCX, INCX, WORK( IP_XC ), 1,
     $                1+COFF, DESCXC, DESCXC( M_ ) )
      ENDIF
*
*  Broadcast XR and XC.
*
      IF( ISTRANSA ) THEN
         RSRC = MYPROW
         CSRC = DESCXR( CSRC_ )
         ISMINE = ( DESCXR( CSRC_ ).EQ.MYPCOL )
         SCOPE = 'Row'
         IF( ISMINE ) THEN
            CALL SGEBS2D( DESCXR( CTXT_ ), SCOPE, ' ', XRNEED, 1,
     $                    WORK( IP_XR ), XRNEED )
         ELSE
            CALL SGEBR2D( DESCXR( CTXT_ ), SCOPE, ' ', XRNEED, 1,
     $                    WORK( IP_XR ), XRNEED, RSRC, CSRC )
         ENDIF
      ELSE
         RSRC = DESCXC( RSRC_ )
         CSRC = MYPCOL
         ISMINE = ( DESCXC( RSRC_ ).EQ.MYPROW )
         SCOPE = 'Col'
         IF( ISMINE ) THEN
            CALL SGEBS2D( DESCXC( CTXT_ ), SCOPE, ' ', XCNEED, 1,
     $                    WORK( IP_XC ), XCNEED )
         ELSE
            CALL SGEBR2D( DESCXC( CTXT_ ), SCOPE, ' ', XCNEED, 1,
     $                    WORK( IP_XC ), XCNEED, RSRC, CSRC )
         ENDIF
      ENDIF
*
*   Treat as local vector.
*
      P0 = MYPROW
      Q0 = JACOL
      INFO = 0
      LDD = 1
      CALL DESCSET( DESCXC, 1, XCSIZE, MB, NB, P0, Q0, ICONTXT, LDD )
      CALL DESCSET( DESCYC, 1, YCSIZE, MB, NB, P0, Q0, ICONTXT, LDD )
      P0 = IAROW
      Q0 = MYPCOL
      INFO = 0
      LDD = XRNEED
      CALL DESCSET( DESCXR, MM, 1, MB, NB, P0, Q0, ICONTXT, LDD )
      CALL DESCSET( DESCYR, MM, 1, MB, NB, P0, Q0, ICONTXT, LDD )
      IF( ISTRANSA ) THEN
         CALL ICOPY( DLEN_, DESCXR, 1, DESCXX, 1 )
         CALL ICOPY( DLEN_, DESCYC, 1, DESCYY, 1 )
      ELSE
         CALL ICOPY( DLEN_, DESCXC, 1, DESCXX, 1 )
         CALL ICOPY( DLEN_, DESCYR, 1, DESCYY, 1 )
      ENDIF
*
*  Main computation loop.
*
*  No need for further communication within computation loop.
*
*  iapos points to beginning of entire panel
*  gnnz is number of nonzeros of entire panel
*  nnz is number of nonzeros in submatrix within a column
*
*
      JASTART = INDXFIRST( N, JA, DESCA( NB_ ), MYPCOL, DESCA( CSRC_ ),
     $          NPCOL )
      IASTART = IA
      IAEND = IA + M - 1
      CALL INFOG1L( ( IASTART-IA )+1+ROFF, MB, NPROW, MYPROW,
     $              DESCXR( RSRC_ ), IRPTR, RSRC )
*
* while loop.
*
   10 CONTINUE
      IF( JASTART.LE.JA+N-1 ) THEN
*
*         Align jaend to end of block boundary.
*
         JAEND = JASTART - MOD( NB+JASTART-1, NB ) + ( NB-1 )
         JAEND = MAX( JASTART, MIN( JA+N-1, JAEND ) )
         JASIZE = JAEND - JASTART + 1
         CALL DESCINITT( UPLO, IASTART, JASTART, DESCA, IA0, JA0,
     $                   LOFFSET1, DESC1 )
         CALL INFOG2L( IA0, JA0, DESC1, NPROW, NPCOL, MYPROW, MYPCOL,
     $                 LRINDXA, LCINDXA, IROWA, ICOLA )
         IPOS = ( LOFFSET1-1 ) + LRINDXA + ( LCINDXA-1 )*DESC1( LLD_ )
         LLDA = DESC1( LLD_ )
         CALL INFOG1L( ( JASTART-JA )+1+COFF, NB, NPCOL, MYPCOL,
     $                 DESCXC( CSRC_ ), ICPTR, CSRC )
         LOCP = NUMROC2( M, IA, MB, MYPROW, DESCA( RSRC_ ), NPROW )
         LOCQ = NUMROC2( JASIZE, JASTART, NB, MYPCOL, DESCA( CSRC_ ),
     $          NPCOL )
         HASWORK = ( LOCP.GE.1 ) .AND. ( LOCQ.GE.1 )
         IF( ISTRANSA ) THEN
            IXPTR = IRPTR
            IYPTR = ICPTR
         ELSE
            IXPTR = ICPTR
            IYPTR = IRPTR
         ENDIF
         IF( HASWORK ) THEN
            CALL SGEMV( TRANS, LOCP, LOCQ, ALPHA, A( IPOS ), LLDA,
     $                  WORK( ( IP_XX-1 )+IXPTR ), 1, BBETA,
     $                  WORK( ( IP_YY-1 )+IYPTR ), 1 )
         ENDIF
         JASTART = MAX( JAEND+1, ( JAEND-( NB-1 ) )+NB*NPCOL )
         GOTO 10
      ENDIF
   20 CONTINUE
*
* End while loop.
*
*
*   Sum results.
*
      IF( .NOT.ISTRANSA ) THEN
*
*   YY is YR,  YR is mm by 1
*
         SCOPE = 'Row'
         LDD = YRNEED
*
*  rdest = myprow; cdest = jycol;
*
         RDEST = MYPROW
         CDEST = JYCOL
         CALL SGSUM2D( DESCYR( CTXT_ ), SCOPE, ' ', YRNEED, 1,
     $                 WORK( IP_YR ), LDD, RDEST, CDEST )
         P0 = IAROW
         Q0 = JYCOL
         LDD = YRNEED
         CALL DESCSET( DESCYR, MM, 1, MB, NB, P0, Q0, ICONTXT, LDD )
         CALL PSAXPY( YSIZE, ONE, WORK( ( IP_YR-1 )+1 ), 1+ROFF, 1,
     $                DESCYR, 1, Y, IY, JY, DESCY, INCY )
      ELSE
*
*  YY is YC ,  YC is 1 by ycsize
*
         SCOPE = 'Col'
         LDD = YCNEED
*
*  rdest = iyrow; cdest = mypcol;
*
         RDEST = IYROW
         CDEST = MYPCOL
         CALL SGSUM2D( DESCYC( CTXT_ ), SCOPE, ' ', YCNEED, 1,
     $                 WORK( IP_YC ), LDD, RDEST, CDEST )
         P0 = IYROW
         Q0 = JACOL
         LDD = 1
         CALL DESCSET( DESCYC, 1, NN, MB, NB, P0, Q0, ICONTXT, LDD )
         CALL PSAXPY( YSIZE, ONE, WORK( ( IP_YC-1 )+1 ), 1, 1+COFF,
     $                DESCYC, DESCYC( M_ ), Y, IY, JY, DESCY, INCY )
      ENDIF
*
*   All done.
*
      WORK( 1 ) = INEED
      RETURN
      END
