C#######################################################################
C PSTSWM Version 1.0 (8/1/93)                                          #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
      SUBROUTINE SHIFTSUM(COMMOPT, BUFFERS, PROTOPT, FORCETYPE, ME,
     &                    LEFT, RIGHT, MYINDEX, BASE, STEP, MAXSTEP, 
     &                    LTH, LTJUMP, NVEC, DATA1, DATA2, SUM)
C
C This subroutine calculates one step of a ring-shift vector sum
C algorithm in which the vector is summed in segments and the result is
C left distributed. At each step, it takes as input NVEC vectors of
C length LTH and uses these to compute a local contribution. 
C
C Communication options (COMMOPT) for SHIFTSUM include:
C  IF (COMMOPT .EQ. 0) simple shift: (calc)/sum/send/recv ... (calc)/sum 
C  IF (COMMOPT .EQ. 1) ordered shift: 
C    (calc)/sum/[send/recv]|[recv/send] ... (calc)/sum
C  IF (COMMOPT .EQ. 2) delayed receive:
C    (calc)/sum/send (calc)/recv/sum/send ... (calc)/recv/sum
C Communication protocol options (PROTOPT) for SHIFTSUM include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5) .AND. (BUFFERS .GT. 1)
C    recv-ahead
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: FLTSUM
C calls: SR1, SR2, SR3, SRBEGIN, SREND
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
      INCLUDE 'params.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option
      INTEGER COMMOPT
C number of communication buffers (to use in recv-ahead algorithms)
      INTEGER BUFFERS
C communication protocol option
      INTEGER PROTOPT
C forcetype message type offset
      INTEGER FORCETYPE
C my processor id
      INTEGER ME
C processor ids for neighbors in logical ring
      INTEGER LEFT, RIGHT
C index of "me" in logical ring
      INTEGER MYINDEX
C message type offset to use in interprocessor communication
      INTEGER BASE
C current step of the algorithm (an increasing sequence)
      INTEGER STEP
C maximum (final) step number in algorithm
      INTEGER MAXSTEP
C length of the current segment of the vector
      INTEGER LTH
C declared length of the segments of the vector
      INTEGER LTJUMP
C number of vectors being calculated for
      INTEGER NVEC
C current local contribution to vector sum
      REAL DATA1(LTJUMP,NVEC)
C
C     Input/Output
C
C communication buffers for receiving running sums
      REAL DATA2(LTJUMP,NVEC,BUFFERS)
C
C     output
C
C If (P .EQ. MAXSTEP), then SUM contains a segment of the completed 
C vector sum. Otherwise, SUM contains a partial result.
      REAL SUM(LTJUMP,NVEC)
C
C---- Local Variables --------------------------------------------------
C
C loop indices
      INTEGER I, J
C buffer pointers
      INTEGER CURBUF, NXTBUF
C vector length (in bytes)
      INTEGER MSGLTH
C order of send/recv in ordered sendrecv algorithm
      INTEGER ORDER
C                                                                              
C---- Executable Statements --------------------------------------------
C
C     Calculate message length.
      MSGLTH = RBYTES*LTJUMP*NVEC
C
C     Calculate order for ordered algorithms.
      ORDER = MOD(MYINDEX,2)
C
      IF (BUFFERS .EQ. 1) THEN
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C       single buffer algorithms
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
C       Make sure that local sum buffer is available for reuse and
C       add in current local contribution.
        IF (STEP .GT. 1) THEN
          CALL SREND(COMMOPT, PROTOPT, FORCETYPE, ORDER, ME,
     &               STEP-1+BASE, RIGHT, LEFT, MSGLTH, DATA2)
          DO J = 1,NVEC
            DO I = 1,LTH
              SUM(I,J) = DATA1(I,J) + DATA2(I,J,1)
            ENDDO
          ENDDO
C
        ENDIF
C
C       Send new results on and request new data.
        IF (STEP .LT. MAXSTEP) THEN
          CALL SRBEGIN(COMMOPT, PROTOPT, FORCETYPE, ORDER, ME,
     &                 STEP+BASE, RIGHT, MSGLTH, SUM, LEFT, MSGLTH,
     &                 DATA2)
        ENDIF
C
      ELSEIF (BUFFERS .GT. 1) THEN
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C       multiple buffer algorithms
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
        IF (STEP .EQ. 1) THEN
C         If first time, then post "BUFFERS-1" receive requests
C         (BUFFERS-1 outstanding requests, 1 segment "in use").
C
C         Post all but first receive request (disabling safe forcetype
C         protocol - only needs to be used in the "first" request).
          DO I=2,BUFFERS-1
            CALL SR1(PROTOPT, FORCETYPE, .FALSE., ME, I+BASE, LEFT, 
     &               MSGLTH, DATA2(1,1,I))
          ENDDO
C
C         Post first receive request (with safe forcetype protocol).
          CALL SR1(PROTOPT, FORCETYPE, .TRUE., ME, 1+BASE, LEFT, 
     &             MSGLTH, DATA2(1,1,1))
C
C         Initialize buffer pointer.
          CURBUF = BUFFERS
C
        ELSE
C
C         Otherwise, make sure that the local sum buffer is available
C         for reuse and add in the current local contribution.
          CURBUF = MOD(STEP-2,BUFFERS) + 1
          CALL SR3(COMMOPT, PROTOPT, FORCETYPE, ME, STEP-1+BASE, 
     &             LEFT, MSGLTH, DATA2(1,1,CURBUF))
          DO J = 1,NVEC
            DO I = 1,LTH
              SUM(I,J) = DATA1(I,J) + DATA2(I,J,CURBUF)
            ENDDO
          ENDDO
C
        ENDIF
C
C       If not last time, then send updated sum to neighbor.
        IF (STEP .LT. MAXSTEP) THEN
          NXTBUF = MOD(STEP-1,BUFFERS) + 1
          IF ((STEP .EQ. 1) .OR. (STEP .GE. BUFFERS)) THEN
C           Using safeforce protocol.
            CALL SR2(COMMOPT, PROTOPT, FORCETYPE, .TRUE., ORDER, ME,
     &               STEP+BASE, RIGHT, MSGLTH, SUM, 
     &               LEFT, MSGLTH, DATA2(1,1,NXTBUF))
          ELSE
C           Not using safeforce protocol.
            CALL SR2(COMMOPT, PROTOPT, FORCETYPE, .FALSE., ORDER, ME,
     &               STEP+BASE, RIGHT, MSGLTH, SUM, 
     &               LEFT, MSGLTH, DATA2(1,1,NXTBUF))
          ENDIF
        ENDIF
C
C       Post next recv-ahead receive request.
C       (Note: if (BUFFERS .EQ. MAXSTEP), then the first BUFFERS-1
C        requests are all that are needed by the SHIFTSUM algorithm, and
C        the following test is never satisfied. 
C        If (BUFFERS .EQ. MAXSTEP-1), then this test succeeds once, but
C        only need MAXSTEP-1 buffers, so do not "wrap around" (i.e. no
C        reuse of old buffers).) 
        IF (BUFFERS-1+STEP .LT. MAXSTEP) THEN
          CALL SR1(PROTOPT, FORCETYPE, .TRUE., ME,
     &             (BUFFERS-1+STEP)+BASE, LEFT, MSGLTH,
     &             DATA2(1,1,CURBUF)) 
        ENDIF
C
      ELSE
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C       illegal number of buffers specified
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
        WRITE(0,100) ME, BUFFERS
  100   FORMAT (/,' PSTSWM: FATAL ERROR IN SUBROUTINE SHIFTSUM ',/,
     &          ' ILLEGAL COMMUNICATION BUFFER SIZE SPECIFIED',/,
     &          ' PROCID = ',I4,' BUFFERS = ',I4)
        STOP
C
      ENDIF
C
      RETURN
      END
