C#######################################################################
C PSTSWM Version 1.0 (8/1/93)                                          #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
      SUBROUTINE FLTSUM (STAGE, MAXSTAGE, LTH, LTJUMP, NVEC, DATA1, 
     &                   DATA2, SUM)
C
C This subroutine calculates either
C  (a) a complex vector sum and broadcast, or
C  (b) one stage of a ring-shift complex vector sum operation
C      in which the vector is summed in segments and the result
C      is left distributed.
C The vector sum is calculated over a subset of processors cooperating 
C in the computation of a forward Legendre transform procedure.
C
C called by: DZSC2, FTRNPE, FTDPEV, FTDPIV
C calls: HALFSUM, EXCHSUM, RING_MAP, RINGSUM, SHIFTSUM
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
      INCLUDE 'params.i'
C
C---- Common Blocks ----------------------------------------------------
C
C parallel algorithm information
      INCLUDE 'algorithm.i'
C domain decomposition information
      INCLUDE 'spectral.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C current stage of the algorithm (a decreasing sequence)
      INTEGER STAGE
C maximum (initial) stage in algorithm
      INTEGER MAXSTAGE
C length of the current segment of the complex vector
      INTEGER LTH
C declared length of the segments of the complex vector
      INTEGER LTJUMP
C number of vectors being summed
      INTEGER NVEC
C If (P .LT. MAXSTAGE) then DATA1 contains the current segment of local 
C contribution to the vector sum. Otherwise, it is not used.
      COMPLEX DATA1(LTJUMP,NVEC)
C
C     Work Space
C
C communication buffers for receiving running sums 
C (either COMPLEX (LTJUMP,NVEC,1) or COMPLEX (LTJUMP,NVEC,BUFSFLT))
      COMPLEX DATA2(LTJUMP,NVEC,1)
C
C     Input/Output
C
C Input: If (P .EQ. MAXSTAGE), then SUM contains the current segment of 
C  local contribution to vector sum. Otherwise, it is not used.
C Output: If (P .EQ. 1), then SUM contains (a segment of) the 
C  completed complex vector sum. Otherwise, it contains a partial 
C  result.
      COMPLEX SUM(LTJUMP,NVEC)
C
C---- Local Variables --------------------------------------------------
C
C "inverted" stage value, so that can use an increasing sequence of 
C steps in pipeline algorithm
      INTEGER STEP
C bases for message types used in this vector sum algorithm
C (They are used to prevent sends and receives in this operation from
C being erroneously "consumed" by sends and receives in other 
C phases of the program.)
      INTEGER BASE, BASE2
      SAVE BASE
C neighbors in logical ring
      INTEGER LEFT, RIGHT
      SAVE LEFT, RIGHT
C
C---- External Functions -----------------------------------------------
C
C offset for message types
      EXTERNAL MSGBASE
      INTEGER MSGBASE
C processor ids for ring neighbors
      EXTERNAL RING_MAP
      INTEGER RING_MAP
C
C---- Executable Statements --------------------------------------------
C
C      CALL TRACEEVENT('entry', 9, 0, 0)
        IF (NPSPEC_S .GT. 1) THEN
C
C         something to do
          IF (LTOPT .EQ. 0) THEN
C
            IF (COMMFLT .LT. 10) THEN
C
C             Using a ring-shift vector sum algorithm that leaves the 
C             result distributed and allows computation to be overlapped
C             with communication (pipelining).
C
C             "Invert" stage to be an increasing sequence, to make 
C             subsequent code more intuitive.
              STEP = (MAXSTAGE - STAGE) + 1
C
C             If first time, update message type base and calculate
C             neighbors. 
              IF (STEP .EQ. 1) THEN
                BASE = MSGBASE()
                LEFT = RING_MAP(SPECDEX_S-1, NPSPEC_S, MAPSPEC_S)
                RIGHT= RING_MAP(SPECDEX_S+1, NPSPEC_S, MAPSPEC_S)
              ENDIF                
C
C             Compute a stage of the ring-shift vector sum (doubling the
C             length since a complex vector, and SHIFTSUM expects real 
C             vectors).
              CALL SHIFTSUM(COMMFLT, BUFSFLT, PROTFLT, FORCETYPE,
     &                      MAPSPEC_S(SPECDEX_S), LEFT, RIGHT,
     &                      SPECDEX_S, BASE, STEP, MAXSTAGE, 2*LTH,
     &                      2*LTJUMP, NVEC, DATA1, DATA2, SUM)
C
            ELSEIF (COMMFLT .LT. 40) THEN
C
C             Using a column-wise vector sum and broadcast algorithm
C             (doubling the length since a complex vector, and RINGSUM,
C              HALFSUM, and EXCHSUM expect a real vector).
              IF (COMMFLT .LT. 20) THEN
                LEFT  = RING_MAP(SPECDEX_S-1, NPSPEC_S, MAPSPEC_S)
                RIGHT = RING_MAP(SPECDEX_S+1, NPSPEC_S, MAPSPEC_S)
                BASE  = MSGBASE()
                BASE2 = MSGBASE()
                CALL RINGSUM(COMMFLT-10, PROTFLT, FORCETYPE, NPSPEC_S,
     &                       MAPSPEC_S(SPECDEX_S), LEFT, RIGHT,
     &                       SPECDEX_S, BASE, BASE2, 2*LTJUMP*NVEC,
     &                       DATA2, SUM)  
              ELSEIF (COMMFLT .LT. 30) THEN
                BASE  = MSGBASE()
                BASE2 = MSGBASE()
                CALL HALFSUM(COMMFLT-20, BUFSFLT, PROTFLT, FORCETYPE, 
     &                       NPSPEC_S, MAPSPEC_S, SPECDEX_S, BASE,
     &                       BASE2, EXCHSIZE, 2*LTJUMP*NVEC, DATA2, SUM)
              ELSEIF (COMMFLT .LT. 40) THEN
                CALL EXCHSUM(COMMFLT-30, BUFSFLT, PROTFLT, FORCETYPE,
     &                       NPSPEC_S, MAPSPEC_S, SPECDEX_S, MSGBASE(), 
     &                       2*LTH, 2*LTJUMP, NVEC, DATA2, SUM)
              ENDIF
C
            ELSE
C
C             illegal communication option specified
              WRITE(0,100) MAPSPEC_S(SPECDEX_S), LTOPT, COMMFLT
  100         FORMAT (/,' PSTSWM: FATAL ERROR IN SUBROUTINE FLTSUM ',/,
     &                ' ILLEGAL PARALLEL ALGORITHM OPTION SPECIFIED ',/,
     &                ' FOR FORWARD LEGENDRE TRANSFORM',/,
     &                ' PROCID = ',I4,' LTOPT = ',I4,' COMMFLT = ',I4)
              STOP
C
            ENDIF
C
          ELSEIF (LTOPT .EQ. 1) THEN
C
C           Using a transpose algorithm, so nothing to do.
C           (NPSPEC_S .EQ. 1)
C
          ELSE
C
C           illegal Legendre transform option specified
            WRITE(0,100) MAPSPEC_S(SPECDEX_S), LTOPT, COMMFLT
C
          ENDIF
C
        ENDIF
C      CALL TRACEEVENT('exit', 9, 0, 0)
C
      RETURN
      END
