C#######################################################################
C PSTSWM Version 1.0 (8/1/93)                                          #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C globalops.f                                                          C
C                                                                      C
C The following routines are used in computing global combines of      C
C vectors distributed over the logical processor grid:                 C
C                                                                      C
C GLOBALMAX - component-wise global maximum of a set of distributed    C
C             vectors, using PICL global operations                    C
C GLOBALMIN - component-wise global minimum of a set of distributed    C
C             vectors, using PICL global operations                    C
C PGRIDSUM  - summation of distributed physical (longitude/latitude)   C
C             grid data, using rowsize and columnwise operations       C
C SGRIDSUM  - summation of distributed spectral (wavenumber/polynomial C
C             degree) grid data, using rowsize and columnwise          C
C             operations                                               C
C                                                                      C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE GLOBALMAX(LTH,VECTOR)
C
C This subroutine calculates the component-wise global maximum of a 
C distributed set of vectors, and broadcasts the result.
C
C called by: SPEED, ERRANL
C calls: MSGBASE, PICL routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
      INCLUDE 'params.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C length of vector
      INTEGER LTH
C
C     Input/Output
C
C data vector
      REAL VECTOR(LTH)
C
C---- Local Variables --------------------------------------------------
C
C base for message types used in the global maximum and broadcast
      INTEGER BASE
C
C---- External Functions -----------------------------------------------
C
C offset for message types
      EXTERNAL MSGBASE
      INTEGER MSGBASE
C
C---- Executable Statements --------------------------------------------
C
C     Calculate maximum.
      BASE = MSGBASE()
      CALL GMAX0(VECTOR, LTH, 4, BASE, 0)
C
C     Broadcast result.
      BASE = MSGBASE()
      CALL BCAST0(VECTOR, RBYTES*LTH, BASE, 0)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE GLOBALMIN(LTH,VECTOR)
C
C This subroutine calculates the component-wise global minimum of a 
C distributed set of vectors, and broadcasts the result.
C
C called by: ERRANL
C calls: MSGBASE, PICL routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
      INCLUDE 'params.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C length of vector
      INTEGER LTH
C
C     Input/Output
C
C data vector
      REAL VECTOR(LTH)
C
C---- Local Variables --------------------------------------------------
C
C base for message types used in the global minimum and broadcast
      INTEGER BASE
C
C---- External Functions -----------------------------------------------
C
C offset for message types
      EXTERNAL MSGBASE
      INTEGER MSGBASE
C
C---- Executable Statements --------------------------------------------
C
C     Calculate minimum.
      BASE = MSGBASE()
      CALL GMIN0(VECTOR, LTH, 4, BASE, 0)
C
C     Broadcast result.
      BASE = MSGBASE()
      CALL BCAST0(VECTOR, RBYTES*LTH, BASE, 0)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE PGRIDSUM(VLTH, VLTHMX, ILTH, JLTH, DATA, WS, SUM)
C
C This routine sums lon/lat data distributed over the logical processor 
C mesh when the standard physical space partitioning is in effect. It
C uses an ordering (binary tree sum in latitude, followed by binary tree
C sum in longitude) which is processor-independent when ILTH and JLTH
C are powers of two. 
C
C called by: ERRANL, NRGTCS, SPEED
C calls: MSGBASE, EXCHSUM, TREESUM, PICL routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
      INCLUDE 'params.i'
C
C---- Common Blocks ----------------------------------------------------
C
C parallel algorithm information
      INCLUDE 'algorithm.i'
C domain decomposition definition variables
      INCLUDE 'physical.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C local length of distributed vector being summed
      INTEGER VLTH
C declared length of local vector
      INTEGER VLTHMX
C dimensions of longitude/latitude grid
      INTEGER ILTH, JLTH
C
C     Input/Work Space
C
C longitude/latitude grid data (overwritten during calculation)
      REAL DATA(VLTHMX,ILTH,JLTH)
C
C     Work Space
C
C work array
      REAL WS(VLTHMX,ILTH)
C
C     Output
C
C completed vector sum
      REAL SUM(VLTHMX)
C
C---- Local Variables --------------------------------------------------
C
C base for message types used in the columnwise and rowwise sums
      INTEGER BASE
C
C---- External Functions -----------------------------------------------
C
C offset for message types
      EXTERNAL MSGBASE
      INTEGER MSGBASE
C
C---- Executable Statements --------------------------------------------
C
C     Sum local vectors using binary tree ordering over latitude.
C     (result in WS)
      CALL TREESUM(1, VLTH, VLTHMX, ILTH, JLTH, DATA, WS)
C
C     Set message type base.
      BASE = MSGBASE()
C
C     Add local sum to sums from nonlocal latitudes.
      CALL EXCHSUM(0, 1, 0,0, NPLAT_P, MAPLAT_P, LATDEX_P, BASE, VLTH,
     &             VLTHMX, ILTH, DATA, WS)
C
C     Sum current local vectors using binary tree ordering over 
C     longitude.
      CALL TREESUM(1, VLTH, VLTHMX, 1, ILTH, WS, SUM)
C
C     Set message type base.
      BASE = MSGBASE()
C
C     Add local sum to sums from nonlocal longitudes.
      CALL EXCHSUM(0, 1, 0,0, NPLON_P, MAPLON_P, LONDEX_P, BASE, VLTH,
     &             VLTH, 1, WS, SUM)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SGRIDSUM(VLTH, VLTHMX, ILTH, JLTH, DATA, WS, SUM)
C
C This routine sums spectral data distributed over the logical processor 
C mesh when the standard spectral space partitioning is in effect.
C It uses an ordering (binary tree sum in NN, followed by binary tree 
C sum in MM) which is processor-independent when ILTH and JLTH are
C powers of two (which is unlikely in the spectral space, but this 
C ordering is still "closer" to being reproducible than the obvious 
C alternatives)
C
C called by: SPCANL
C calls: MSGBASE, EXCHSUM, TREESUM, PICL routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
      INCLUDE 'params.i'
C
C---- Common Blocks ----------------------------------------------------
C
C parallel algorithm information
      INCLUDE 'algorithm.i'
C domain decomposition definition variables
      INCLUDE 'spectral.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C local length of distributed vector being summed
      INTEGER VLTH
C declared length of local vector
      INTEGER VLTHMX
C dimensions of untruncated spectral grid
      INTEGER ILTH, JLTH
C
C     Input/Work Space
C
C spectral data
C (overwritten during calculation)
      REAL DATA(VLTHMX,ILTH,JLTH)
C
C     Work Space
C
C work array
      REAL WS(VLTHMX,ILTH)
C
C     Output
C
C completed vector sum
      REAL SUM(VLTHMX)
C
C---- Local Variables --------------------------------------------------
C
C base for message types used in the columnwise and rowwise sums
      INTEGER BASE
C
C---- External Functions -----------------------------------------------
C
C offset for message types
      EXTERNAL MSGBASE
      INTEGER MSGBASE
C
C---- Executable Statements --------------------------------------------
C
C     Sum local vectors using binary tree ordering over NN (for local 
C     Fourier coefficients).
      CALL TREESUM(1, VLTH, VLTHMX, ILTH, JLTH, DATA, WS)
C
      IF ((LTOPT .EQ. 0) .AND. (COMMFLT .LT. 10)) THEN
C       The spectral coefficients are partitioned in the "NN" direction,
C       so sum them (otherwise, they are duplicated, so don't sum them).
C
C       Set message type base.
        BASE = MSGBASE()
C
C       Add local sum to sums from nonlocal latitudes.
        CALL EXCHSUM(0, 1, 0,0, NPSPEC_S, MAPSPEC_S, SPECDEX_S, BASE,
     &               VLTH, VLTHMX, ILTH, DATA, WS)
C
      ENDIF
C
C     Sum current local vectors using binary tree ordering over 
C     Fourier coefficients.
      CALL TREESUM(1, VLTH, VLTHMX, 1, ILTH, WS, SUM)
C
C     Set message type base.
      BASE = MSGBASE()
C
C     Add local sum to sums from nonlocal wavenumbers.
      CALL EXCHSUM(0, 1, 0,0, NPFC_S, MAPFC_S, FCDEX_S, BASE, VLTH, 
     &             VLTH, 1, WS, SUM)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC




