C#######################################################################
C PSTSWM Version 1.0 (8/1/93)                                          #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
      SUBROUTINE TRANSPOSE(COMMOPT, BUFFERS, PROTOPT, FORCETYPE,
     &                     MAPSIZE, MAP, MYINDEX, BASE, DIR, W, M, N,
     &                     H1, H2, ML, NL, MAX, A, WS, B)
C
C This routine transposes the array
C                            A(W,ML,H1,H2,N)
C to an array with general form
C                            B(W,NL,H1,H2,M),
C where both A and B are distributed over MAPSIZE processors. Both
C arrays are declared REAL; they hold real data if (W .EQ. 1) and
C complex data if (W .EQ. 2). The actual organization of B is
C determined by the parameter DIR; this specifies where TRANSPOSE is
C called from, allowing the routine to order B as required for
C subsequent stages in PSTSWM.  The parameters COMMOPT, PROTOPT, and
C BUFFERS select one of a variety of different transpose algorithms.
C
C This routine itself only selects the algorithm family from COMMOPT
C and forwards all other arguments to the selected routine.
C
C 1) The basic idea:
C
C  We compute B = transpose(A), where:
C    A is a matrix of size (W,M,H1,H2,N), distributed by rows
C      (M index);
C    B is a matrix of size (W,N,H1,H2,M), distributed by rows
C      (N index).
C
C  Each processor has part of A and B as follows:
C      A(W,ML,H1,H2,N):
C          Each node has ML = (M/P or M/P+1) rows of A;
C          excess rows are allocated to lower-numbered nodes.
C      B(W,NL,H1,H2,M):
C          Each node has NL = (N/P or N/P+1) rows of B;
C          excess rows are allocated to lower-numbered nodes.
C
C 2) Specializations:
C
C  The routine TRANS incorporates additional reorganizations that
C  allow for B having a shape different than (W,N,H1,H2,M), and an
C  organization different than a simple transpose of A. TRANS is
C  called once for each incoming message and once for the local
C  component of the transpose.  It takes the data received in a
C  message (or found locally):
C                         WS(W,ML,H1,H2,NL)
C  and puts this in the correct place in the array B.
C
C  There are six different versions of TRANS, distinguished by DIR:
C
C  DIR=-1: Used after real forward transpose.  The array B is formed
C          as follows, where MAX is the M value with padding used in
C          PSTSWM:
C                          B(W,MAX,NL,H1,H2)
C  DIR=+1: Used after real backward transpose.  The array B is formed
C          as follows, where MAX is the NL value with padding used in
C          PSTSWM:
C                          B(W,MAX,M,H1,H2)
C  DIR=-2: Used after complex forward transpose following real
C          forward transpose in transpose FFT/transpose LT algorithm.
C          The array B is formed as follows, where MAX is the NL
C          value with padding used in PSTSWM:
C                          B(W,MAX,H1,M,H2)
C          This transpose "undistributes" the latitude dimension, and
C          must take into account that two nonadjacent sets of
C          latitudes are grouped in array A.
C  DIR=+2: Used after complex backward transpose preceding real
C          forward transpose in transpose FFT/transpose LT algorithm.
C          The array B is formed as follows, where MAX is the M value
C          with padding used in PSTSWM:
C                          B(W,MAX,H1,NL,H2)
C          This transpose "redistributes" the latitude dimension, and
C          must take into account that two nonadjacent sets of
C          latitudes are grouped in array B.
C  DIR=-3: Used after complex forward transpose following distributed
C          FFT in distributed FFT/transpose LT algorithm.
C          The array B is formed as follows, where MAX is the H1
C          value with padding used in PSTSWM:
C                          B(W,MAX,NL,M,H2)
C          This transpose "undistributes" the latitude dimension, and
C          must take into account that two nonadjacent sets of
C          latitudes are grouped in array A.
C  DIR=+3: Used after complex backward transpose preceding
C          distributed FFT in distributed FFT/transpose LT algorithm.
C          The array B is formed as follows, where MAX is the H1
C          value with padding used in PSTSWM:
C                          B(W,MAX,M,NL,H2)
C          This transpose "redistributes" the latitude dimension, and
C          must take into account that two nonadjacent sets of
C          latitudes are grouped in array B.
C
C 3) Algorithm Variants:
C
C  The code incorporates numerous variants of three different
C  parallel transpose algorithms. The algorithms and some of the
C  variants are selected by the parameter COMMOPT as follows:
C
C  a) If ((COMMOPT .GE. 0) .AND. (COMMOPT .LE. 3)), then an O(P) step
C  "send/recv" algorithm is used. At each step each processor sends a
C  message to one processor and receives a message from another.
C
C  b) If ((COMMOPT .GE. 10) .AND. (COMMOPT .LE. 13)), then an O(P)
C  step "swap" algorithm is used. At each step each processor
C  exchanges messages with another processor.
C
C  c) If ((COMMOPT .GE. 20) .AND. (COMMOPT .LE. 21)), then an
C  O(log P) step algorithm is used. At each step each processor
C  exchanges messages with another processor. For this algorithm,
C  MAPSIZE must be a power of two and M and N must be integer
C  multiples of MAPSIZE. The decreased number of steps in this
C  algorithm compared to the first two comes at the cost of a larger
C  amount of data moved.
C
C  Other variants for these three algorithms are described in the
C  routines SRTRANS, SWAPTRANS, and LOGTRANS.
C
C  Only the decade of COMMOPT is examined here: every value below 10
C  goes to SRTRANS, 10-19 to SWAPTRANS (as COMMOPT-10) and 20-29 to
C  LOGTRANS (as COMMOPT-20). Values of 30 or more are rejected with a
C  fatal error message on unit 0 followed by STOP.
C  NOTE(review): values outside the documented sub-ranges (e.g.
C  negative, 4-9, 14-19, 22-29) are presumably rejected by the
C  called routine - confirm against SRTRANS/SWAPTRANS/LOGTRANS.
C
C called by: RFTLON
C calls: LOGTRANS, SRTRANS, SWAPTRANS
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option
      INTEGER COMMOPT
C number of communication buffers (to use in recv-ahead algorithms);
C forwarded only to LOGTRANS
      INTEGER BUFFERS
C communication protocol option
      INTEGER PROTOPT
C forcetype message type offset
      INTEGER FORCETYPE
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C index of "me" in MAP array
      INTEGER MYINDEX
C message type offset to use in interprocessor communication
      INTEGER BASE
C context in which transpose occurs, and hence required data
C organization
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays
      INTEGER M, N, H1, H2, ML, NL, MAX
C local component of the array that is to be transposed
      REAL A(W,ML,H1,H2,N)
C
C     Work Space
C
C message buffers
      REAL WS(W*NL*H1*H2*M*BUFFERS)
C
C     Output
C
C Local component of the transposed array.
C (organized as REAL (W,MAX,NL,H1,H2), (W,MAX,M,H1,H2),
C  (W,MAX,H1,M,H2), (W,MAX,H1,NL,H2), (W,MAX,NL,M,H2), or
C  (W,MAX,M,NL,H2)).
C Declared assumed-size because the shape depends on DIR; the array
C is not indexed here, only passed on.
      REAL B(*)
C
C---- Executable Statements --------------------------------------------
C
C      CALL TRACEEVENT('entry', 10, 0, 0)
      IF (COMMOPT .LT. 10) THEN
C
C       Using an O(P) step "send/recv" transpose algorithm.
        CALL SRTRANS(COMMOPT, PROTOPT, FORCETYPE, MAPSIZE, MAP,
     &               MYINDEX, BASE, DIR, W, M, N, H1, H2, ML, NL, MAX,
     &               A, WS, B)
C
      ELSEIF (COMMOPT .LT. 20) THEN
C
C       Using an O(P) step "swap" transpose algorithm.
C       The callee receives the variant number relative to 10.
        CALL SWAPTRANS(COMMOPT-10, PROTOPT, FORCETYPE, MAPSIZE, MAP,
     &                 MYINDEX, BASE, DIR, W, M, N, H1, H2, ML, NL,
     &                 MAX, A, WS, B)
C
      ELSEIF (COMMOPT .LT. 30) THEN
C
C       Using an O(log P) step "swap" transpose algorithm.
C       The callee receives the variant number relative to 20.
        CALL LOGTRANS(COMMOPT-20, BUFFERS, PROTOPT, FORCETYPE,
     &                MAPSIZE, MAP, MYINDEX, BASE, DIR, W, M, N, H1,
     &                H2, ML, NL, MAX, A, WS, B)
C
      ELSE
C
C       illegal communication option specified
        WRITE(0,100) MAP(MYINDEX), COMMOPT
  100   FORMAT (/,' PSTSWM: FATAL ERROR IN SUBROUTINE TRANSPOSE ',/,
     &          ' ILLEGAL COMMUNICATION OPTION SPECIFIED',/,
     &          ' PROCID = ',I4,' COMMOPT = ',I4)
        STOP
C
      ENDIF
C      CALL TRACEEVENT('exit', 10, 0, 0)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TRANS(DIR, W, M, H1, H2, ML, NL, MAX, START, FROM, TO)
C
C  This subroutine transposes FROM(W,ML,H1,H2,NL) into the indicated
C  part of TO. It is a dispatcher: depending on DIR it calls one of
C  TRANS1 ... TRANS6, which perform the actual copy. Any other value
C  of DIR writes a fatal error message to unit 0 and stops.
C
C  There are six different versions of TRANS, distinguished by DIR:
C  DIR=-1: Used after real forward transpose. Thus, both FROM and TO
C          are real (W .EQ. 1). The array TO is formed as follows,
C          where MAX is the M value with padding used in PSTSWM:
C                          TO(MAX,NL,H1,H2)
C          Here START is an offset to the first (MAX) index.
C  DIR=+1: Used after real backward transpose. Thus, both FROM and TO
C          are real (W .EQ. 1). The array TO is formed as follows,
C          where MAX is the NL value with padding used in PSTSWM:
C                          TO(MAX,M,H1,H2)
C          Here START is an offset to the second (M) index.
C  DIR=-2: Used after complex forward transpose following real
C          forward transpose in transpose FFT/transpose LT algorithm.
C          Thus, both FROM and TO are complex (W .EQ. 2). The array
C          TO is formed as follows, where MAX is the NL value with
C          padding used in PSTSWM:
C                          TO(MAX,H1,M,H2)
C          Here START is an offset to the third (M) index.
C  DIR=+2: Used after complex backward transpose preceding real
C          forward transpose in transpose FFT/transpose LT algorithm.
C          Thus, both FROM and TO are complex (W .EQ. 2). The array
C          TO is formed as follows, where MAX is the M value with
C          padding used in PSTSWM:
C                          TO(MAX,H1,NL,H2)
C          Here START is an offset to the first (MAX) index.
C  DIR=-3: Used after complex forward transpose following distributed
C          FFT in distributed FFT/transpose LT algorithm. Thus, both
C          FROM and TO are complex (W .EQ. 2). The array TO is formed
C          as follows, where MAX is the H1 value with padding used in
C          PSTSWM:
C                          TO(MAX,NL,M,H2)
C          Here START is an offset to the third (M) index.
C  DIR=+3: Used after complex backward transpose preceding
C          distributed FFT in distributed FFT/transpose LT algorithm.
C          Thus, both FROM and TO are complex (W .EQ. 2). The array
C          TO is formed as follows, where MAX is the H1 value with
C          padding used in PSTSWM:
C                          TO(MAX,M,NL,H2)
C          Here START is an offset to the second (M) index.
C
C  NOTE(review): FROM and TO are declared REAL in this routine, while
C  TRANS3 ... TRANS6 declare the corresponding dummy arguments
C  COMPLEX. The calls therefore rely on the real and complex views
C  sharing the same storage; compilers that check argument types
C  across program units may need an option permitting the mismatch.
C
C called by: LGTRNS1, LGTRNS2, SRTRNS1, SRTRNS2, SWPTRNS1, SWPTRNS2
C calls: TRANS1, TRANS2, TRANS3, TRANS4, TRANS5, TRANS6
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C context in which transpose occurs, and hence required data
C organization
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays
      INTEGER M, H1, H2, ML, NL, MAX
C starting location in TO where the transpose is to begin; which
C index of TO it offsets depends on DIR (see the table above)
      INTEGER START
C array that is to be transposed
      REAL FROM(W,ML,H1,H2,NL)
C
C     Output
C
C destination of transposed array
C (organized as REAL (MAX,NL,H1,H2), REAL (MAX,M,H1,H2),
C  COMPLEX (MAX,H1,M,H2), COMPLEX (MAX,H1,NL,H2),
C  COMPLEX (MAX,NL,M,H2) or COMPLEX (MAX,M,NL,H2)).
C Declared assumed-size because the shape depends on DIR; the array
C is not indexed here, only passed on.
      REAL TO(*)
C
C---- Executable Statements --------------------------------------------
C
C The real cases collapse H1 and H2 into a single extent H1*H2,
C because TRANS1 and TRANS2 keep those two indices adjacent and in
C the same order in both FROM and TO.
      IF (DIR .EQ. -1) THEN
        CALL TRANS1(H1*H2, ML, NL, MAX, START, FROM, TO)
      ELSE IF (DIR .EQ. +1) THEN
        CALL TRANS2(M, H1*H2, ML, NL, MAX, START, FROM, TO)
      ELSE IF (DIR .EQ. -2) THEN
        CALL TRANS3(M, H1, H2, ML, NL, MAX, START, FROM, TO)
      ELSE IF (DIR .EQ. +2) THEN
        CALL TRANS4(H1, H2, ML, NL, MAX, START, FROM, TO)
      ELSE IF (DIR .EQ. -3) THEN
        CALL TRANS5(M, H1, H2, ML, NL, MAX, START, FROM, TO)
      ELSE IF (DIR .EQ. +3) THEN
        CALL TRANS6(M, H1, H2, ML, NL, MAX, START, FROM, TO)
      ELSE
C       unknown reorganization option
        WRITE(0,100) DIR
  100   FORMAT (/,' PSTSWM: FATAL ERROR IN SUBROUTINE TRANS ',/,
     &          ' INVALID TRANSPOSE REORGANIZATION OPTION SPECIFIED',/,
     &          ' DIR = ',I4)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TRANS1(H, ML, NL, MMAX, START, FROM, TO)
C
C Copies the block FROM(ML,H,NL) into part of TO, exchanging the
C positions of the H and NL indices. TO is organized as
C                         TO(MMAX,NL,H)
C where MMAX is the M value with padding used in PSTSWM. The ML
C entries of each FROM column are stored in rows START through
C START+ML-1 of the first (MMAX) index of TO; all other elements of
C TO are left untouched.
C
C called by: TRANS
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C extents of the input and output arrays
      INTEGER H, ML, NL, MMAX
C first position in the first (MMAX) index of TO that is written
      INTEGER START
C block that is to be transposed
      REAL FROM(ML,H,NL)
C
C     Output
C
C destination of the transposed block
      REAL TO(MMAX,NL,H)
C
C---- Local Variables --------------------------------------------------
C
C loop counters over the NL, H and ML extents respectively
      INTEGER JN, JH, JM
C amount added to JM to obtain the destination row in TO
      INTEGER SHIFT
C
C---- Executable Statements --------------------------------------------
C
      SHIFT = START - 1
C
      DO JN = 1, NL
        DO JH = 1, H
          DO JM = 1, ML
            TO(SHIFT+JM,JN,JH) = FROM(JM,JH,JN)
          END DO
        END DO
      END DO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TRANS2(M, H, ML, NL, NLMAX, START, FROM, TO)
C
C Copies the block FROM(ML,H,NL) into part of TO, moving the NL index
C to the front. TO is organized as
C                         TO(NLMAX,M,H)
C where NLMAX is the NL value with padding used in PSTSWM. The ML
C index of FROM is mapped to positions START through START+ML-1 of
C the second (M) index of TO; only the first NL rows of the first
C index are written and all other elements of TO are left untouched.
C
C called by: TRANS
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C extents of the input and output arrays
      INTEGER M, H, ML, NL, NLMAX
C first position in the second (M) index of TO that is written
      INTEGER START
C block that is to be transposed
      REAL FROM(ML,H,NL)
C
C     Output
C
C destination of the transposed block
      REAL TO(NLMAX,M,H)
C
C---- Local Variables --------------------------------------------------
C
C loop counters over the NL, H and ML extents respectively
      INTEGER JN, JH, JM
C amount added to JM to obtain the destination M position in TO
      INTEGER SHIFT
C
C---- Executable Statements --------------------------------------------
C
      SHIFT = START - 1
C
      DO JN = 1, NL
        DO JH = 1, H
          DO JM = 1, ML
            TO(JN,SHIFT+JM,JH) = FROM(JM,JH,JN)
          END DO
        END DO
      END DO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TRANS3(M, H1, H2, ML, NL, NLMAX, START, FROM, TO)
C
C Copies the complex block FROM(ML,H1,H2,NL) into part of TO, which
C is organized as
C                         TO(NLMAX,H1,M,H2)
C where NLMAX is the NL value with padding used in PSTSWM. The ML
C index of FROM is mapped to positions START through START+ML-1 of
C the third (M) index of TO; only the first NL rows of the first
C index are written and all other elements of TO are left untouched.
C
C called by: TRANS
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C extents of the input and output arrays
      INTEGER M, H1, H2, ML, NL, NLMAX
C first position in the third (M) index of TO that is written
      INTEGER START
C block that is to be transposed
      COMPLEX FROM(ML,H1,H2,NL)
C
C     Output
C
C destination of the transposed block
      COMPLEX TO(NLMAX,H1,M,H2)
C
C---- Local Variables --------------------------------------------------
C
C loop counters over the NL, H2, H1 and ML extents respectively
      INTEGER JN, J2, J1, JM
C amount added to JM to obtain the destination M position in TO
      INTEGER SHIFT
C
C---- Executable Statements --------------------------------------------
C
      SHIFT = START - 1
C
      DO JN = 1, NL
        DO J2 = 1, H2
          DO J1 = 1, H1
            DO JM = 1, ML
              TO(JN,J1,SHIFT+JM,J2) = FROM(JM,J1,J2,JN)
            END DO
          END DO
        END DO
      END DO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TRANS4(H1, H2, ML, NL, MMAX, START, FROM, TO)
C
C Copies the complex block FROM(ML,H1,H2,NL) into part of TO, which
C is organized as
C                         TO(MMAX,H1,NL,H2)
C where MMAX is the M value with padding used in PSTSWM. The ML
C index of FROM is mapped to positions START through START+ML-1 of
C the first (MMAX) index of TO, and the H2 and NL indices exchange
C places. All other elements of TO are left untouched.
C
C called by: TRANS
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C extents of the input and output arrays
      INTEGER H1, H2, ML, NL, MMAX
C first position in the first (MMAX) index of TO that is written
      INTEGER START
C block that is to be transposed
      COMPLEX FROM(ML,H1,H2,NL)
C
C     Output
C
C destination of the transposed block
      COMPLEX TO(MMAX,H1,NL,H2)
C
C---- Local Variables --------------------------------------------------
C
C loop counters over the NL, H2, H1 and ML extents respectively
      INTEGER JN, J2, J1, JM
C amount added to JM to obtain the destination row in TO
      INTEGER SHIFT
C
C---- Executable Statements --------------------------------------------
C
      SHIFT = START - 1
C
      DO JN = 1, NL
        DO J2 = 1, H2
          DO J1 = 1, H1
            DO JM = 1, ML
              TO(SHIFT+JM,J1,JN,J2) = FROM(JM,J1,J2,JN)
            END DO
          END DO
        END DO
      END DO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TRANS5(M, H1, H2, ML, NL, H1MAX, START, FROM, TO)
C
C Copies the complex block FROM(ML,H1,H2,NL) into part of TO, which
C is organized as
C                         TO(H1MAX,NL,M,H2)
C where H1MAX is the H1 value with padding used in PSTSWM. The ML
C index of FROM is mapped to positions START through START+ML-1 of
C the third (M) index of TO; only the first H1 rows of the first
C index are written and all other elements of TO are left untouched.
C
C called by: TRANS
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C extents of the input and output arrays
      INTEGER M, H1, H2, ML, NL, H1MAX
C first position in the third (M) index of TO that is written
      INTEGER START
C block that is to be transposed
      COMPLEX FROM(ML,H1,H2,NL)
C
C     Output
C
C destination of the transposed block
      COMPLEX TO(H1MAX,NL,M,H2)
C
C---- Local Variables --------------------------------------------------
C
C loop counters over the NL, H2, H1 and ML extents respectively
      INTEGER JN, J2, J1, JM
C amount added to JM to obtain the destination M position in TO
      INTEGER SHIFT
C
C---- Executable Statements --------------------------------------------
C
      SHIFT = START - 1
C
      DO JN = 1, NL
        DO J2 = 1, H2
          DO J1 = 1, H1
            DO JM = 1, ML
              TO(J1,JN,SHIFT+JM,J2) = FROM(JM,J1,J2,JN)
            END DO
          END DO
        END DO
      END DO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE TRANS6(M, H1, H2, ML, NL, H1MAX, START, FROM, TO)
C
C Copies the complex block FROM(ML,H1,H2,NL) into part of TO, which
C is organized as
C                         TO(H1MAX,M,NL,H2)
C where H1MAX is the H1 value with padding used in PSTSWM. The ML
C index of FROM is mapped to positions START through START+ML-1 of
C the second (M) index of TO; only the first H1 rows of the first
C index are written and all other elements of TO are left untouched.
C
C called by: TRANS
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C extents of the input and output arrays
      INTEGER M, H1, H2, ML, NL, H1MAX
C first position in the second (M) index of TO that is written
      INTEGER START
C block that is to be transposed
      COMPLEX FROM(ML,H1,H2,NL)
C
C     Output
C
C destination of the transposed block
      COMPLEX TO(H1MAX,M,NL,H2)
C
C---- Local Variables --------------------------------------------------
C
C loop counters over the NL, H2, H1 and ML extents respectively
      INTEGER JN, J2, J1, JM
C amount added to JM to obtain the destination M position in TO
      INTEGER SHIFT
C
C---- Executable Statements --------------------------------------------
C
      SHIFT = START - 1
C
      DO JN = 1, NL
        DO J2 = 1, H2
          DO J1 = 1, H1
            DO JM = 1, ML
              TO(J1,SHIFT+JM,JN,J2) = FROM(JM,J1,J2,JN)
            END DO
          END DO
        END DO
      END DO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC

