C#######################################################################
C PSTSWM Version 4.0 (12/1/94)                                         #
C  (Stripped down PVM-only version (4/13/95), for use in ParkBench     #
C   benchmark suite)                                                   #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
C include precision declaration definitions                            #
#include "precision.i"
C#######################################################################
      SUBROUTINE SRTRANS(COMMOPT, PROTOPT, MAPSIZE, MAP,
     &                   MYINDEX, BASE, DIR, W, M, N, H1, H2, LM, LN,
     &                   MX, A, WS, B)
C
C Driver for the O(P) "send/recv" transpose B = transpose(A), in which
C each step consists of sending a message to one processor and
C receiving a message from another. This routine only checks COMMOPT
C and hands the work to SRTRNS1 (no recv-ahead) or SRTRNS2
C (recv-ahead). Nothing at all is done when MAPSIZE is 1 or less.
C
C Here
C  A is a matrix of size (W,M,H1,H2,N) distributed by rows and
C  B is a matrix of size (W,N,H1,H2,M) distributed by rows
C over MAPSIZE processors, and each processor has part of A and B as
C follows:
C  A(W,LM(MYINDEX),H1,H2,N): Processor I has LM(I) rows of A;
C                            LM(0) + ... + LM(MAPSIZE-1) = M.
C  B(W,LN(MYINDEX),H1,H2,M): Processor I has LN(I) rows of B;
C                            LN(0) + ... + LN(MAPSIZE-1) = N.
C W is 1 or 2, depending on whether the arrays are REAL or COMPLEX.
C
C Alternative reorganizations of B are also supported, and are
C determined by the parameter DIR. DIR specifies where TRANSPOSE is
C called from, allowing the routine to order B as required for
C subsequent stages in PSTSWM:
C  DIR=-1: B(W,MX,LN,H1,H2)  (Used after real forward transpose.)
C  DIR=+1: B(W,MX,M,H1,H2)   (Used after real backward transpose.)
C  DIR=-2: B(W,MX,H1,M,H2)   (Used after complex forward transpose
C                             in transpose FFT/transpose LT algorithm)
C  DIR=+2: B(W,MX,H1,LN,H2)  (Used after complex backward transpose
C                             in transpose FFT/transpose LT algorithm)
C  DIR=-3: B(W,MX,LN,M,H2)   (Used after complex forward transpose
C                             in distributed FFT/transpose LT
C                             algorithm)
C  DIR=+3: B(W,MX,M,LN,H2)   (Used after complex backward transpose
C                             in distributed FFT/transpose LT
C                             algorithm)
C The MAP array defines the processor subset and ordering to use.
C
C Communication options (COMMOPT) for SRTRANS include:
C  IF (COMMOPT .EQ. 0) simple SENDRECV: send/recv/trans
C  IF (COMMOPT .EQ. 1) ordered SENDRECV: [send/recv]|[recv/send]/trans
C  IF (COMMOPT .EQ. 2) simple SENDRECV with recv-ahead
C  IF (COMMOPT .EQ. 3) ordered SENDRECV with recv-ahead
C  IF (COMMOPT .EQ. 4) send-ahead SENDRECV with recv-ahead
C Any other value of COMMOPT, negative values included, is reported
C on unit 0 as a fatal error and the program is stopped. (This check
C is only reached when MAPSIZE is greater than 1.)
C Communication protocol options (PROTOPT) for SRTRANS include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous
C
C called by: TRANSPOSE
C calls: SRTRNS1, SRTRNS2
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option (0 through 4, see above)
      INTEGER COMMOPT
C communication protocol option (passed through unchanged)
      INTEGER PROTOPT
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C index of "me" in MAP array
      INTEGER MYINDEX
C message type offset to use in interprocessor communication
      INTEGER BASE
C context in which transpose occurs, and hence required data layout
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays on processors in MAP array
      INTEGER M, N, H1, H2, MX
      INTEGER LM(0:MAPSIZE-1)
      INTEGER LN(0:MAPSIZE-1)
C local component of the array that is to be transposed, of size
C REAL (W,LM(MYINDEX),H1,H2,N). Declared assumed-size because it is
C never indexed here, only passed on to SRTRNS1 or SRTRNS2.
      REAL A(*)
C
C     Work Space
C
C message buffers, of size
C REAL (W,LN(MYINDEX),H1,H2,M)
      REAL WS(*)
C
C     Output
C
C local component of the transposed array.
C (organized as REAL (W,MX,LN,H1,H2), (W,MX,M,H1,H2), (W,MX,H1,M,H2),
C  (W,MX,H1,LN,H2), (W,MX,LN,M,H2), or (W,MX,M,LN,H2))
      REAL B(*)
C
C---- Executable Statements --------------------------------------------
C
      IF (MAPSIZE .GT. 1) THEN
C       Compute transpose.
C
        IF ((COMMOPT .EQ. 0) .OR. (COMMOPT .EQ. 1)) THEN
C         no recv-ahead algorithms
          CALL SRTRNS1(COMMOPT, PROTOPT, MAPSIZE, MAP,
     &                 MYINDEX, BASE, DIR, W, M, N, H1, H2, LM, LN,
     &                 LM(MYINDEX), LN(MYINDEX), MX, A, WS, B)
C
        ELSEIF ((COMMOPT .GE. 2) .AND. (COMMOPT .LE. 4)) THEN
C         recv-ahead algorithms; SRTRNS2 numbers its own options
C         from 0, hence the shift by 2.
          CALL SRTRNS2(COMMOPT-2, PROTOPT, MAPSIZE, MAP,
     &                 MYINDEX, BASE, DIR, W, M, N, H1, H2, LM, LN,
     &                 LM(MYINDEX), LN(MYINDEX), MX, A, WS, B)
C
        ELSE
C         illegal communication option specified (below 0 or above 4)
          WRITE(0,100) MAP(MYINDEX), COMMOPT
  100     FORMAT (/,' PSTSWM: FATAL ERROR IN SUBROUTINE SRTRANS ',/,
     &            ' ILLEGAL COMMUNICATION OPTION SPECIFIED',/,
     &            ' PROCID = ',I4,' COMMOPT = ',I4)
          STOP
C
        ENDIF
C
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SRTRNS1(COMMOPT, PROTOPT, MAPSIZE, MAP,
     &                   MYINDEX, BASE, DIR, W, M, N, H1, H2, LM, LN,
     &                   ML, NL, MX, A, WS, B)
C
C Computes B = transpose(A) with the O(P) "send/recv" transpose
C algorithm without recv-ahead. In each of the MAPSIZE-1 steps one
C message is sent to one processor and one message is received from
C another; the received piece is transposed into B before the next
C step starts. The piece of A that stays on this processor is
C transposed last.
C
C Data layout:
C  A is a matrix of size (W,M,H1,H2,N) distributed by rows and
C  B is a matrix of size (W,N,H1,H2,M) distributed by rows
C over MAPSIZE processors. Processor I holds
C  A(W,LM(I),H1,H2,N), i.e. LM(I) rows of A, with
C                      LM(0) + ... + LM(MAPSIZE-1) = M, and
C  B(W,LN(I),H1,H2,M), i.e. LN(I) rows of B, with
C                      LN(0) + ... + LN(MAPSIZE-1) = N.
C W is 1 or 2, depending on whether the arrays are REAL or COMPLEX.
C
C DIR tells the routine where TRANSPOSE was called from and therefore
C how B must be organized for the following stage of PSTSWM:
C  DIR=-1: B(W,MX,LN,H1,H2)  (after real forward transpose)
C  DIR=+1: B(W,MX,M,H1,H2)   (after real backward transpose)
C  DIR=-2: B(W,MX,H1,M,H2)   (after complex forward transpose in
C                             transpose FFT/transpose LT algorithm)
C  DIR=+2: B(W,MX,H1,LN,H2)  (after complex backward transpose in
C                             transpose FFT/transpose LT algorithm)
C  DIR=-3: B(W,MX,LN,M,H2)   (after complex forward transpose in
C                             distributed FFT/transpose LT algorithm)
C  DIR=+3: B(W,MX,M,LN,H2)   (after complex backward transpose in
C                             distributed FFT/transpose LT algorithm)
C The MAP array defines the processor subset and ordering to use.
C
C Communication options (COMMOPT) for SRTRNS1:
C  COMMOPT .EQ. 0   simple SENDRECV: send/recv/trans
C  COMMOPT .EQ. 1   ordered SENDRECV: [send/recv]|[recv/send]/trans
C Communication protocol options (PROTOPT) for SRTRNS1:
C  PROTOPT .EQ. 1, 3, .OR. 5      nonblocking send
C  PROTOPT .EQ. 2, 3, 4, .OR. 5   nonblocking receive
C  PROTOPT .EQ. 4 .OR. 5          forcetype
C  PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1   synchronous
C
C called by: SRTRANS
C calls: SRTRNS_INIT, SENDRECV, TRANS
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C NPROCSX and RBYTES are not declared in this routine; they are
C expected to be supplied by this include file.
#     include "params.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option
      INTEGER PROTOPT
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C position of this processor in the MAP array
      INTEGER MYINDEX
C message type offset to use in interprocessor communication
      INTEGER BASE
C calling context, which selects the organization of B
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays on processors in MAP array
      INTEGER M
      INTEGER N
      INTEGER H1
      INTEGER H2
      INTEGER ML
      INTEGER NL
      INTEGER MX
      INTEGER LM(0:MAPSIZE-1)
      INTEGER LN(0:MAPSIZE-1)
C local part of the array to be transposed, viewed here as N column
C segments (organized as REAL (W,LM(MYINDEX),H1,H2,N))
      REAL A(W*ML*H1*H2,N)
C
C     Work Space
C
C message buffer (organized as REAL (W,LN(MYINDEX),H1,H2,M)); each
C step receives into the start of this buffer
      REAL WS(W*NL*H1*H2,M)
C
C     Output
C
C local part of the transposed array
C (organized as REAL (W,MX,NL,H1,H2), (W,MX,M,H1,H2), (W,MX,H1,M,H2),
C  (W,MX,H1,NL,H2), (W,MX,NL,M,H2), or (W,MX,M,NL,H2))
      REAL B(1)
C
C---- Local Variables --------------------------------------------------
C
C true processor id of this processor
      INTEGER MYID
C bytes per unit of the distributed dimension: RBYTES*W*H1*H2
      INTEGER SEGLEN
C bytes in one column segment (sent) and in one row segment
C (received)
      INTEGER COLLEN
      INTEGER ROWLEN
C bytes sent and received in the current step
      INTEGER SNDLEN
      INTEGER RCVLEN
C per-step start index, segment count and destination of the
C outgoing message (entry 0 of the first two is the local piece)
      INTEGER SENDDEX(0:NPROCSX-1)
      INTEGER SENDCOLS(0:NPROCSX-1)
      INTEGER SENDNODE(NPROCSX-1)
C per-step start index, segment count and source of the incoming
C message (entry 0 of the first two is the local piece)
      INTEGER RECVDEX(0:NPROCSX-1)
      INTEGER RECVROWS(0:NPROCSX-1)
      INTEGER RECVNODE(NPROCSX-1)
C per-step flag saying whether this processor sends or receives
C first (for synchronous communication)
      INTEGER ORDER(NPROCSX-1)
C step counter
      INTEGER ISTEP
C
C---- Executable Statements --------------------------------------------
C
C     Look up my true processor id.
      MYID = MAP(MYINDEX)
C
C     Byte lengths of a single column segment (for sending) and of a
C     single row segment (for receiving).
      SEGLEN = RBYTES*W*H1*H2
      COLLEN = SEGLEN*LM(MYINDEX)
      ROWLEN = SEGLEN*LN(MYINDEX)
C
C     Build the table of partners, offsets and sizes for every step.
      CALL SRTRNS_INIT(MAPSIZE, MAP, LM, LN, MYINDEX,
     &                 SENDDEX, SENDCOLS, SENDNODE,
     &                 RECVDEX, RECVROWS, RECVNODE, ORDER)
C
C     One exchange per step, each followed at once by the transpose
C     of what was just received.
      DO ISTEP = 1, MAPSIZE-1
C
C       Exchange: send columns of A, receive rows into WS.
        SNDLEN = SENDCOLS(ISTEP)*COLLEN
        RCVLEN = RECVROWS(ISTEP)*ROWLEN
        CALL SENDRECV(COMMOPT, PROTOPT, ORDER(ISTEP), MYID, BASE,
     &                SENDNODE(ISTEP), SNDLEN, A(1,SENDDEX(ISTEP)),
     &                RECVNODE(ISTEP), RCVLEN, WS)
C
C       Move the received piece from WS into its place in B.
        CALL TRANS(DIR, W, M, H1, H2, RECVROWS(ISTEP), LN(MYINDEX),
     &             MX, RECVDEX(ISTEP), WS, B)
C
      END DO
C
C     The piece that never left this processor goes straight from A
C     to B.
      CALL TRANS(DIR, W, M, H1, H2, RECVROWS(0), LN(MYINDEX), MX,
     &           RECVDEX(0), A(1,SENDDEX(0)), B)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SRTRNS2(COMMOPT, PROTOPT, MAPSIZE, MAP,
     &                   MYINDEX, BASE, DIR, W, M, N, H1, H2, LM, LN,
     &                   ML, NL, MX, A, WS, B)
C
C Computes B = transpose(A) with the O(P) "send/recv" transpose
C algorithm with recv-ahead. The work is done in four passes over
C the MAPSIZE-1 steps: post every receive request, run every
C send/receive, wait for outstanding operations, and finally
C transpose every received piece into B. The piece of A that stays
C on this processor is transposed last. Each incoming message has
C its own slot in WS, addressed by RECVDEX.
C
C Data layout:
C  A is a matrix of size (W,M,H1,H2,N) distributed by rows and
C  B is a matrix of size (W,N,H1,H2,M) distributed by rows
C over MAPSIZE processors. Processor I holds
C  A(W,LM(I),H1,H2,N), i.e. LM(I) rows of A, with
C                      LM(0) + ... + LM(MAPSIZE-1) = M, and
C  B(W,LN(I),H1,H2,M), i.e. LN(I) rows of B, with
C                      LN(0) + ... + LN(MAPSIZE-1) = N.
C W is 1 or 2, depending on whether the arrays are REAL or COMPLEX.
C
C DIR tells the routine where TRANSPOSE was called from and therefore
C how B must be organized for the following stage of PSTSWM:
C  DIR=-1: B(W,MX,LN,H1,H2)  (after real forward transpose)
C  DIR=+1: B(W,MX,M,H1,H2)   (after real backward transpose)
C  DIR=-2: B(W,MX,H1,M,H2)   (after complex forward transpose in
C                             transpose FFT/transpose LT algorithm)
C  DIR=+2: B(W,MX,H1,LN,H2)  (after complex backward transpose in
C                             transpose FFT/transpose LT algorithm)
C  DIR=-3: B(W,MX,LN,M,H2)   (after complex forward transpose in
C                             distributed FFT/transpose LT algorithm)
C  DIR=+3: B(W,MX,M,LN,H2)   (after complex backward transpose in
C                             distributed FFT/transpose LT algorithm)
C The MAP array defines the processor subset and ordering to use.
C
C Communication options (COMMOPT) for SRTRNS2:
C  COMMOPT .EQ. 0   simple SENDRECV: send/recv/trans
C  COMMOPT .EQ. 1   ordered SENDRECV: [send/recv]|[recv/send]/trans
C  COMMOPT .EQ. 2   delayed receive SENDRECV:
C                   all sends/all recvs/trans
C Communication protocol options (PROTOPT) for SRTRNS2:
C  PROTOPT .EQ. 1, 3, .OR. 5      nonblocking send
C  PROTOPT .EQ. 2, 3, 4, .OR. 5   nonblocking receive and recv-ahead
C  PROTOPT .EQ. 4 .OR. 5          forcetype
C  PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1   synchronous
C
C called by: SRTRANS
C calls: SRTRNS_INIT, SR1, SR2, SR3, TRANS
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C NPROCSX and RBYTES are not declared in this routine; they are
C expected to be supplied by this include file.
#     include "params.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option
      INTEGER PROTOPT
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C position of this processor in the MAP array
      INTEGER MYINDEX
C message type offset to use in interprocessor communication
      INTEGER BASE
C calling context, which selects the organization of B
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays on processors in MAP array
      INTEGER M
      INTEGER N
      INTEGER H1
      INTEGER H2
      INTEGER ML
      INTEGER NL
      INTEGER MX
      INTEGER LM(0:MAPSIZE-1)
      INTEGER LN(0:MAPSIZE-1)
C local part of the array to be transposed, viewed here as N column
C segments (organized as REAL (W,LM(MYINDEX),H1,H2,N))
      REAL A(W*ML*H1*H2,N)
C
C     Work Space
C
C message buffers, viewed here as M row segments
C (organized as REAL (W,LN(MYINDEX),H1,H2,M))
      REAL WS(W*NL*H1*H2,M)
C
C     Output
C
C local part of the transposed array
C (organized as REAL (W,MX,NL,H1,H2), (W,MX,M,H1,H2), (W,MX,H1,M,H2),
C  (W,MX,H1,NL,H2), (W,MX,NL,M,H2), or (W,MX,M,NL,H2))
      REAL B(1)
C
C---- Local Variables --------------------------------------------------
C
C true processor id of this processor
      INTEGER MYID
C bytes per unit of the distributed dimension: RBYTES*W*H1*H2
      INTEGER SEGLEN
C bytes in one column segment (sent) and in one row segment
C (received)
      INTEGER COLLEN
      INTEGER ROWLEN
C bytes sent and received in the current step
      INTEGER SNDLEN
      INTEGER RCVLEN
C per-step start index, segment count and destination of the
C outgoing message (entry 0 of the first two is the local piece)
      INTEGER SENDDEX(0:NPROCSX-1)
      INTEGER SENDCOLS(0:NPROCSX-1)
      INTEGER SENDNODE(NPROCSX-1)
C per-step start index, segment count and source of the incoming
C message (entry 0 of the first two is the local piece)
      INTEGER RECVDEX(0:NPROCSX-1)
      INTEGER RECVROWS(0:NPROCSX-1)
      INTEGER RECVNODE(NPROCSX-1)
C per-step flag saying whether this processor sends or receives
C first (for synchronous communication)
      INTEGER ORDER(NPROCSX-1)
C step counter
      INTEGER ISTEP
C
C---- Executable Statements --------------------------------------------
C
C     Look up my true processor id.
      MYID = MAP(MYINDEX)
C
C     Byte lengths of a single column segment (for sending) and of a
C     single row segment (for receiving).
      SEGLEN = RBYTES*W*H1*H2
      COLLEN = SEGLEN*LM(MYINDEX)
      ROWLEN = SEGLEN*LN(MYINDEX)
C
C     Build the table of partners, offsets and sizes for every step.
      CALL SRTRNS_INIT(MAPSIZE, MAP, LM, LN, MYINDEX,
     &                 SENDDEX, SENDCOLS, SENDNODE,
     &                 RECVDEX, RECVROWS, RECVNODE, ORDER)
C
C     Pass 1: post receive requests, one WS slot per message.
      DO ISTEP = 1, MAPSIZE-1
        RCVLEN = RECVROWS(ISTEP)*ROWLEN
        CALL SR1(PROTOPT, .TRUE., MYID, BASE, RECVNODE(ISTEP),
     &           RCVLEN, WS(1,RECVDEX(ISTEP)))
      END DO
C
C     Pass 2: initiate the send and receive of every step.
      DO ISTEP = 1, MAPSIZE-1
        SNDLEN = SENDCOLS(ISTEP)*COLLEN
        RCVLEN = RECVROWS(ISTEP)*ROWLEN
        CALL SR2(COMMOPT, PROTOPT, .TRUE., ORDER(ISTEP), MYID,
     &           BASE, SENDNODE(ISTEP), SNDLEN,
     &           A(1,SENDDEX(ISTEP)), RECVNODE(ISTEP), RCVLEN,
     &           WS(1,RECVDEX(ISTEP)))
      END DO
C
C     Pass 3: wait until outstanding send and receive operations
C     are complete.
      DO ISTEP = 1, MAPSIZE-1
        RCVLEN = RECVROWS(ISTEP)*ROWLEN
        CALL SR3(COMMOPT, PROTOPT, MYID, BASE, SENDNODE(ISTEP),
     &           RECVNODE(ISTEP), RCVLEN, WS(1,RECVDEX(ISTEP)))
      END DO
C
C     Pass 4: move each received piece from its WS slot into B.
      DO ISTEP = 1, MAPSIZE-1
        CALL TRANS(DIR, W, M, H1, H2, RECVROWS(ISTEP), LN(MYINDEX),
     &             MX, RECVDEX(ISTEP), WS(1,RECVDEX(ISTEP)), B)
      END DO
C
C     The piece that never left this processor goes straight from A
C     to B.
      CALL TRANS(DIR, W, M, H1, H2, RECVROWS(0), LN(MYINDEX), MX,
     &           RECVDEX(0), A(1,SENDDEX(0)), B)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SRTRNS_INIT(MAPSIZE, MAP, LM, LN, MYINDEX, SENDDEX,
     &                       SENDCOLS, SENDNODE, RECVDEX, RECVROWS,
     &                       RECVNODE, ORDER)
C
C Precomputes the SENDRECV schedule needed by the O(P) send/recv
C transpose algorithm. At step I (1 <= I <= MAPSIZE-1) this
C processor sends to index MOD(MYINDEX+I,MAPSIZE) and receives from
C index MOD(MYINDEX-I+MAPSIZE,MAPSIZE). For every step the routine
C returns the start index and size of the outgoing and incoming
C message, the partner ids taken from MAP, and an ordering flag.
C Entry 0 of the index and size arrays describes the data that stays
C on this processor.
C
C Start indices are 1-based running sums: those for sending are
C accumulated from LN, those for receiving from LM.
C
C called by: SRTRNS1, SRTRNS2
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C number of local "last" indices for input and output arrays in
C transpose for each processor
      INTEGER LM(0:MAPSIZE-1)
      INTEGER LN(0:MAPSIZE-1)
C position of this processor in the map and index arrays
      INTEGER MYINDEX
C
C     Output
C
C start index, size, and destination id of the message sent at each
C step
      INTEGER SENDDEX(0:MAPSIZE-1)
      INTEGER SENDCOLS(0:MAPSIZE-1)
      INTEGER SENDNODE(MAPSIZE-1)
C start index, size, and source id of the message received at each
C step
      INTEGER RECVDEX(0:MAPSIZE-1)
      INTEGER RECVROWS(0:MAPSIZE-1)
      INTEGER RECVNODE(MAPSIZE-1)
C flag indicating whether this processor sends or receives first
C during a SENDRECV at a given step (for synchronous communication)
      INTEGER ORDER(MAPSIZE-1)
C
C---- Local Variables --------------------------------------------------
C
C SENDRECV distance (also used as a plain loop counter)
      INTEGER DIST
C indices into MAP of the destination and of the source
      INTEGER TODEX
      INTEGER FROMDEX
C cycle generated by repeatedly adding DIST modulo MAPSIZE:
C current index, number of entries visited so far, smallest index
C seen, and the position at which that smallest index was seen
      INTEGER CURDEX
      INTEGER CYCLEN
      INTEGER LOWDEX
      INTEGER LOWPOS
C
C---- Executable Statements -------------------------------------------
C
C     Running sums giving the first index owned by each processor.
C     SENDCOLS and RECVROWS serve as scratch space for these sums
C     until they are overwritten with sizes further down.
      SENDCOLS(0) = 1
      RECVROWS(0) = 1
      DO DIST = 1, MAPSIZE-1
        SENDCOLS(DIST) = SENDCOLS(DIST-1) + LN(DIST-1)
        RECVROWS(DIST) = RECVROWS(DIST-1) + LM(DIST-1)
      END DO
C
C     Start indices of the piece that stays local.
      RECVDEX(0) = RECVROWS(MYINDEX)
      SENDDEX(0) = SENDCOLS(MYINDEX)
C
      DO DIST = 1, MAPSIZE-1
C
C       Partners at this distance. For now SENDNODE and RECVNODE
C       hold indices into MAP; they become true ids in the last loop.
        TODEX   = MOD(MYINDEX+DIST, MAPSIZE)
        FROMDEX = MOD(MYINDEX-DIST+MAPSIZE, MAPSIZE)
        SENDNODE(DIST) = TODEX
        RECVNODE(DIST) = FROMDEX
C
C       Where the outgoing message starts and where the incoming
C       message belongs.
        SENDDEX(DIST) = SENDCOLS(TODEX)
        RECVDEX(DIST) = RECVROWS(FROMDEX)
C
C       Walk the cycle from the destination back round to MYINDEX,
C       remembering where the smallest index occurs. MYINDEX is the
C       last entry visited, so CYCLEN-LOWPOS is its distance from
C       the smallest index; the parity of that distance is ORDER.
        CURDEX = TODEX
        LOWDEX = TODEX
        CYCLEN = 1
        LOWPOS = 1
        DO WHILE (CURDEX .NE. MYINDEX)
          CYCLEN = CYCLEN + 1
          CURDEX = MOD(CURDEX+DIST, MAPSIZE)
          IF (CURDEX .LT. LOWDEX) THEN
            LOWDEX = CURDEX
            LOWPOS = CYCLEN
          ENDIF
        END DO
        ORDER(DIST) = MOD(CYCLEN-LOWPOS, 2)
C
      END DO
C
C     All start indices are stored, so the scratch sums can now be
C     replaced by sizes. Local piece first.
      SENDCOLS(0) = LN(MYINDEX)
      RECVROWS(0) = LM(MYINDEX)
C
      DO DIST = 1, MAPSIZE-1
C
        TODEX   = SENDNODE(DIST)
        FROMDEX = RECVNODE(DIST)
C
C       Message sizes for this step.
        SENDCOLS(DIST) = LN(TODEX)
        RECVROWS(DIST) = LM(FROMDEX)
C
C       Replace MAP indices by the ids stored in MAP.
        SENDNODE(DIST) = MAP(TODEX)
        RECVNODE(DIST) = MAP(FROMDEX)
C
      END DO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
