C#######################################################################
C PSTSWM Version 1.0 (8/1/93)                                          #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
      SUBROUTINE SRTRANS(COMMOPT, PROTOPT, FORCETYPE, MAPSIZE, MAP,
     &                   MYINDEX, BASE, DIR, W, M, N, H1, H2, ML, NL,
     &                   MAX, A, WS, B)
C
C Driver for the O(P) "send/recv" transpose, B = transpose(A). Each
C step of the underlying algorithm sends one message to one processor
C and receives one message from another. This routine only selects
C which implementation to run, based on COMMOPT.
C
C Data layout:
C  A is a matrix of size (W,M,H1,H2,N) distributed by rows and
C  B is a matrix of size (W,N,H1,H2,M) distributed by rows
C  over MAPSIZE processors. The local pieces are
C   A(W,ML,H1,H2,N): ML = (M/P or M/P+1) rows of A per processor,
C   B(W,NL,H1,H2,M): NL = (N/P or N/P+1) rows of B per processor,
C  with excess rows allocated to lower-numbered nodes.
C  W is 1 for REAL data and 2 for COMPLEX data.
C
C DIR identifies the caller's context and therefore the ordering
C required for B:
C  DIR=-1: B(W,MAX,NL,H1,H2)  (after real forward transpose)
C  DIR=+1: B(W,MAX,M,H1,H2)   (after real backward transpose)
C  DIR=-2: B(W,MAX,H1,M,H2)   (after complex forward transpose in
C                              transpose FFT/transpose LT algorithm)
C  DIR=+2: B(W,MAX,H1,NL,H2)  (after complex backward transpose in
C                              transpose FFT/transpose LT algorithm)
C  DIR=-3: B(W,MAX,NL,M,H2)   (after complex forward transpose in
C                              distributed FFT/transpose LT algorithm)
C  DIR=+3: B(W,MAX,M,NL,H2)   (after complex backward transpose in
C                              distributed FFT/transpose LT algorithm)
C MAP defines the processor subset and ordering to use.
C
C Communication options (COMMOPT):
C  0: simple SENDRECV: send/recv/sum
C  1: ordered SENDRECV: [send/recv]|[recv/send]/sum
C  2: simple SENDRECV with recv-ahead
C  3: ordered SENDRECV with recv-ahead
C Communication protocol options (PROTOPT):
C  1, 3, or 5:    nonblocking send
C  2, 3, 4, or 5: nonblocking receive
C  4 or 5:        forcetype
C  6 (with COMMOPT 1): synchronous
C
C Dispatch performed here:
C  MAPSIZE <= 1 : return immediately, nothing is called
C  COMMOPT <= 1 : SRTRNS1 (no recv-ahead), COMMOPT passed unchanged
C  COMMOPT 2,3  : SRTRNS2 (recv-ahead), called with COMMOPT-2
C  COMMOPT > 3  : fatal error message on unit 0, then STOP
C
C called by: TRANSPOSE
C calls: SRTRNS1, SRTRNS2
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option, communication protocol option,
C and forcetype message type offset
      INTEGER COMMOPT, PROTOPT, FORCETYPE
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C index of "me" in MAP array, and message type offset to use in
C interprocessor communication
      INTEGER MYINDEX, BASE
C context in which transpose occurs (selects organization of B)
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays
      INTEGER M, N, H1, H2, ML, NL, MAX
C local component of the array that is to be transposed
      REAL A(W,ML,H1,H2,N)
C
C     Work Space
C
C message buffers
      REAL WS(W,NL,H1,H2,M)
C
C     Output
C
C local component of the transposed array; its organization is
C selected by DIR as listed above
      REAL B(1)
C
C---- Formats ----------------------------------------------------------
C
  100 FORMAT (/,' PSTSWM: FATAL ERROR IN SUBROUTINE SRTRANS ',/,
     &            ' ILLEGAL COMMUNICATION OPTION SPECIFIED',/,
     &            ' PROCID = ',I4,' COMMOPT = ',I4)
C
C---- Executable Statements --------------------------------------------
C
C     With at most one processor in the subset no routine is called.
      IF (MAPSIZE .LE. 1) RETURN
C
C     Reject communication options beyond the documented range.
      IF (COMMOPT .GT. 3) THEN
        WRITE(0,100) MAP(MYINDEX), COMMOPT
        STOP
      ENDIF
C
      IF (COMMOPT .GE. 2) THEN
C       recv-ahead algorithms: 2 and 3 are passed on as 0 and 1
        CALL SRTRNS2(COMMOPT-2, PROTOPT, FORCETYPE, MAPSIZE, MAP,
     &               MYINDEX, BASE, DIR, W, M, N, H1, H2, ML, NL,
     &               MAX, A, WS, B)
      ELSE
C       no recv-ahead algorithms
        CALL SRTRNS1(COMMOPT, PROTOPT, FORCETYPE, MAPSIZE, MAP,
     &               MYINDEX, BASE, DIR, W, M, N, H1, H2, ML, NL,
     &               MAX, A, WS, B)
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SRTRNS1(COMMOPT, PROTOPT, FORCETYPE, MAPSIZE, MAP,
     &                   MYINDEX, BASE, DIR, W, M, N, H1, H2, ML, NL,
     &                   MAX, A, WS, B)
C
C Computes B = transpose(A) with the O(P) "send/recv" transpose
C algorithm WITHOUT recv-ahead. There are MAPSIZE-1 exchange steps;
C in each one a block of A is sent to one processor, a block is
C received into WS from another, and the received block is
C transposed into B. The block of A that stays on this processor is
C transposed into B last.
C
C Data layout:
C  A is a matrix of size (W,M,H1,H2,N) distributed by rows and
C  B is a matrix of size (W,N,H1,H2,M) distributed by rows
C  over MAPSIZE processors. The local pieces are
C   A(W,ML,H1,H2,N): ML = (M/P or M/P+1) rows of A per processor,
C   B(W,NL,H1,H2,M): NL = (N/P or N/P+1) rows of B per processor,
C  with excess rows allocated to lower-numbered nodes.
C  W is 1 for REAL data and 2 for COMPLEX data.
C
C DIR identifies the caller's context and therefore the ordering
C required for B:
C  DIR=-1: B(W,MAX,NL,H1,H2)  (after real forward transpose)
C  DIR=+1: B(W,MAX,M,H1,H2)   (after real backward transpose)
C  DIR=-2: B(W,MAX,H1,M,H2)   (after complex forward transpose in
C                              transpose FFT/transpose LT algorithm)
C  DIR=+2: B(W,MAX,H1,NL,H2)  (after complex backward transpose in
C                              transpose FFT/transpose LT algorithm)
C  DIR=-3: B(W,MAX,NL,M,H2)   (after complex forward transpose in
C                              distributed FFT/transpose LT algorithm)
C  DIR=+3: B(W,MAX,M,NL,H2)   (after complex backward transpose in
C                              distributed FFT/transpose LT algorithm)
C MAP defines the processor subset and ordering to use.
C
C Communication options (COMMOPT) for SRTRNS1:
C  0: simple SENDRECV: send/recv/sum
C  1: ordered SENDRECV: [send/recv]|[recv/send]/sum
C Communication protocol options (PROTOPT) for SRTRNS1:
C  1, 3, or 5:    nonblocking send
C  2, 3, 4, or 5: nonblocking receive
C  4 or 5:        forcetype
C  6 (with COMMOPT 1): synchronous
C
C called by: SRTRANS
C calls: SRTRNS_INIT, SENDRECV, TRANS
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C (supplies NPROCSX and RBYTES used below)
      INCLUDE 'params.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option, communication protocol option,
C and forcetype message type offset
      INTEGER COMMOPT, PROTOPT, FORCETYPE
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C index of "me" in MAP array, and message type offset to use in
C interprocessor communication
      INTEGER MYINDEX, BASE
C context in which transpose occurs (selects organization of B)
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays
      INTEGER M, N, H1, H2, ML, NL, MAX
C Local component of the array that is to be transposed, viewed
C here as N column segments (organized as REAL (W,ML,H1,H2,N)).
      REAL A(W*ML*H1*H2,N)
C
C     Work Space
C
C message buffers (organized as REAL (W,NL,H1,H2,M))
      REAL WS(W*NL*H1*H2,M)
C
C     Output
C
C Local component of the transposed array; its organization is
C selected by DIR as listed above.
      REAL B(1)
C
C---- Local Variables --------------------------------------------------
C
C true processor id for "me"
      INTEGER MYID
C bytes in one column segment (sent) and one row segment (received)
      INTEGER COLBYT, ROWBYT
C bytes sent and received in the current exchange
      INTEGER NBOUT, NBIN
C per-step send information: first column index, number of columns,
C and destination processor (element 0 of SDEX is the local block)
      INTEGER SDEX(0:NPROCSX-1), SCNT(NPROCSX-1), SPID(NPROCSX-1)
C per-step receive information: first row index, number of rows,
C and source processor (element 0 of RDEX is the local block)
      INTEGER RDEX(0:NPROCSX-1), RCNT(NPROCSX-1), RPID(NPROCSX-1)
C per-step flag: does this processor send or receive first
C (for synchronous communication)
      INTEGER SWAP(NPROCSX-1)
C exchange step counter
      INTEGER K
C
C---- Executable Statements --------------------------------------------
C
C     Precalculate exchange partners, offsets, and block sizes.
      CALL SRTRNS_INIT(M, N, MAPSIZE, MAP, MYINDEX, SDEX, SCNT, SPID,
     &                 RDEX, RCNT, RPID, SWAP)
C
C     Byte lengths of a single column segment (for sending) and a
C     single row segment (for receiving).
      COLBYT = RBYTES*W*ML*H1*H2
      ROWBYT = RBYTES*W*NL*H1*H2
C
C     True processor id of this node.
      MYID = MAP(MYINDEX)
C
C     One exchange per remote processor in the subset.
      DO K=1,MAPSIZE-1
C
C       Message lengths for this step.
        NBIN  = RCNT(K)*ROWBYT
        NBOUT = SCNT(K)*COLBYT
C
C       Send columns of A starting at SDEX(K); receive into the
C       start of WS.
        CALL SENDRECV(COMMOPT, PROTOPT, FORCETYPE, SWAP(K), MYID,
     &                BASE, SPID(K), NBOUT, A(1,SDEX(K)), RPID(K),
     &                NBIN, WS)
C
C       Transpose the block just received into B.
        CALL TRANS(DIR, W, M, H1, H2, RCNT(K), NL, MAX, RDEX(K),
     &             WS, B)
C
      ENDDO
C
C     Finally, transpose the block of A that stays local into B.
      CALL TRANS(DIR, W, M, H1, H2, ML, NL, MAX, RDEX(0),
     &           A(1,SDEX(0)), B)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SRTRNS2(COMMOPT, PROTOPT, FORCETYPE, MAPSIZE, MAP,
     &                   MYINDEX, BASE, DIR, W, M, N, H1, H2, ML, NL,
     &                   MAX, A, WS, B)
C
C This routine computes B = transpose(A) using an O(P) "send/recv"
C transpose algorithm with recv-ahead, where each step consists of
C sending a message to one processor and receiving a message from
C another. It proceeds in four phases:
C  1) post a receive request (SR1) for every incoming block, each
C     into its own slot of WS;
C  2) for each step, complete the receive and initiate the matching
C     send (SR2), then transpose the received block into B (TRANS);
C  3) wait (SR3) for every send initiated in phase 2;
C  4) transpose the block of A that stays local into B.
C
C Here
C  A is a matrix of size (W,M,H1,H2,N) distributed by rows and
C  B is a matrix of size (W,N,H1,H2,M) distributed by rows
C over MAPSIZE processors, and each processor has part of A and B as
C follows:
C  A(W,ML,H1,H2,N): Each processor has ML = (M/P or M/P+1) rows of A;
C                   excess rows are allocated to lower-numbered nodes.
C  B(W,NL,H1,H2,M): Each processor has NL = (N/P or N/P+1) rows of B;
C                   excess rows are allocated to lower-numbered nodes.
C W is 1 or 2, depending on whether the arrays are REAL or COMPLEX.
C
C Alternative reorganizations of B are also supported, and are
C determined by the parameter DIR. DIR specifies where TRANSPOSE is
C called from, allowing the routine to order B as required for
C subsequent stages in PSTSWM:
C  DIR=-1: B(W,MAX,NL,H1,H2)  (Used after real forward transpose.)
C  DIR=+1: B(W,MAX,M,H1,H2)   (Used after real backward transpose.)
C  DIR=-2: B(W,MAX,H1,M,H2)   (Used after complex forward transpose
C                             in transpose FFT/transpose LT algorithm)
C  DIR=+2: B(W,MAX,H1,NL,H2)  (Used after complex backward transpose
C                             in transpose FFT/transpose LT algorithm)
C  DIR=-3: B(W,MAX,NL,M,H2)   (Used after complex forward transpose
C                             in distributed FFT/transpose LT algorithm)
C  DIR=+3: B(W,MAX,M,NL,H2)   (Used after complex backward transpose
C                             in distributed FFT/transpose LT algorithm)
C The MAP array defines the processor subset and ordering to use.
C
C Communication options (COMMOPT) for SRTRNS2 include:
C  IF (COMMOPT .EQ. 0) simple SENDRECV: send/recv/sum
C  IF (COMMOPT .EQ. 1) ordered SENDRECV: [send/recv]|[recv/send]/sum
C Communication protocol options (PROTOPT) for SRTRNS2 include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)
C    nonblocking receive and recv-ahead
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous
C
C called by: SRTRANS
C calls: SRTRNS_INIT, SR1, SR2, SR3, TRANS
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C (supplies NPROCSX and RBYTES used below)
      INCLUDE 'params.i'
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option
      INTEGER PROTOPT
C forcetype message type offset
      INTEGER FORCETYPE
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C index of "me" in MAP array
      INTEGER MYINDEX
C message type offset to use in interprocessor communication
      INTEGER BASE
C context in which transpose occurs, and hence required data
C organization
      INTEGER DIR
C number of reals in datatype (1: REAL, 2: COMPLEX)
      INTEGER W
C dimensions of input and output arrays
      INTEGER M, N, H1, H2, ML, NL, MAX
C Local component of the array that is to be transposed.
C (organized as REAL (W,ML,H1,H2,N))
      REAL A(W*ML*H1*H2,N)
C
C     Work Space
C
C message buffers
C (organized as REAL (W,NL,H1,H2,M))
      REAL WS(W*NL*H1*H2,M)
C
C     Output
C
C Local component of the transposed array.
C (organized as REAL (W,MAX,NL,H1,H2), (W,MAX,M,H1,H2),
C  (W,MAX,H1,M,H2), (W,MAX,H1,NL,H2), (W,MAX,NL,M,H2), or
C  (W,MAX,M,NL,H2))
      REAL B(1)
C
C---- Local Variables --------------------------------------------------
C
C true processor id for "me"
      INTEGER ME
C number of bytes in a column segment being sent and in a row segment
C being received.
      INTEGER COLSIZE, ROWSIZE
C size of message being sent and received during a SENDRECV
      INTEGER SENDSIZE, RECVSIZE
C arrays indicating beginning index, size, and destination of
C message being sent during a given SENDRECV
      INTEGER SENDDEX(0:NPROCSX-1), SENDCOLS(NPROCSX-1),
     &        SENDNODE(NPROCSX-1)
C arrays indicating beginning index, size, and destination of
C message being received during a given SENDRECV
      INTEGER RECVDEX(0:NPROCSX-1), RECVROWS(NPROCSX-1),
     &        RECVNODE(NPROCSX-1)
C array indicating whether this processor sends or receives first
C during a SENDRECV at a given step (for synchronous communication)
      INTEGER ORDER(NPROCSX-1)
C loop index
      INTEGER STEP
C
C---- Executable Statements --------------------------------------------
C
C     Identify who I am.
      ME = MAP(MYINDEX)
C
C     Calculate length of a single column segment (for sending) and a
C     single row segment (for receiving), in bytes.
      COLSIZE = RBYTES*W*ML*H1*H2
      ROWSIZE = RBYTES*W*NL*H1*H2
C
C     Precalculate SENDRECV partners and other information needed by
C     transpose algorithm.
      CALL SRTRNS_INIT(M, N, MAPSIZE, MAP, MYINDEX, SENDDEX,
     &                 SENDCOLS, SENDNODE, RECVDEX, RECVROWS, RECVNODE,
     &                 ORDER)
C
C     Post receive requests. Every incoming block gets its own slot
C     in WS, starting at row index RECVDEX(STEP), so all receives
C     can be outstanding at once.
      DO STEP=1,MAPSIZE-1
        RECVSIZE = RECVROWS(STEP)*ROWSIZE
        CALL SR1(PROTOPT, FORCETYPE, .TRUE., ME, BASE, RECVNODE(STEP),
     &           RECVSIZE, WS(1,RECVDEX(STEP)))
      ENDDO
C
C     Construct transpose using O(P) SENDRECV algorithm.
      DO STEP=1,MAPSIZE-1
C
C       Receive component and initiate corresponding send.
        SENDSIZE = SENDCOLS(STEP)*COLSIZE
        RECVSIZE = RECVROWS(STEP)*ROWSIZE
        CALL SR2(COMMOPT, PROTOPT, FORCETYPE, .TRUE., ORDER(STEP),
     &           ME, BASE, SENDNODE(STEP), SENDSIZE,
     &           A(1,SENDDEX(STEP)), RECVNODE(STEP), RECVSIZE,
     &           WS(1,RECVDEX(STEP)))
C
C       Transpose received component into B.
        CALL TRANS(DIR, W, M, H1, H2, RECVROWS(STEP), NL, MAX,
     &             RECVDEX(STEP), WS(1,RECVDEX(STEP)), B)
C
      ENDDO
C
C     Wait until outstanding send operations are complete. Each wait
C     is issued with the destination, length, and buffer of the send
C     initiated at the same step of the loop above; SENDSIZE is
C     recomputed per step because the value left over from the loop
C     above belongs to the last step only.
C     NOTE(review): SR3 is not defined in this file; it is assumed
C     to take the destination, byte count, and buffer of the send
C     being completed -- confirm against its definition.
      DO STEP=1,MAPSIZE-1
        SENDSIZE = SENDCOLS(STEP)*COLSIZE
        CALL SR3(COMMOPT, PROTOPT, FORCETYPE, ME, BASE,
     &           SENDNODE(STEP), SENDSIZE, A(1,SENDDEX(STEP)))
      ENDDO
C
C     Transpose last component from A to B.
      CALL TRANS(DIR, W, M, H1, H2, ML, NL, MAX, RECVDEX(0),
     &           A(1,SENDDEX(0)), B)
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SRTRNS_INIT(M, N, MAPSIZE, MAP, MYINDEX, SENDDEX,
     &                       SENDCOLS, SENDNODE, RECVDEX, RECVROWS,
     &                       RECVNODE, ORDER)
C
C Precomputes, for every step of the O(P) send/recv transpose, the
C exchange partners, the starting indices and sizes of the blocks
C exchanged, and the send/receive ordering flag.
C
C Partitioning rule used throughout: an extent L split over MAPSIZE
C processors gives processor index P a block of L/MAPSIZE entries,
C plus one more if P < MOD(L,MAPSIZE); the block starts at
C   1 + P*(L/MAPSIZE) + MIN(P,MOD(L,MAPSIZE)).
C Send quantities apply this rule to N with the destination index,
C receive quantities apply it to M with the source index.
C
C At step I (1 <= I <= MAPSIZE-1) this processor sends to index
C MOD(MYINDEX+I,MAPSIZE) and receives from index
C MOD(MYINDEX-I+MAPSIZE,MAPSIZE). Element 0 of SENDDEX and RECVDEX
C holds the starting indices for this processor's own block.
C
C called by: SRTRNS1, SRTRNS2
C calls:
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Arguments --------------------------------------------------------
C
C     Input
C
C size of last indices of output and input arrays in transpose
      INTEGER M, N
C number of processors in subset
      INTEGER MAPSIZE
C processor subset (and processor ordering)
      INTEGER MAP(0:MAPSIZE-1)
C index of "me" in map array
      INTEGER MYINDEX
C
C     Output
C
C beginning index, size, and destination of the message sent at
C each step
      INTEGER SENDDEX(0:MAPSIZE-1), SENDCOLS(MAPSIZE-1),
     &        SENDNODE(MAPSIZE-1)
C beginning index, size, and source of the message received at
C each step
      INTEGER RECVDEX(0:MAPSIZE-1), RECVROWS(MAPSIZE-1),
     &        RECVNODE(MAPSIZE-1)
C flag per step indicating whether this processor sends or receives
C first (for synchronous communication)
      INTEGER ORDER(MAPSIZE-1)
C
C---- Local Variables --------------------------------------------------
C
C exchange distance, destination index, and source index
      INTEGER DIST, DEST, SRC
C quotient and remainder of N and of M divided by MAPSIZE
      INTEGER NQUOT, NREM, MQUOT, MREM
C cycle walk: current index, number of indices visited, smallest
C index seen so far, and position at which it was seen
      INTEGER CURDEX, CYCLEN, LOWDEX, LOWLOC
C
C---- Executable Statements -------------------------------------------
C
C     Block-partition constants for both extents.
      NQUOT = N/MAPSIZE
      NREM  = MOD(N,MAPSIZE)
      MQUOT = M/MAPSIZE
      MREM  = MOD(M,MAPSIZE)
C
C     Starting indices of the block that stays on this processor.
      SENDDEX(0) = 1 + MYINDEX*NQUOT + MIN(MYINDEX,NREM)
      RECVDEX(0) = 1 + MYINDEX*MQUOT + MIN(MYINDEX,MREM)
C
      DO DIST=1,MAPSIZE-1
C
C       Destination: processor id, first column, column count.
        DEST = MOD(MYINDEX+DIST,MAPSIZE)
        SENDNODE(DIST) = MAP(DEST)
        SENDDEX(DIST)  = 1 + DEST*NQUOT + MIN(DEST,NREM)
        IF (DEST .LT. NREM) THEN
          SENDCOLS(DIST) = NQUOT + 1
        ELSE
          SENDCOLS(DIST) = NQUOT
        ENDIF
C
C       Source: processor id, first row, row count.
        SRC = MOD(MYINDEX-DIST+MAPSIZE,MAPSIZE)
        RECVNODE(DIST) = MAP(SRC)
        RECVDEX(DIST)  = 1 + SRC*MQUOT + MIN(SRC,MREM)
        IF (SRC .LT. MREM) THEN
          RECVROWS(DIST) = MQUOT + 1
        ELSE
          RECVROWS(DIST) = MQUOT
        ENDIF
C
C       Walk the cycle DEST, DEST+DIST, ... (mod MAPSIZE) until it
C       returns to MYINDEX, remembering where the smallest index
C       occurred. The parity of the distance from that position to
C       the end of the walk (where MYINDEX sits) becomes ORDER.
        CURDEX = DEST
        CYCLEN = 1
        LOWDEX = DEST
        LOWLOC = 1
        DO WHILE (CURDEX .NE. MYINDEX)
          CYCLEN = CYCLEN + 1
          CURDEX = MOD(CURDEX+DIST,MAPSIZE)
          IF (CURDEX .LT. LOWDEX) THEN
            LOWLOC = CYCLEN
            LOWDEX = CURDEX
          ENDIF
        ENDDO
        ORDER(DIST) = MOD(CYCLEN-LOWLOC,2)
C
      ENDDO
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
