C#######################################################################
C PSTSWM Version 4.0 (12/1/94)                                         #
C  A message-passing benchmark code and parallel algorithm testbed     #
C  that solves the nonlinear shallow water equations using the spectral#
C  transform method.                                                   #
C Written by:                                                          #
C  Patrick Worley of Oak Ridge National Laboratory                     #
C  Ian Foster of Argonne National Laboratory                           #
C Based on the sequential code STSWM 2.0 by James Hack and Ruediger    #
C  Jakob of the National Center for Atmospheric Research.              #
C Research and development funded by the Computer Hardware, Advanced   #
C  Mathematics, and Model Physics (CHAMMP) program of the U.S.         #
C  Department of Energy.                                               # 
C                                                                      #
C Questions and comments should be directed to worley@msr.epm.ornl.gov #
C Please notify and acknowledge the authors in any research or         #
C publications utilizing PSTSWM or any part of the code.               #
C                                                                      #
C NOTICE: Neither the institutions nor the authors make any            #
C representations about the suitability of this software for any       #
C purpose. This software is provided "as is", without express or       #
C implied warranty.                                                    #
C#######################################################################
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C swap.f                                                               C
C                                                                      C
C The following routines implement the following low level             C
C interprocessor communication commands used in PSTSWM using           C
C MPI interprocessor communication commands:                           C
C                                                                      C
C SWAP      - used when swapping data between two processors           C
C SWAP_SEND - used to send messages as part of a swap                  C
C SWAP_RECV - used to receive messages as part of a swap               C
C SWAP_RECVBEGIN - used when initiating a receive as part of a swap    C
C SWAP_RECVEND   - used when completing a receive as part of a swap    C
C SWAP1     - first of three routines that implement swap              C
C SWAP2     - second of three routines that implement swap             C
C SWAP3     - third of three routines that implement swap              C
C                                                                      C
C These routines all have the following communication options:         C
C  IF (COMMOPT .EQ. 0) simple                                          C
C  IF (COMMOPT .EQ. 1) ordered                                         C
C They also have the following communication protocol options          C
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send                 C
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive              C
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype                        C
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native                     C
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous                C
C                                                                      C
C SWAP_SEND/SWAP_RECV, SWAP_SEND/SWAP_RECVBEGIN/SWAP_RECVEND, and      C
C SWAP1/SWAP2/SWAP3 all represent different ways of partitioning the   C
C basic swap routine.                                                  C
C                                                                      C
C Use of SWAP_SEND and SWAP_RECV allows computation to be "inserted"   C
C between the beginning of the send and the completion of the recv,    C
C but extreme care must be used when using these routines.             C
C If the simple option is used, then SWAP_SEND must precede SWAP_RECV. C
C If the ordered option is used, then                                  C
C IF (ORDER .EQ. 1) SWAP_SEND/SWAP_RECV                                C
C IF (ORDER .NE. 1) SWAP_RECV/SWAP_SEND                                C
C Also, if nonblocking sends are used, then the send may not be        C
C completed until both SWAP_SEND and SWAP_RECV have been called, and   C
C the send buffer should not be modified until then.                   C
C Similarly, if nonblocking receives are used, then the receive may be C
C posted before SWAP_RECV is called, and the receive buffer should not C
C be used (for anything) between calls to SWAP_SEND and SWAP_RECV.     C
C Note that SWAP_SEND/SWAP_RECV can be used to "match" SWAP in         C
C other processes.                                                     C
C                                                                      C
C Use of SWAP1/SWAP2/SWAP3 allows receives used in swap to be posted   C
C ahead of time, and sends and/or receives to be completed just before C
C they are needed.                                                     C
C                                                                      C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP(COMMOPT, PROTOPT, ORDER, ME, MTAG, 
     &                SWAPNODE, SNDLTH, SNDMSG, RCVLTH, RCVMSG)
C
C This subroutine sends the message in the SNDMSG buffer to SWAPNODE,
C and receives a message from SWAPNODE into RCVMSG. 
C
C Communication options (COMMOPT) for SWAP include:
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C Communication protocol options (PROTOPT) for SWAP include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH1, EXCH2, FRFFT1, HALF1, HALF2, IRFFT1, LGTRNS1, 
C            LGTRNS3
C calls: MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C order of send/recv
      INTEGER ORDER
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be sent
      INTEGER SNDLTH
C message to be sent
      INTEGER SNDMSG(*)
C length of message to be received
      INTEGER RCVLTH
C
C     Output
C
C message received
      INTEGER RCVMSG(*)
C
C---- Local Variables --------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR0, IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR0 /0/, IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements --------------------------------------------
C
C     Choose communication algorithm.
      IF (COMMOPT .EQ. 0) THEN
C***********************************************************************
C       simple swap: send/recv
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           simple swap
            CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR1)
            DO WHILE (IERR1 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR1, IERR2, IERR0)
              IF (IERR2 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR1)
              ELSE
                IERR1 = -IERR1
              ENDIF
            ENDDO
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
C           Do not block for send, increasing odds that a receive will 
C           be posted before the message arrives.
            CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, SNDID, IERR1)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR2)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
C         IF (PROTOPT .EQ. 2) THEN
C           Post receive before send, increasing odds that recv will
C           be posted before message arrives
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, RCVID, IERR2)
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR1)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
C         ELSE
C           This case is "included" for completeness. There is no
C           advantage to using sendbegin/end here, so code is commented
C           out. There is an advantage to this case in SWAP_SEND, for
C           example. 
C           CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
C    &                     SWAPNODE, MTAG, COMM, RCVID, IERR2)
C           CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
C    &                     SWAPNODE, MTAG, COMM, SNDID, IERR1)
C           CALL MPI_WAIT(RCVID, STATUS, IERR2)
C           CALL MPI_WAIT(SNDID, STATUS, IERR1)
C         ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C    
C         IF (PROTOPT .EQ. 4) THEN
C           Post receive before send to allow use of forcetypes.
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
            CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR3)
            DO WHILE (IERR3 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
              IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR3)
              ELSE
                IERR3 = -IERR3
              ENDIF
            ENDDO
            CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
C         ELSE
C           This case is "included" for completeness. There is no
C           advantage to using sendbegin/end here, so code is commented
C           out. There is an advantage to this case in SWAP_SEND, for
C           example. 
C           CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
C    &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
C           CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
C    &                      SWAPNODE, MTAG, COMM, IERR3)
C           DO WHILE (IERR3 .GT. 0)
C             CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
C             IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
C               CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
C    &                         SWAPNODE, MTAG, COMM, IERR3)
C             ELSE
C               IERR3 = -IERR3
C             ENDIF
C           ENDDO
C           CALL MPI_RECV(SNDMSG, 0, DATATYPE,
C    &                      SWAPNODE, MTAG, COMM, STATUS, IERR4)
C           CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
C    &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
C           CALL MPI_WAIT(RCVID, STATUS, IERR2)
C           CALL MPI_WAIT(SNDID, STATUS, IERR1)
C         ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         native swap 
          CALL MPI_SENDRECV(SNDMSG, SNDLTH/DATALTH, DATATYPE, SWAPNODE, 
     &                      MTAG, RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR1)
C
        ELSE
C
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP:',/,
     &             ' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
C
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       ordered swap:
C       IF (ORDER .EQ. 1) send/recv
C       IF (ORDER .NE. 1) recv/send
C       (Deadlock avoidance is the responsibility of the calling
C        program.)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C
C           ordered swap
            IF (ORDER .EQ. 1) THEN
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, STATUS, IERR2)
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
          ELSE
C
C           Do not block for initial send, increasing odds that a
C           receive will be posted before the message arrives.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, STATUS, IERR2)
              CALL MPI_WAIT(SNDID, STATUS, IERR1)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
C         IF (PROTOPT .EQ. 2) THEN
C
C           Post the receive before the initial send, increasing odds
C           that the receive will be posted before the message arrives. 
            IF (ORDER .EQ. 1) THEN
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, RCVID, IERR2)
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, IERR1)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
C         ELSE
C
C           This case is "included" for completeness. There is no
C           advantage to using sendbegin/end here, so code is commented
C           out. There is an advantage to this case in SWAP_SEND, for
C           example. 
C           IF (ORDER .EQ. 1) THEN
C             CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
C    &                       SWAPNODE, MTAG, COMM, RCVID, IERR2)
C             CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
C    &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
C             CALL MPI_WAIT(RCVID, STATUS, IERR2)
C             CALL MPI_WAIT(SNDID, STATUS, IERR1)
C           ELSE
C             CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
C    &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
C             CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
C    &                      SWAPNODE, MTAG, COMM, IERR1)
C           ENDIF
C
C         ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
C         IF (PROTOPT .EQ. 4) THEN
C
C           Post the receive before the send to allow use of forcetypes. 
            IF (ORDER .EQ. 1) THEN
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
              CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, STATUS, IERR4)
              CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
              CALL MPI_SEND(RCVMSG, 0, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, IERR3)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
            ENDIF
C
C         ELSE
C
C           This case is "included" for completeness. There is no
C           advantage to using sendbegin/end here, so code is commented
C           out. There is an advantage to this case in SWAP_SEND, for
C           example. 
C           IF (ORDER .EQ. 1) THEN
C             CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
C    &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
C             CALL MPI_RECV(SNDMSG, 0, DATATYPE,
C    &                        SWAPNODE, MTAG, COMM, STATUS, IERR4)
C             CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
C    &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
C             CALL MPI_WAIT(RCVID, STATUS, IERR2)
C             CALL MPI_WAIT(SNDID, STATUS, IERR1)
C           ELSE
C             CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
C    &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
C             CALL MPI_SEND(RCVMSG, 0, DATATYPE, 
C    &                       SWAPNODE, MTAG, COMM, IERR3)
C             CALL MPI_WAIT(RCVID, STATUS, IERR2)
C             CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
C    &                       SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
C           ENDIF
C
C         ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         synchronous ordered swap 
          IF (ORDER .EQ. 1) THEN
            CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR1)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
            CALL MPI_SEND(RCVMSG, 0, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR3)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR1)
          ENDIF
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP:',/,
     &           ' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR1 .NE. 0) .OR. (IERR2 .NE. 0) .OR. 
     &    (IERR3 .NE. 0) .OR. (IERR4 .NE. 0)) THEN
        WRITE (0,902) IERR1, IERR2, IERR3, IERR4
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR1 = ',I3,' IERR2 = ',I3,/,
     &           ' IERR3 = ',I3,' IERR4 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP_SEND(COMMOPT, PROTOPT, ORDER, ME, MTAG, 
     &                     SWAPNODE, SNDLTH, SNDMSG, RCVLTH, RCVMSG)
C
C This subroutine sends the message in the SNDMSG buffer to SWAPNODE 
C as part of a swap operation. The swap is completed after both 
C SWAP_SEND and SWAP_RECV have been called. If nonblocking sends are
C used and (COMMOPT .EQ. 0) .OR. ((COMMOPT .EQ. 1).AND.(ORDER .EQ. 1)), 
C then the send is not guaranteed to be complete until after SWAP_RECV
C is called.
C
C Communication options (COMMOPT) for SWAPSEND/SWAPRECV include:
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C Communication protocol options (PROTOPT) for SWAPSEND/SWAPRECV
C include: 
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH1, EXCH2, FRFFT2, HALF1, HALF2, IRFFT2
C calls: ADDRECV, ADDSEND, MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C order of send/recv
      INTEGER ORDER
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be sent
      INTEGER SNDLTH
C message to be sent
      INTEGER SNDMSG(*)
C length of message to be received
C (only used for forcetype option)
      INTEGER RCVLTH
C
C     Output
C
C message received
C (only used for forcetype option)
      INTEGER RCVMSG(*)
C
C---- Local Variables -------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR0, IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR0 /0/, IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements -------------------------------------------
C
C     Choose communication algorithm.
      IF (COMMOPT .EQ. 0) THEN
C***********************************************************************
C       send of simple swap: send/recv
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           send of simple swap
            CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR1)
            DO WHILE (IERR1 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR1, IERR2, IERR0)
              IF (IERR2 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR2)
              ELSE
                IERR1 = -IERR1
              ENDIF
            ENDDO
          ELSE
C           Do not block for send, enabling overlap of computation with
C           communication.
            CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, SNDID, IERR1)
            CALL ADDSEND(SNDID, MTAG, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C    
          IF (PROTOPT .EQ. 2) THEN
C           Post the receive before the send, increasing odds that
C           the receive will be posted before the message arrives.
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, RCVID, IERR2)
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR1)
            CALL ADDRECV(RCVID, MTAG, SWAPNODE)
          ELSE
C           Also do not block for the send, enabling overlap of
C           computation with communication.
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, RCVID, IERR2)
            CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, SNDID, IERR1)
            CALL ADDRECV(RCVID, MTAG, SWAPNODE)
            CALL ADDSEND(SNDID, MTAG, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C    
          IF (PROTOPT .EQ. 4) THEN
C           Post the receive before the send to allow use of forcetypes. 
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
            CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR3)
            DO WHILE (IERR3 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
              IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR3)
              ELSE
                IERR3 = -IERR3
              ENDIF
            ENDDO
            CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
            CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
          ELSE
C           Also do not block for the send, enabling overlap of
C           computation with communication.
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
            CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR3)
            DO WHILE (IERR3 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
              IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR3)
              ELSE
                IERR3 = -IERR3
              ENDIF
            ENDDO
            CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
            CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
            CALL ADDSEND(SNDID, MTAG+FORCETYPE, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         native swap 
          CALL MPI_SENDRECV(SNDMSG, SNDLTH/DATALTH, DATATYPE, SWAPNODE, 
     &                      MTAG, RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR1)
C
        ELSE
C
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAPSEND:',/,
     &             ' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
C
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       send of ordered swap:
C       IF (ORDER .EQ. 1) send/recv
C       IF (ORDER .NE. 1) recv/send
C       (Deadlock avoidance is the responsibility of the calling
C        program.)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           send of ordered swap
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR1)
          ELSE
C           Do not block for the initial send, enabling overlap of 
C           computation with communication.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
              CALL ADDSEND(SNDID, MTAG, SWAPNODE)
            ELSE
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (PROTOPT .EQ. 2) THEN
C
C           Post the receive before the initial send, increasing odds
C           that the receive will be posted before the message arrives.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, RCVID, IERR2)
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, IERR1)
              CALL ADDRECV(RCVID, MTAG, SWAPNODE)
            ELSE
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
          ELSE
C
C           Also do not block for the initial send, enabling overlap of 
C           computation with communication.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, RCVID, IERR2)
              CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
              CALL ADDRECV(RCVID, MTAG, SWAPNODE)
              CALL ADDSEND(SNDID, MTAG, SWAPNODE)
            ELSE
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
          IF (PROTOPT .EQ. 4) THEN
C
C           Post the receive before the send to allow use of forcetypes.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
              CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, STATUS, IERR4)
              CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
              CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
            ELSE
              CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
            ENDIF
C
          ELSE
C
C           Also do not block for the initial send, enabling overlap of 
C           computation with communication.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
              CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                        SWAPNODE, MTAG, COMM, STATUS, IERR4)
              CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
              CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
              CALL ADDSEND(SNDID, MTAG+FORCETYPE, SWAPNODE)
            ELSE
              CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         send of synchronous ordered swap 
          IF (ORDER .EQ. 1) THEN
            CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR1)
          ELSE
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR1)
          ENDIF
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_SEND:',/,
     &           ' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR1 .NE. 0) .OR. (IERR2 .NE. 0) .OR. 
     &    (IERR3 .NE. 0) .OR. (IERR4 .NE. 0)) THEN
         WRITE (0,902) IERR1, IERR2, IERR3, IERR4
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_SEND:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR1 = ',I3,' IERR2 = ',I3,/,
     &           ' IERR3 = ',I3,' IERR4 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP_RECV(COMMOPT, PROTOPT, ORDER, ME, MTAG, 
     &                     SWAPNODE, RCVLTH, RCVMSG)
C
C This subroutine receives a message into RCVMSG as part of a swap 
C operation. The swap is complete after both SWAP_SEND and SWAP_RECV 
C have been called.
C
C Communication options (COMMOPT) for SWAPSEND/SWAPRECV include:
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C Communication protocol options (PROTOPT) for SWAPSEND/SWAPRECV
C include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH1, EXCH2, FRFFT2, HALF1, HALF2, IRFFT2
C calls: RMVRECV, RMVSEND, MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C order of send/recv
      INTEGER ORDER
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be received
      INTEGER RCVLTH
C
C     Output
C
C message received
      INTEGER RCVMSG(*)
C
C---- Local Variables -------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR0, IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- External Functions -----------------------------------------------
C
C return nonblocking communication request ids
      EXTERNAL RMVRECV, RMVSEND
      INTEGER RMVRECV, RMVSEND
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR0 /0/, IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements --------------------------------------------
C
C     Choose communication algorithm.
      IF (COMMOPT .EQ. 0) THEN
C***********************************************************************
C       receive of simple swap: send/recv
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           receive of simple swap
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
C           and complete send
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR2)
            SNDID = RMVSEND(MTAG, SWAPNODE)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C    
          IF (PROTOPT .EQ. 2) THEN
C           Complete receive.
            RCVID = RMVRECV(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete send.
            RCVID = RMVRECV(MTAG, SWAPNODE)
            SNDID = RMVSEND(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C    
          IF (PROTOPT .EQ. 4) THEN
C           Complete forcetype receive.
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete forcetype send.
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            SNDID = RMVSEND(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         receive already complete in "native" swap
C
        ELSE
C
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAPRECV:',/,
     &             ' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
C
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       receive of ordered swap:
C       IF (ORDER .EQ. 1) receive of send/recv
C       IF (ORDER .NE. 1) receive of recv/send
C       (Deadlock avoidance is the responsibility of the calling
C        program.)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           receive of order swap
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
C           Also complete send.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
              SNDID = RMVSEND(MTAG, SWAPNODE)
              CALL MPI_WAIT(SNDID, STATUS, IERR1)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (PROTOPT .EQ. 2) THEN
C
C           Complete receive.
            IF (ORDER .EQ. 1) THEN
              RCVID = RMVRECV(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
            ENDIF
C
          ELSE
C
C           Also complete send.
            IF (ORDER .EQ. 1) THEN
              RCVID = RMVRECV(MTAG, SWAPNODE)
              SNDID = RMVSEND(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_WAIT(SNDID, STATUS, IERR1)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
          IF (PROTOPT .EQ. 4) THEN
C
C           Complete forcetype receive.
            IF (ORDER .EQ. 1) THEN
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
              CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, IERR3)
              DO WHILE (IERR3 .GT. 0)
                CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
                IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
                  CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                           SWAPNODE, MTAG, COMM, IERR3)
                ELSE
                  IERR3 = -IERR3
                ENDIF
              ENDDO
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ENDIF
C
          ELSE
C
C           Also complete forcetype send.
            IF (ORDER .EQ. 1) THEN
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              SNDID = RMVSEND(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_WAIT(SNDID, STATUS, IERR1)
            ELSE
              CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
              CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, IERR3)
              DO WHILE (IERR3 .GT. 0)
                CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
                IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
                  CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                           SWAPNODE, MTAG, COMM, IERR3)
                ELSE
                  IERR3 = -IERR3
                ENDIF
              ENDDO
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         receive of synchronous ordered swap 
          IF (ORDER .EQ. 1) THEN
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
            CALL MPI_SEND(RCVMSG, 0, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR3)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ENDIF
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECV:',/,
     &           ' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR1 .NE. 0) .OR. (IERR2 .NE. 0) .OR. 
     &    (IERR3 .NE. 0) .OR. (IERR4 .NE. 0)) THEN
         WRITE (0,902) IERR1, IERR2, IERR3, IERR4
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECV:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR1 = ',I3,' IERR2 = ',I3,/,
     &           ' IERR3 = ',I3,' IERR4 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP_RECVBEGIN(COMMOPT, PROTOPT, 
     &                          ORDER, ME, MTAG, SWAPNODE, RCVLTH, 
     &                          RCVMSG)
C
C This subroutine posts a request to receive a message into RCVMSG
C as part of a swap operation. The receive is completed in SWAP_RECVEND,
C and SWAP_RECVBEGIN/SWAP_RECVEND are used with SWAP_SEND to complete
C the swap.
C
C Communication options (COMMOPT) for SWAPSEND/SWAPRECVBEGIN/SWAPRECVEND 
C include: 
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C Communication protocol options (PROTOPT) for 
C SWAPSEND/SWAPRECVBEGIN/SWAPRECVEND include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH2, HALF2
C calls: ADDRECV, MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C order of send/recv
      INTEGER ORDER
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be received
      INTEGER RCVLTH
C buffer where message is to be received
      INTEGER RCVMSG(*)
C
C---- Local Variables --------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements --------------------------------------------
C
C     Choose communication algorithm.
      IF (COMMOPT .EQ. 0) THEN
C***********************************************************************
C       recvbegin of simple swap: recvbegin (send/recvend)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C         SWAP_RECVEND posts the receive in this case.
        ELSEIF (PROTOPT .LE. 5) THEN
C         SWAP_SEND posts the receive in this case.
        ELSEIF (PROTOPT .EQ. 6) THEN
C         no recvbegin in a "native" swap 
        ELSE
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECVBEGIN:',
     &           /,' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       recvbegin of ordered swap:
C       IF (ORDER .EQ. 1) recvbegin (send/recvend)
C       IF (ORDER .NE. 1) recvbegin (recvend/send)
C       (Deadlock avoidance is the responsibility of the calling
C        program.)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
C         SWAP_RECVEND posts the receive in this case.
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (ORDER .EQ. 1) THEN
C           SWAP_SEND posts the receive in this case.
          ELSE
C           Post the receive, enabling overlap of computation with 
C           communication between SWAP_RECVBEGIN and SWAP_RECVEND.
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, RCVID, IERR2)
            CALL ADDRECV(RCVID, MTAG, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
          IF (ORDER .EQ. 1) THEN
C           SWAP_SEND posts the receive in this case.
          ELSE
C           Post a forcetype receive, enabling overlap of computation
C           with communication between SWAP_RECVBEGIN and SWAP_RECVEND. 
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
            CALL MPI_SEND(RCVMSG, 0, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR3)
            CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         no recvbegin in a "synchronous" ordered swap 
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECVBEGIN:',
     &         /,' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR2 .NE. 0) .OR. (IERR3 .NE. 0)) THEN
        WRITE (0,902) IERR2, IERR3
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECVBEGIN:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR2 = ',I3,' IERR3 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP_RECVEND(COMMOPT, PROTOPT, ORDER, ME, 
     &                        MTAG, SWAPNODE, RCVLTH, RCVMSG)
C
C This subroutine waits until the message requested in SWAP_RECVBEGIN
C has arrived. SWAP_RECVBEGIN/SWAP_RECVEND are used with SWAP_SEND to 
C implement a swap.
C
C Communication options (COMMOPT) for SWAPSEND/SWAPRECVBEGIN/SWAPRECVEND
C include: 
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C Communication protocol options (PROTOPT) for 
C SWAPSEND/SWAPRECVBEGIN/SWAPRECVEND include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH2, HALF2
C calls: RMVRECV, RMVSEND, MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C order of send/recv
      INTEGER ORDER
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be received
      INTEGER RCVLTH
C
C     Output
C
C message received
      INTEGER RCVMSG(*)
C
C---- Local Variables --------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- External Functions -----------------------------------------------
C
C return nonblocking communication request ids
      EXTERNAL RMVRECV, RMVSEND
      INTEGER RMVRECV, RMVSEND
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements --------------------------------------------
C
C     Choose communication algorithm.
      IF (COMMOPT .EQ. 0) THEN
C***********************************************************************
C       recvend of simple swap: (recvbegin/send) recvend
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           Complete "recvend" of blocking receive protocol.
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
C           Also complete the send.
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR2)
            SNDID = RMVSEND(MTAG, SWAPNODE)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (PROTOPT .EQ. 2) THEN
C           Complete receive of simple swap.
            RCVID = RMVRECV(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete the send.
            RCVID = RMVRECV(MTAG, SWAPNODE)
            SNDID = RMVSEND(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
          IF (PROTOPT .EQ. 4) THEN
C           Complete forcetype receive.
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete forcetype send.
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            SNDID = RMVSEND(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         no recvend in a "native" swap 
C
        ELSE
C
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECVEND:',
     &           /,' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
C
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       recvend of ordered swap:
C       IF (ORDER .EQ. 1) (recvbegin/send) recvend
C       IF (ORDER .NE. 1) (recvbegin) recvend (send)
C       (Deadlock avoidance is the responsibility of the calling
C        program.)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           Complete "recvend" of blocking receive protocol.
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
C           Also complete the initial send.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
              SNDID = RMVSEND(MTAG, SWAPNODE)
              CALL MPI_WAIT(SNDID, STATUS, IERR1)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
            ENDIF
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (PROTOPT .EQ. 2) THEN
C           Complete receive of ordered swap.
            RCVID = RMVRECV(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete initial send.
            IF (ORDER .EQ. 1) THEN
              RCVID = RMVRECV(MTAG, SWAPNODE)
              SNDID = RMVSEND(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_WAIT(SNDID, STATUS, IERR1)
            ELSE
              RCVID = RMVRECV(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
          IF (PROTOPT .EQ. 4) THEN
C           Complete forcetype receive.
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete initial forcetype send.
            IF (ORDER .EQ. 1) THEN
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              SNDID = RMVSEND(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_WAIT(SNDID, STATUS, IERR1)
            ELSE
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ENDIF
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         recvend of "synchronous" ordered swap 
          IF (ORDER .EQ. 1) THEN
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
            CALL MPI_SEND(RCVMSG, 0, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR3)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ENDIF
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECVEND:',
     &         /,' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR1 .NE. 0) .OR. (IERR2 .NE. 0) .OR. 
     &    (IERR3 .NE. 0)) THEN
        WRITE (0,902) IERR1, IERR2, IERR3
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP_RECVEND:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR1 = ',I3,' IERR2 = ',I3,/,
     &           ' IERR3 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP1(COMMOPT, PROTOPT, SAFEFORCE, ORDER,
     &                 ME, MTAG, SWAPNODE, RCVLTH, RCVMSG)
C
C This subroutine begins a swap operation that will be completed by
C SWAP2 and SWAP3. It posts a receive and sends handshaking messages
C when forcetypes are used. 
C
C Communication options (COMMOPT) for SWAP1/SWAP2/SWAP3 include:
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C  IF (COMMOPT .EQ. 2) delayed-recv swap: send ... recv
C Communication protocol options (PROTOPT) for SWAP1/SWAP2/SWAP3
C include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH2, HALF2, LGTRNS2, LGTRNS4
C calls: ADDRECV, MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C send handshaking message to guarantee correctness of using forcetype
C protocol? (not necessary if user has other guarantees)
      LOGICAL SAFEFORCE
C order of send/recv
      INTEGER ORDER
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be received
      INTEGER RCVLTH
C buffers where messages are to be received
      INTEGER RCVMSG(*)
C
C---- Local Variables --------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR0, IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR0 /0/, IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements --------------------------------------------
C
C     Choose communication algorithm.
      IF ((COMMOPT .EQ. 0) .OR. (COMMOPT .EQ. 2)) THEN
C***********************************************************************
C       simple swap: send/recv
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
C         This procotol does not use nonblocking receive.
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
C         Post the receive before the send, increasing odds that the
C         receive will be posted before the message arrives.
          CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                   SWAPNODE, MTAG, COMM, RCVID, IERR2)
          CALL ADDRECV(RCVID, MTAG, SWAPNODE)
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
C         Post the receive before send to allow use of forcetypes.
          CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                   SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
          IF (SAFEFORCE) THEN
            CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR3)
            DO WHILE (IERR3 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
              IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR3)
              ELSE
                IERR3 = -IERR3
              ENDIF
            ENDDO
          ENDIF
          CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         Native swap does not use nonblocking receive.
C
        ELSE
C
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP1:',
     &           /,' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
C
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       ordered swap:
C       IF (ORDER .EQ. 1) send/recv
C       IF (ORDER .NE. 1) recv/send
C       (Deadlock avoidance is the responsibility of the calling
C        program.)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
C         This procotol does not use nonblocking receive.
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
C         Post the receive before the initial send, increasing odds that
C         the receive will be posted before the message arrives.
          CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                   SWAPNODE, MTAG, COMM, RCVID, IERR2)
          CALL ADDRECV(RCVID, MTAG, SWAPNODE)
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
C         Post the receive before the send to allow use of forcetypes. 
          IF (ORDER .EQ. 1) THEN
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
            CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
          ELSE
            CALL MPI_IRECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, RCVID, IERR2)
            IF (SAFEFORCE) THEN
              CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, IERR3)
              DO WHILE (IERR3 .GT. 0)
                CALL MPI_ERROR_CLASS(IERR3, IERR4, IERR0)
                IF (IERR4 .EQ. MPI_ERR_BUFFER) THEN
                  CALL MPI_BSEND(RCVMSG, 0, DATATYPE, 
     &                           SWAPNODE, MTAG, COMM, IERR3)
                ELSE
                  IERR3 = -IERR3
                ENDIF
              ENDDO
            ENDIF
            CALL ADDRECV(RCVID, MTAG+FORCETYPE, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         Synchronous ordered swap does not use nonblocking receive.
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP1:',
     &         /,' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR0 .NE. 0) .OR. (IERR2 .NE. 0) .OR. 
     &    (IERR3 .NE. 0) .OR. (IERR4 .NE. 0)) THEN
        WRITE (0,902) IERR1, IERR2, IERR3, IERR4
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP1:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR0 = ',I3,' IERR2 = ',I3,/,
     &           ' IERR3 = ',I3,' IERR4 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP2(COMMOPT, PROTOPT, SAFEFORCE, ORDER, 
     &                 ME, MTAG, SWAPNODE, SNDLTH, SNDMSG, RCVLTH, 
     &                 RCVMSG)
C
C This subroutine continues the swap operation begun in SWAP1. It
C initiates the send and waits for the receive to complete. 
C
C Communication options (COMMOPT) for SWAP1/SWAP2/SWAP3 include:
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C  IF (COMMOPT .EQ. 2) delayed-recv swap: send ... recv
C Communication protocol options (PROTOPT) for SWAP1/SWAP2/SWAP3
C include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH2, HALF2, LGTRNS2, LGTRNS4
C calls: ADDSEND, RMVRECV, MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C send handshaking message to guarantee correctness of using forcetype
C protocol? (not necessary if user has other guarantees)
      LOGICAL SAFEFORCE
C order of send/recv
      INTEGER ORDER
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be sent
      INTEGER SNDLTH
C message to be sent
      INTEGER SNDMSG(*)
C length of message to be received
      INTEGER RCVLTH
C
C     Output
C
C message received
      INTEGER RCVMSG(*)
C
C---- Local Variables --------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR0, IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- External Functions -----------------------------------------------
C
C return nonblocking receive request id
      EXTERNAL RMVRECV
      INTEGER RMVRECV
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR0 /0/, IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements --------------------------------------------
C
C     Choose communication algorithm.
      IF (COMMOPT .EQ. 0) THEN
C***********************************************************************
C       simple swap: send/recv
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           simple swap
            CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR1)
            DO WHILE (IERR1 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR1, IERR2, IERR0)
              IF (IERR2 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR1)
              ELSE
                IERR1 = -IERR1
              ENDIF
            ENDDO
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
C           Do not block for the send, enabling overlap of communication 
C           with computation.
            CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, SNDID, IERR1)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                     SWAPNODE, MTAG, COMM, STATUS, IERR2)
            CALL ADDSEND(SNDID, MTAG, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (PROTOPT .EQ. 2) THEN
C           Complete outstanding receive,
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR1)
            RCVID = RMVRECV(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also do not block for the send, enabling overlap of
C           communication with computation
            CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, SNDID, IERR1)
            CALL ADDSEND(SNDID, MTAG, SWAPNODE)
            RCVID = RMVRECV(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C    
          IF (PROTOPT .EQ. 4) THEN
C           Complete forcetype receive.
            IF (SAFEFORCE)
     &        CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also do not block for send, enabling overlap of
C           communication with computation.
            IF (SAFEFORCE)
     &        CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
            CALL ADDSEND(SNDID, MTAG+FORCETYPE, SWAPNODE)
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         native swap
          CALL MPI_SENDRECV(SNDMSG, SNDLTH/DATALTH, DATATYPE, SWAPNODE, 
     &                      MTAG, RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR1)
C
        ELSE
C
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP2:',
     &           /,' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
C
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       ordered swap:
C       IF (ORDER .EQ. 1) send/recv
C       IF (ORDER .NE. 1) recv/send
C       (Deadlock avoidance is the responsibility of the calling
C        program.)
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C
C           ordered swap
            IF (ORDER .EQ. 1) THEN
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, STATUS, IERR2)
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
          ELSE
C
C           Do not block for the send, enabling overlap of communication 
C           with computation.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                       SWAPNODE, MTAG, COMM, STATUS, IERR2)
              CALL ADDSEND(SNDID, MTAG, SWAPNODE)
            ELSE
              CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR2)
              CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
              CALL ADDSEND(SNDID, MTAG, SWAPNODE)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (PROTOPT .EQ. 2) THEN
C
C           Complete outstanding receive.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, IERR1)
              RCVID = RMVRECV(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              RCVID = RMVRECV(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG, COMM, IERR1)
            ENDIF
C
          ELSE
C
C           Also do not block for the send, enabling overlap of
C           communication with computation.
            IF (ORDER .EQ. 1) THEN
              CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
              CALL ADDSEND(SNDID, MTAG, SWAPNODE)
              RCVID = RMVRECV(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              RCVID = RMVRECV(MTAG, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                       SWAPNODE, MTAG, COMM, SNDID, IERR1)
              CALL ADDSEND(SNDID, MTAG, SWAPNODE)
            ENDIF

          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
          IF (PROTOPT .EQ. 4) THEN
C
C           Complete forcetype receive.
            IF (ORDER .EQ. 1) THEN
              IF (SAFEFORCE)
     &          CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                        SWAPNODE, MTAG, COMM, STATUS, IERR4)
              CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                        SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                        SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
            ENDIF
C
          ELSE
C
C           Also do not block for the send, enabling overlap of
C           communication with computation.
            IF (ORDER .EQ. 1) THEN
              IF (SAFEFORCE)
     &          CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                        SWAPNODE, MTAG, COMM, STATUS, IERR4)
              CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
              CALL ADDSEND(SNDID, MTAG+FORCETYPE, SWAPNODE)
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
            ELSE
              RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
              CALL MPI_WAIT(RCVID, STATUS, IERR2)
              CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
              CALL ADDSEND(SNDID, MTAG+FORCETYPE, SWAPNODE)
            ENDIF
C
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         synchronous ordered swap 
          IF (ORDER .EQ. 1) THEN
            CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR1)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
            CALL MPI_SEND(RCVMSG, 0, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR3)
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
            CALL MPI_SEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                    SWAPNODE, MTAG, COMM, IERR1)
          ENDIF
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
C     Choose communication algorithm.
      ELSEIF (COMMOPT .EQ. 2) THEN
C***********************************************************************
C       delayed swap: send ... recv
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 3) THEN
C
          IF ((PROTOPT .EQ. 0) .OR. (PROTOPT .EQ. 2)) THEN
C           swap send
            CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, IERR1)
            DO WHILE (IERR1 .GT. 0)
              CALL MPI_ERROR_CLASS(IERR1, IERR2, IERR0)
              IF (IERR2 .EQ. MPI_ERR_BUFFER) THEN
                CALL MPI_BSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                         SWAPNODE, MTAG, COMM, IERR1)
              ELSE
                IERR1 = -IERR1
              ENDIF
            ENDDO
          ELSE
C           Do not block for the send, enabling overlap of communication 
C           with computation.
            CALL MPI_ISEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG, COMM, SNDID, IERR1)
            CALL ADDSEND(SNDID, MTAG, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C    
          IF (PROTOPT .EQ. 4) THEN
C           Send forcetype message.
            IF (SAFEFORCE) 
     &        CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_RSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                      SWAPNODE, MTAG+FORCETYPE, COMM, IERR1)
          ELSE
C           Do not block for send, enabling overlap of
C           communication with computation.
            IF (SAFEFORCE) 
     &        CALL MPI_RECV(SNDMSG, 0, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR4)
            CALL MPI_IRSEND(SNDMSG, SNDLTH/DATALTH, DATATYPE, 
     &                     SWAPNODE, MTAG+FORCETYPE, COMM, SNDID, IERR1)
            CALL ADDSEND(SNDID, MTAG+FORCETYPE, SWAPNODE)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         native swap
          CALL MPI_SENDRECV(SNDMSG, SNDLTH/DATALTH, DATATYPE, SWAPNODE, 
     &                      MTAG, RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                      SWAPNODE, MTAG, COMM, STATUS, IERR1)
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP2:',
     &         /,' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR1 .NE. 0) .OR. (IERR2 .NE. 0) .OR. 
     &    (IERR3 .NE. 0) .OR. (IERR4 .NE. 0)) THEN
        WRITE (0,902) IERR1, IERR2, IERR3, IERR4
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP2:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR1 = ',I3,' IERR2 = ',I3,/,
     &           ' IERR3 = ',I3,' IERR4 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
C
      SUBROUTINE SWAP3(COMMOPT, PROTOPT, ME, MTAG, SWAPNODE,
     &                 RCVLTH, RCVMSG)
C
C This subroutine completes the swap operation begun in SWAP1 and SWAP2.
C It waits until the send and receive request made in SWAP2 have
C completed.
C
C Communication options (COMMOPT) for SWAP1/SWAP2/SWAP3 include:
C  IF (COMMOPT .EQ. 0) simple swap: send/recv
C  IF (COMMOPT .EQ. 1) ordered swap: [send/recv]|[recv/send]
C  IF (COMMOPT .EQ. 2) delayed-recv swap: send ... recv
C Communication protocol options (PROTOPT) for SWAP1/SWAP2/SWAP3 
C include:
C  IF (PROTOPT .EQ. 1, 3, .OR. 5)     nonblocking send   
C  IF (PROTOPT .EQ. 2, 3, 4, .OR. 5)  nonblocking receive
C  IF (PROTOPT .EQ. 4 .OR. 5)         forcetype          
C  if (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 0) native
C  IF (PROTOPT .EQ. 6 .AND. COMMOPT .EQ. 1) synchronous  
C
C called by: EXCH2, HALF2
C calls: RMVRECV, RMVSEND, MPI routines
C
C---- Implicit None ----------------------------------------------------
C
      IMPLICIT NONE
C
C---- Parameters -------------------------------------------------------
C
C mpi implementation-specific parameters
#     include "mpif.h"
C
C forcetype message tag offset
      INTEGER FORCETYPE
      PARAMETER (FORCETYPE = 15380)
C
C---- Common Blocks ----------------------------------------------------
C
C picl/mpi interface variables
#     include "picl.i"
C
C---- Arguments --------------------------------------------------------
C
C     Input 
C
C communication algorithm option
      INTEGER COMMOPT
C communication protocol option 
      INTEGER PROTOPT
C processor id
      INTEGER ME
C message tag offset 
C (MTAG+NPROCS should be < FORCETYPE to guarantee correctness)
      INTEGER MTAG
C processor swapping messages with
      INTEGER SWAPNODE
C length of message to be received
      INTEGER RCVLTH
C
C     Output
C
C message received
      INTEGER RCVMSG(*)
C
C---- Local Variables --------------------------------------------------
C
C message request IDs for nonblocking communication calls
      INTEGER RCVID, SNDID
C error return variables
      INTEGER IERR1, IERR2, IERR3, IERR4
C array for status values
      INTEGER STATUS(MPI_STATUS_SIZE)
C
C---- External Functions -----------------------------------------------
C
C return nonblocking communication request ids
      EXTERNAL RMVRECV, RMVSEND
      INTEGER RMVRECV, RMVSEND
C
C---- Data Statements -------------------------------------------------
C
      DATA IERR1 /0/, IERR2 /0/, IERR3 /0/, IERR4 /0/
C
C---- Executable Statements --------------------------------------------
C
C     Choose communication algorithm.
      IF (COMMOPT .EQ. 0) THEN
C***********************************************************************
C       simple swap: send/recv
C***********************************************************************
C
C       Choose communication protocol.
C
C       Complete send for nonblocking send protocols.
        IF ((PROTOPT .EQ. 1) .OR. (PROTOPT .EQ. 3)) THEN
          SNDID = RMVSEND(MTAG, SWAPNODE)
          CALL MPI_WAIT(SNDID, STATUS, IERR1)
        ELSEIF (PROTOPT .EQ. 5) THEN
          SNDID = RMVSEND(MTAG+FORCETYPE, SWAPNODE)
          CALL MPI_WAIT(SNDID, STATUS, IERR1)
        ELSEIF (PROTOPT .GT. 6) THEN
          WRITE (0,901) COMMOPT, PROTOPT
  901     FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP3:',
     &           /,' UNKNOWN COMMUNICATION PROTOCOL SPECIFIED',/, 
     &             ' COMMOPT = ',I3, ' PROTOPT = ',I3)
          STOP                                                   
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 1) THEN
C***********************************************************************
C       ordered swap:
C       IF (ORDER .EQ. 1) send/recv
C       IF (ORDER .NE. 1) recv/send
C       (deadlock avoidance is the responsibility of the calling
C        program)
C***********************************************************************
C
C       Choose communication protocol.
C
C       Complete send for nonblocking send protocols.
        IF ((PROTOPT .EQ. 1) .OR. (PROTOPT .EQ. 3)) THEN
          SNDID = RMVSEND(MTAG, SWAPNODE)
          CALL MPI_WAIT(SNDID, STATUS, IERR1)
        ELSEIF (PROTOPT .EQ. 5) THEN
          SNDID = RMVSEND(MTAG+FORCETYPE, SWAPNODE)
          CALL MPI_WAIT(SNDID, STATUS, IERR1)
        ELSEIF (PROTOPT .GT. 6) THEN
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
        ENDIF
C
      ELSEIF (COMMOPT .EQ. 2) THEN
C***********************************************************************
C       delayed-recv swap: recvbegin ... send ... recvend
C***********************************************************************
C
C       Choose communication protocol.
        IF (PROTOPT .LE. 1) THEN
C
          IF (PROTOPT .EQ. 0) THEN
C           Receive message.
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
          ELSE
C           Also complete send.
            CALL MPI_RECV(RCVMSG, RCVLTH/DATALTH, DATATYPE,
     &                    SWAPNODE, MTAG, COMM, STATUS, IERR2)
            SNDID = RMVSEND(MTAG, SWAPNODE)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 3) THEN
C
          IF (PROTOPT .EQ. 2) THEN
C           Complete receive.
            RCVID = RMVRECV(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete send.
            RCVID = RMVRECV(MTAG, SWAPNODE)
            SNDID = RMVSEND(MTAG, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .LE. 5) THEN
C
          IF (PROTOPT .EQ. 4) THEN
C           Complete receive.
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
          ELSE
C           Also complete send.
            RCVID = RMVRECV(MTAG+FORCETYPE, SWAPNODE)
            SNDID = RMVSEND(MTAG+FORCETYPE, SWAPNODE)
            CALL MPI_WAIT(RCVID, STATUS, IERR2)
            CALL MPI_WAIT(SNDID, STATUS, IERR1)
          ENDIF
C
        ELSEIF (PROTOPT .EQ. 6) THEN
C
C         receive already complete in "native" swap
C
        ELSE
C
C         protocol error
          WRITE (0,901) COMMOPT, PROTOPT
          STOP                                                   
C
        ENDIF
C
      ELSE
C***********************************************************************
C       undefined swap option
C***********************************************************************
C
        WRITE (0,900) COMMOPT
  900   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP3:',
     &         /,' UNKNOWN COMMUNICATION OPTION SPECIFIED',/, 
     &           ' COMMOPT = ',I3)                                 
        STOP                                                   
C
      ENDIF
C
      IF ((IERR1 .NE. 0) .OR. (IERR2 .NE. 0)) THEN
        WRITE (0,902) IERR1, IERR2
  902   FORMAT(/,' PSTSWM: FATAL ERROR IN SUBROUTINE SWAP3:',/,
     &           ' NONZERO MPI ROUTINE ERROR CODE RETURNED',/, 
     &           ' IERR1 = ',I3,' IERR2 = ',I3)
        STOP
      ENDIF
C
      RETURN
      END
C
CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
