      PROGRAM POLY3
C
      INCLUDE 'poly3.inc'
      INCLUDE 'fpvm3.h'
      INCLUDE 'dattyp.inc'

C     =================================================
C     ===                                           ===
C     ===   Program:  Communication Bottleneck      ===
C     ===   Version:  PVM3 + Fortran 77             ===
C     ===                                           ===
C     =================================================

C     /* Program name */
      CHARACTER*10  NAME /'poly3_pvm'/


C     /* Timer  routine */
      EXTERNAL DWALLTIME00
      DOUBLE PRECISION DWALLTIME00
C
C     PVM Variables:
C
      CHARACTER*5 GROUP /'poly3'/
      INTEGER I, IBUF, INFO, ME, MYTID, TIDS(0:MAXNOD)
C
C     /* maximum length of arrays */
C
      INTEGER NNMAX
      PARAMETER(NNMAX=10000)
      COMMON /SHDATA/ A(NNMAX),B(NNMAX)
      DOUBLE PRECISION A,B
C
      COMMON /COEFFS/ S0,S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11
      DOUBLE PRECISION S0,S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11
C
      INTEGER CT,JT,KFSEL,MREF,N,NFLOP,NFLP,NMAX,NNODE,
     &        NPOLRP,NPOLYS,NSEL,NSLAVE,NTIM,NTIMES
      DOUBLE PRECISION FHALF,FRAT(50),PER,RHAT,RINF,RLAST(50),RMFLPS,
     &                 S12,ST0,STIME,T0,T1,T2,T10,T11,TN,TOTIME,TS,X,
     &                 XN,XN12,XNLAST(50),Y
C
C     /* the unit number of output files */
C
      INTEGER IOUNIT, NW1
      PARAMETER(IOUNIT=10)
      PARAMETER(NW1=11)


c     This call launches all processes (if necessary)
c     and distributes the variables NNODES, ME, MYTID, TIDS

      call launch(NAME, GROUP, IOUNIT, 'poly3.dat', MAXNOD,
     &                  NNODE, ME, MYTID, TIDS)


C     Read in data and Broadcast them

      IF( ME .EQ. 0 ) THEN  

C
C        Get parameters from input file
C
         PRINT *
         PRINT *,'      POLY3:  Communication Bottleneck Test'
         PRINT *,'      -------------------------------------'
         PRINT *,'A vector of data is sent from a master process to a'
         PRINT *,'slave process which performs an evaluation of a'
         PRINT *,'polynomial for each element of the vector, and then'
         PRINT *,'returns the vector of results to the master.'
         PRINT *,'The slave may be selected as any of the processs.'
         PRINT *,'      -------------------------------------'
         PRINT *
         OPEN( UNIT=IOUNIT, FILE='poly3.dat', STATUS='OLD' )
         READ( IOUNIT,FMT = * ) 
         PRINT *,'Number of nodes =',NNODE
C
         PRINT *
         PRINT *,' master node, 0, exchanges with a slave node'
         PRINT *,' with a number in the range 1 to ', NNODE-1,'.'
         PRINT *,' --------------------------------------'
         PRINT *
         READ( IOUNIT,FMT = * ) NSLAVE
         IF( NNODE.EQ.2 ) THEN
	    NSLAVE = 1
         ELSE
            IF( NSLAVE.LT.1 .OR. NSLAVE.GT.NNODE-1 ) THEN
               PRINT *,'The slave node value', NSLAVE, ' is invalid.'
               PRINT *,'Try again:  1 <= slave number <=', NNODE-1
               PRINT *
               CALL PVMFEXIT(INFO)
               STOP
            ENDIF
         ENDIF
         PRINT *,'Slave number =',NSLAVE
         CLOSE(IOUNIT)
C
C        /* Send NSLAVE to nodes */
C
         CALL PVMFINITSEND(PVMDEFAULT,INFO)
         CALL PVMFPACK(ITYPE,NSLAVE,1,1,INFO)
         CALL PVMFMCAST(NNODE,TIDS,0,INFO)
      ELSE
         CALL PVMFRECV(TIDS(0),0,INFO)
         CALL PVMFUNPACK(ITYPE,NSLAVE,1,1,INFO)
      ENDIF

C     ======================

      IF( ME.EQ.0 ) THEN
         OPEN(NW1,FILE='poly3.res')
C
C       /* print header */
C
        CALL HEADER(NW1,NAME)
C
      ENDIF
C
C     /* Global synchronisation */
C
      CALL PVMFBARRIER(GROUP,NNODE,INFO)
      IF( INFO.LT.0 ) PRINT *,'ERROR: pvmfbarrier returned ',INFO
C
C
C     /* Initialise variables here */
C
      NTIMES=1000*NITER
      NMAX=NNMAX
C
C     /* Initialise coeffs of polynomial */
C
      S0=3.12
      S1=2.13
      S2=1.65
      S3=8.32
      S4=1.76
      S5=3.09
      S6=1.82
      S7=2.41
      S8=3.69
      S9=4.11
      S10=8.51
      S11=1.66
C
      IF( ME.EQ.0 ) THEN
C
C        /* INITIALIZE TIMER */
C
         T10 = DWALLTIME00()
C
C        /* INITIALIZE ARRAYS : */
C
         DO 30 I=1,NMAX
            A(I)=5.842*FLOAT(I)
            B(I)=3.9675*FLOAT(I)
   30    CONTINUE
C
         WRITE(NW1,*) 
         WRITE(NW1,*) 
         WRITE(NW1,*) '               POLY3: COMMUNICATION BOTTLENECK'
         WRITE(NW1,*) '               -------------------------------'
         WRITE(NW1,*) '               POLYNOMIAL BENCHMARK FOR FHALF'
         WRITE(NW1,*) '               ------------------------------'
         WRITE(NW1,*) '                    (DOUBLE PRECISION)'
         WRITE(NW1,*) 
         WRITE(NW1,347) 
  347    FORMAT('This benchmark tests the severity of communication',/,
     1 'bottlenecks by varying the amount of arithmetic per datum',/,
     1 'communicated which is called the computational intensity of',/,
     1 'the loop. The performance for long loop (vector) lengths,',/,
     1 'RINF, is represented as :',//,
     3 '               RINF = RHAT/(1 + FHALF/F)',//,
     4 'where   RHAT = peak Mflop/s rate of arithmetic pipeline',/,
     4 '               approached as F goes to infinity',/,
     5 '  and      F = computational intensity',/,
     6 '             = ratio floating operations/datum communicated',/,
     7 '       FHALF = F required to obtain RINF=RHAT/2',//,
     8 'The loop executed is polynomial evaluation by Horners rule,',/,
     9 'F varies as the order increases from 1 to 10.',//)
         WRITE(NW1,348) 
  348    FORMAT('A vector of data is sent from a master process to'/
     1 'a slave process, where the polynomial is evaluated, and',/,
     1 'the result vector is returned to the master.',/,
     1 'The computational intensity is equal to the order of the',/,
     1 'polynomial.',/,
     1 '----------------------------------------------------------',/,
     1 'Roger Hockney, October 1993',/,
     1 '----------------------------------------------------------')
C
      ENDIF
C
C     /* Number of polynomials */
C
      NPOLYS=20
      CT = 0
      DO 21 KFSEL=1,NPOLYS,1
C
C       /* Select polynomial order = NFLP*NPOLRP/2 */
C
        IF(KFSEL .EQ. 1) NFLP=2
        IF(KFSEL .EQ. 2) NFLP=4
        IF(KFSEL .EQ. 3) NFLP=6
        IF(KFSEL .EQ. 4) NFLP=8
        IF(KFSEL .EQ. 5) NFLP=10
        IF(KFSEL .EQ. 6) NFLP=12
        IF(KFSEL .EQ. 7) NFLP=14
        IF(KFSEL .EQ. 8) NFLP=16
        IF(KFSEL .EQ. 9) NFLP=18
        IF(KFSEL .GE. 10)NFLP=20
C
C       /* Number of repeats of 10th degree polynomial */
C
        NPOLRP=1
        IF( KFSEL.GE.11 .AND. KFSEL.LE.15 ) NPOLRP=2*(KFSEL-10)
        IF( KFSEL.GE.16 .AND. KFSEL.LE.20 ) NPOLRP=20*(KFSEL-15)
        NFLOP=NPOLRP*NFLP
C
        IF( ME.EQ.0 ) THEN
           MREF=2
           FRAT(KFSEL)=FLOAT(NFLOP)/FLOAT(MREF)
C
           WRITE(NW1,345)
  345      FORMAT(1X,//)
C
           WRITE(NW1,344) FRAT(KFSEL)  
  344      FORMAT(1X,'COMPUTATIONAL INTENSITY = flop per mem ref',
     2    F10.3,/,1X,'------------------------------------------')
C
           WRITE(NW1,346) NFLOP,MREF
  346      FORMAT(1X,/,'Floating operations per iteration =',I4,//,
     1              'Memory references   per iteration =',I4)
C
C         /* INITIALIZE LEAST SQUARES */
C
           CALL LSTSQ(0,XN,TN,RINF,XN12,PER)
C
           WRITE(NW1,668)
  668      FORMAT(//,2X,'LOOP',7X,'LOOP',8X,'RINF',8X,
     1     'N1/2',7X,'S1/2',
     1     5X,'ERROR',4X,'R-AVERAGE',6X,'T0',6X,'NTIM',
     2     /,1X,'LENGTH',6X,'SECS',7X,'MFLOP/S',4X,'VEC.LEN',6X,'FLOP',
     3     7X,'%',7X,'MFLOP/S',6X,'SECS',4X,'repeat')
        ENDIF
C
        DO 20 NSEL=1,12,1
C
C         /* SELECT VECTOR LENGTH */
C
          IF (NSEL .EQ. 1) N=10
          IF (NSEL .EQ. 2) N=50
          IF (NSEL .EQ. 3) N=100
          IF (NSEL .EQ. 4) N=200
          IF (NSEL .EQ. 5) N=500
          IF (NSEL .EQ. 6) N=1000
          IF (NSEL .EQ. 7) N=2000
          IF (NSEL .EQ. 8) N=3000
          IF (NSEL .EQ. 9) N=4000
          IF (NSEL .EQ. 10) N=6000
          IF (NSEL .EQ. 11) N=8000
          IF (NSEL .EQ. 12) N=10000
          IF (N .GT. NMAX) GOTO 20
C
C         /* SELECT REPEAT */
C
          NTIM=NTIMES/(NFLOP*N)
          IF (NTIM .LT. 1) NTIM=1
C
          IF( ME.EQ.0 ) THEN
C
C            /* Measure overhead */
C
             T1 = DWALLTIME00()
             DO 86 JT=1,NTIM
               CALL DUMMY(JT)
   86        CONTINUE
             T2 = DWALLTIME00()
             T0=T2-T1
C
C            /* TIME REMOTE POLYNOMIAL EVALUATION */
C
             T1 = DWALLTIME00()
C
             DO 16 JT=1,NTIM
               CALL DUMMY(JT)
C
C              /*  SEND DATA TO SLAVE FOR COMPUTATION */
C
               CALL PVMFINITSEND(PVMDEFAULT,INFO)
               CALL PVMFPACK(DTYPE,B,N,1,INFO)
               CALL PVMFSEND(TIDS(NSLAVE),10,INFO)
C
C              /* WAIT FOR RESULTS FROM SLAVE  */
C              /* RECEIVE  RESULTS FROM SLAVE  */
C
               CALL PVMFRECV(TIDS(NSLAVE),20,IBUF)
               CALL PVMFUNPACK(DTYPE,A,N,1,INFO)
C
   16        CONTINUE
          T2 = DWALLTIME00()
C
C         Output iteration info to user
C         Also, this statement appears to make this program work;
C         without it, some optimization causes a hang.
C         This statement is done 12 * NPOLYS = 240 times.
C
          CT = CT +1
          PRINT *, 'Count check = ', CT
C
          ELSEIF( ME.EQ.NSLAVE ) THEN
C
             DO 116 JT=1,NTIM
C
C              /* RECEIVE DATA FROM MASTER */
C
               CALL PVMFRECV(TIDS(0),10,IBUF)
               CALL PVMFUNPACK(DTYPE,B,N,1,INFO)
C
C              /* CALL DOALL TO COMPUTE POLYNOMIAL */
C
               CALL DOALL(N,NFLP,NPOLRP)
C
C              /* SEND RESULTS TO MASTER */
C
               CALL PVMFINITSEND(PVMDEFAULT,INFO)
               CALL PVMFPACK(DTYPE,A,N,1,INFO)
               CALL PVMFSEND(TIDS(0),20,INFO)
C
C              /*    SLAVE CODE  */
C
  116        CONTINUE
C
           ENDIF
C
           IF( ME.EQ.0 ) THEN
C
C             /* Average loop time, TS  */
C
              TS=((T2-T1)-T0)/FLOAT(NTIM)
C
C             /* Convert to time per vector operation */
C
              TN=TS/FLOAT(NFLOP)
C
C             /* ESCAPE IF VECTOR TOO LONG */
C
              IF( N.EQ.0 ) GOTO 20
C
C             /* UPDATE LEAST SQUARES */
C
              XN=N
              CALL LSTSQ(1,XN,TN,RINF,XN12,PER)
C
C             /* CONVERT TO MFLOP/S */
C
              RINF=RINF*1.0E-6
C
C             /* AVERAGE MFLOP/S */
C
              RMFLPS= (FLOAT(N)/TN)*1.0E-6
              S12   = NFLOP*XN12
              STIME = NFLOP*TN
              ST0   = 0.0
              IF( RINF.NE.0.0 ) ST0 = (S12/RINF)*1.0E-06
              IF( NSEL.EQ.1 ) ST0 = STIME
C
              WRITE(NW1,100) N,STIME,RINF,XN12,S12,PER		
     2                  ,RMFLPS,ST0,NTIM
  100         FORMAT(1X,I6,1PE12.3,0PF12.6,F12.3,1PE12.3,F7.2,
     1         0PF12.6,1PE12.3,I8)
C
C             /* End master code */
C
           ENDIF
C
C       /* END OF NSEL LOOP */
C
   20   CONTINUE
C
        IF( ME.EQ.0 ) THEN
           RLAST(KFSEL)=RINF
           XNLAST(KFSEL)=XN12
        ENDIF
C
C     /* END OF KFSEL LOOP */
C
   21 CONTINUE
C
      IF( ME.EQ.0 ) THEN
C  
C        /* INITIALIZE LEAST SQUARES */
C
         CALL LSTSQ(0,X,Y,RHAT,FHALF,PER)
C
         WRITE(NW1,401)
  401    FORMAT(//,'                     CALCULATION OF FHALF',/,
     1              '                 --------------------',//,
     1    'A straight line is being fitted to y as function of x.',/,
     1    'Inverse slope is RHAT and negative intercept is FHALF.',/,
     1    'If y-values are obviously too noisy, try larger value',/
     1    'of parameter NITER in poly3.inc',//,
     2    2X,' F=x ',7X,' y  ',10X,'RINF',8X,' RHAT  ',2X,'  FHALF',
     2    4X,'ERROR',/,3X,'f/m',6X,'us/mref',7X,'Mflop/s',
     3    6X,'Mflop/s',3X,'flop/mref',4X,'%')
C
         DO 41 KFSEL=1,NPOLYS
           Y=FRAT(KFSEL)/RLAST(KFSEL)
           X=FRAT(KFSEL)
C          /* UPDATE LEAST SQUARES */
           CALL LSTSQ(1,X,Y,RHAT,FHALF,PER)
           WRITE(NW1,400) X,Y,RLAST(KFSEL),RHAT,FHALF,PER        
  400      FORMAT(1X,F6.1,3(1X,0PF12.6),1X,F9.4,1X,F7.2)
   41    CONTINUE
C
         WRITE(NW1,402) RHAT,FHALF
  402    FORMAT(///10X,'LAST VALUES :    RHAT =',0PF12.6,' Mflop/s',//,
     1         10X,'                FHALF =',F12.4,' flop/mref')
C 
         T11 = DWALLTIME00()
C
         TOTIME=T11-T10
         WRITE(NW1,25) TOTIME
   25    FORMAT(//,5X,'TOTAL EXECUTION TIME IS ',1PE20.10,' SECONDS.')
C
         CLOSE(NW1)
C
         PRINT *,'Benchmark completed.'
C 
      ENDIF
C
C     /* Global synchronisation */
C
      CALL PVMFBARRIER(GROUP,NNODE,INFO)
      IF( INFO.LT.0 ) PRINT *,'ERROR: pvmfbarrier returned ',INFO
C
C     Program finished. Leave group and PVM before exiting
C
      CALL PVMFLVGROUP(GROUP,INFO)
      CALL PVMFEXIT(INFO)
      STOP
C
      END
