ccm_barrier_mod.f90 0100644 0000765 0000024 00000012471 07601724105 014042 0 ustar tkaiser staff module ccm_barrier_mod
contains
subroutine ccm_barrier(wait,io_flush,the_err)
use ccm_numz
use ccm_error_mod
implicit none
real,optional :: wait
logical,optional :: io_flush
integer,optional :: the_err
integer :: c,r,m,it
real(b8):: t1,t2,dt,new_t,tx1,tx2
if(present(the_err))the_err=0
call MPI_barrier(mycomm,mpi_err);if(mpi_err .ne. 0)call ccm_mpi_error("ccm_barrier")
if(present(io_flush))then
if(io_flush)then
! do i=1,size(ccm_iounits)
! call flush(ccm_iounits(i))
! enddo
call MPI_barrier(mycomm,mpi_err);if(mpi_err .ne. 0)call ccm_mpi_error("ccm_barrier")
endif
endif
if(present(wait))then
dt=wait
else
return
endif
tx1=ccm_time()+dt
if(dt .gt. 0 .and. dt .lt. 1.0_b8)then
call system_clock(count=c,count_rate=r,count_max=m)
t1=real(c,b8)/real(r,b8)
t2=t1+dt
new_t=t1
do while(t2 .gt. new_t)
call system_clock(count=c)
new_t=real(c,b8)/real(r,b8)
if(new_t .lt. t1)then
new_t=real(m,b8)-new_t
t1=new_t
t2=real(m,b8)-t2
endif
enddo
else
if(dt .gt. 0.0_b8)call sleep(nint(dt))
endif
tx2=ccm_time()
do while(tx2 .lt. tx1)
tx2=ccm_time()
enddo
call MPI_barrier(mycomm,mpi_err);if(mpi_err .ne. 0)call ccm_mpi_error("ccm_barrier")
end subroutine
subroutine ccm_checkin(wait,name,the_err)
use ccm_numz
use ccm_error_mod
implicit none
integer :: status(mpi_status_size)
real,optional,intent(in) :: wait
character(len=*),intent(in),optional :: name
integer, optional :: the_err
real(b8) :: wtime,st,tn,et
integer :: ierr,igot,i,from,len
logical :: flag
character(len=32) :: lname,gname
if(present(the_err))the_err=0
if(present(wait))then
if(wait .le. 0)return
wtime=wait
else
wtime=10.0
endif
if(present(name))then
if(len_trim(name) .le. 32)then
write(lname,"(a)")trim(name)
else
write(lname,"(a)")name(1:32)
endif
else
write(lname,"(a)")"ccm_undefined"
endif
lname=trim(lname)
len=len_trim(lname)
st=ccm_time()
tn=st
et=st+wtime
!write(*,"(i4,2f15.3)")myid,st,et
if(myid .eq. 0)then
igot=1
do while(igot .lt. numnodes .and. tn .lt. et)
flag=.false.
call MPI_Iprobe(MPI_ANY_SOURCE, 8765, mycomm, flag, status,ierr )
if(flag)then
from=status(mpi_source)
call mpi_recv(gname,32,mycharacter,from,8765,mycomm,status,ierr )
if(lge(gname(1:len),lname(1:len)) .and. lle(gname(1:len),lname(1:len)))then
igot=igot+1
!write(*,*)"igot=",igot
else
write(err_str1,*)"deadlock detected at ",tn," process ",myid," and ",from
write(err_str2,*)"waiting for label ",trim(gname)," but got ",trim(lname)
call ccm_warning("ccm_checkin")
if(present(the_err))the_err=-1
return
endif
endif
tn=ccm_time()
enddo
if(igot .ne. numnodes)then
write(err_str1,*)"process ",myid," timed out at ",tn," called with label ",trim(lname)
call ccm_warning("ccm_checkin")
if(present(the_err))the_err=-1
return
endif
do i=1,numnodes-1
call mpi_send(lname,32,mycharacter,i,9876,mycomm,ierr)
enddo
return
endif
call mpi_send(lname,32,mycharacter,0,8765,mycomm,ierr)
igot=0
do while(igot .lt. 1 .and. tn .lt. et)
flag=.false.
call MPI_Iprobe(MPI_ANY_SOURCE, 9876, mycomm, flag, status,ierr )
if(flag)then
from=status(mpi_source)
call mpi_recv(gname,32,mycharacter,from,9876,mycomm,status,ierr )
if(lge(gname(1:len),lname(1:len)) .and. lle(gname(1:len),lname(1:len)))then
igot=igot+1
else
write(err_str1,*)"deadlock detected at ",tn," process ",myid
write(err_str2,*)"waiting for label ",trim(gname)," but got ",trim(lname)
call ccm_warning("ccm_checkin")
if(present(the_err))the_err=-1
return
endif
endif
tn=ccm_time()
enddo
if(igot .ne. 1)then
write(err_str1,*)"process ",myid," timed out at ",tn," called with label ",trim(lname)
call ccm_warning("ccm_checkin")
if(present(the_err))the_err=-1
return
endif
end subroutine
end module
ccm_init_mod.f90 0100644 0000765 0000024 00000036457 07601724105 013371 0 ustar tkaiser staff module ccm_init_mod
logical,private,save :: ccm_i_did_it
logical,private,save :: ccm_open=.false.
integer,private,parameter :: ccm_ntypes=9 ! ccm_ntypes=9 if using real*4, real*8, real*16
integer,private,save :: ccm_sizes(ccm_ntypes),ccm_kinds(ccm_ntypes)
character(len=32),private,save :: ccm_names(ccm_ntypes)
private bytes
contains
function bytes(digits)
implicit none
integer,intent(in) :: digits
integer :: bytes,temp
temp=ceiling(real(digits)/8.0)
if(temp .gt. 8)then
bytes=16
return
endif
if(temp .gt. 4)then
bytes=8
return
endif
if(temp .gt. 2)then
bytes=4
return
endif
if(temp .gt. 1)then
bytes=2
return
endif
bytes=0
end function
subroutine ccm_unique(name,base)
use ccm_numz
implicit none
character (len=*) :: name
character (len=*), optional :: base
character (len=80) :: temp
if(present(base))then
write(temp,"(a,i5.5)")trim(base),myid
else
write(temp,"(a,i5.5)")trim("out_"),myid
endif
name=trim(temp)
end subroutine ccm_unique
subroutine ccm_info(my_id,num_procs,n_types,sizes,kinds,names,the_err)
use ccm_numz
use ccm_error_mod
implicit none
integer,optional,intent(out) :: my_id,num_procs,n_types
integer,optional,intent(out) :: sizes(:),kinds(:)
character(len=20),optional,intent(out) :: names(:)
integer,optional,intent(out) :: the_err
if(present(the_err))the_err=0
if(present(my_id))my_id=myid
if(present(num_procs))num_procs=numnodes
if(present(n_types))n_types=ccm_ntypes
if(present(sizes))then
if(size(sizes) .lt. ccm_ntypes)then
write(err_str1,*)"sizes array too small needs to be : ",ccm_ntypes
call ccm_warning("ccm_info")
if(present(the_err))the_err=1
return
else
sizes(1:ccm_ntypes)=ccm_sizes(1:ccm_ntypes)
endif
endif
if(present(kinds))then
if(size(kinds) .lt. ccm_ntypes)then
write(err_str1,*)"kinds array too small needs to be : ",ccm_ntypes
call ccm_warning("ccm_info")
if(present(the_err))the_err=1
return
else
kinds(1:ccm_ntypes)=ccm_kinds(1:ccm_ntypes)
endif
endif
if(present(names))then
if(size(names) .lt. ccm_ntypes)then
write(err_str1,*)"names array too small needs to be : ",ccm_ntypes
call ccm_warning("ccm_info")
if(present(the_err))the_err=1
return
else
names(1:ccm_ntypes)=ccm_names(1:ccm_ntypes)
endif
endif
end subroutine ccm_info
subroutine ccm_init(my_id,num_procs,info,the_err)
use ccm_numz
use ccm_error_mod
implicit none
real :: normal_real
complex :: normal_complex
double precision :: normal_dp
integer :: normal_integer
integer(def_int) :: my_integer
logical :: normal_logical
character :: normal_char
integer :: lr,lc,ld,li,la,ll,di
integer :: map_reals
integer, optional,intent (out) :: the_err
integer, optional,intent (out) :: my_id
integer, optional,intent (out) :: num_procs
logical, optional,intent (in) :: info
logical :: flag
logical, save :: been_called=.false.
integer :: real_extent,double_extent,comp_extent,dpcp_extent,int_extent
integer :: lshort,llong,lcomp,ldpcp,lint
real(b4):: short_x
real(b8):: long_x
complex(c4):: short_c
complex(c8):: long_c
if(present(the_err))the_err=MPI_ZERO
call ccm_clear_warning()
if(been_called)then
write(err_str1,*)"ccm_init has already been called"
call ccm_fatal("ccm_init")
return
endif
been_called =.true.
ccm_auto_print=.true.
do_tests=0
call date_and_time(values=ccm_start_time)
call MPI_INITIALIZED(flag,mpi_err)
if(flag)then
ccm_i_did_it=.false.
else
ccm_i_did_it=.true.
call MPI_INIT(mpi_err)
endif
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
ccm_open=.true.
! create my own version of MPI_DOUBLE_PRECISION and MPI_REAL, MPI_INTEGER
map_reals=200
select case (map_reals)
case(100)
! force to 4 and 8 bytes using character
call MPI_TYPE_CONTIGUOUS(4,MPI_CHARACTER,myreal,mpi_err)
call MPI_TYPE_COMMIT(myreal,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
call MPI_TYPE_CONTIGUOUS(8,MPI_CHARACTER,mydouble,mpi_err)
call MPI_TYPE_COMMIT(mydouble,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
! quad precision, not available on all machines
! but the mpi define should still work
call MPI_TYPE_CONTIGUOUS(16,MPI_CHARACTER,myqp,mpi_err)
call MPI_TYPE_COMMIT(myqp,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
case(200)
! use mpi_real4 and mpi_real8
call MPI_TYPE_CONTIGUOUS(1,mpi_real4,myreal,mpi_err)
call MPI_TYPE_COMMIT(myreal,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
call MPI_TYPE_CONTIGUOUS(1,mpi_real8,mydouble,mpi_err)
call MPI_TYPE_COMMIT(mydouble,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
! quad precision, not available on all machines
! but the mpi define should still work
call MPI_TYPE_CONTIGUOUS(2,mpi_real8,myqp,mpi_err)
call MPI_TYPE_COMMIT(myqp,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
case default
! use mpi_real mpi_double_precision
call MPI_TYPE_CONTIGUOUS(1,mpi_real,myreal,mpi_err)
call MPI_TYPE_COMMIT(myreal,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
call MPI_TYPE_CONTIGUOUS(1,mpi_double_precision,mydouble,mpi_err)
call MPI_TYPE_COMMIT(mydouble,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
! quad precision, not available on all machines
! but the mpi define should still work
call MPI_TYPE_CONTIGUOUS(2,mpi_double_precision,myqp,mpi_err)
call MPI_TYPE_COMMIT(myqp,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
end select
!
call MPI_TYPE_CONTIGUOUS(1,MPI_INTEGER,myint,mpi_err)
call MPI_TYPE_COMMIT(myint,mpi_err);if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
!
call MPI_TYPE_CONTIGUOUS(1,MPI_logical,mylogical,mpi_err)
call MPI_TYPE_COMMIT(mylogical,mpi_err);if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
call MPI_TYPE_CONTIGUOUS(1,MPI_character,mycharacter,mpi_err)
call MPI_TYPE_COMMIT(mycharacter,mpi_err);if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
! we define our version of complex in terms of our versions of real
!
call MPI_TYPE_CONTIGUOUS(2,myreal,mycomp,mpi_err)
call MPI_TYPE_COMMIT(mycomp,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
!
call MPI_TYPE_CONTIGUOUS(2,mydouble,mydpcomp,mpi_err)
call MPI_TYPE_COMMIT(mydpcomp,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
!
! quad precision, not available on all machines
! but the mpi define should still work
call MPI_TYPE_CONTIGUOUS(2,myqp,myqpcomp,mpi_err)
call MPI_TYPE_COMMIT(myqpcomp,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
! check the match between fortran sizes and mpi sizes
call MPI_TYPE_EXTENT(myreal,real_extent,mpi_err)
short_x=0.0
lshort=bytes(digits(short_x))
!
call MPI_TYPE_EXTENT(mydouble,double_extent,mpi_err)
long_x=0.0
llong=bytes(digits(long_x))
!
call MPI_TYPE_EXTENT(mycomp,comp_extent,mpi_err)
short_c=0.0
lcomp=bytes(digits(real(short_c,c4)))*2
!
call MPI_TYPE_EXTENT(mydpcomp,dpcp_extent,mpi_err)
long_c=0.0
ldpcp=bytes(digits(real(long_c,c8)))*2
!
call MPI_TYPE_EXTENT(myint,int_extent,mpi_err)
my_integer=0
lint=bytes(digits(my_integer))
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
!
normal_real=0
normal_complex=0
normal_dp=0
normal_integer=0
normal_logical=.false.
normal_char=char(32)
lr=bytes(digits(normal_real))
lc=bytes(digits(real(normal_complex)))
ld=bytes(digits(normal_dp))
li=bytes(digits(normal_integer))
ll=1 !portable?
la=1 !portable?
call MPI_TYPE_EXTENT(mylogical,ll,mpi_err)
call MPI_TYPE_EXTENT(mycharacter,la,mpi_err)
! we want our own comm_world and associated information
call MPI_COMM_DUP( MPI_COMM_WORLD, mycomm, mpi_err );if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
call MPI_COMM_RANK( mycomm, myid, mpi_err );if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
call MPI_COMM_SIZE( mycomm, numnodes, mpi_err );if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
if(present(my_id))my_id=myid
if(present(num_procs))num_procs=numnodes
call ccm_unique(ccm_name,"ccm_out_")
ccm_sizes(1 )=bytes(digits(short_x))
ccm_kinds(1 )=kind(short_x)
ccm_names(1 )="real short"
ccm_sizes(2 )=bytes(digits(long_x))
ccm_kinds(2 )=kind(long_x)
ccm_names(2 )="real long"
ccm_sizes(3 )=bytes(digits(real(short_c,c4)))*2
ccm_kinds(3 )=kind(short_c)
ccm_names(3 )="complex short"
ccm_sizes(4 )=bytes(digits(real(long_c,c8)))*2
ccm_kinds(4 )=kind(long_c)
ccm_names(4 )="complex long"
ccm_sizes(5 )=bytes(digits(my_integer))
ccm_kinds(5 )=kind(my_integer)
ccm_names(5 )="integer default"
ccm_sizes(6 )=ll
ccm_kinds(6 )=kind(normal_logical)
ccm_names(6 )="logical default"
ccm_sizes(7 )=la
ccm_kinds(7 )=kind(normal_char)
ccm_names(7 )="character default"
if(ccm_ntypes .eq. 9)then
if(b16 .gt. 0)then
ccm_sizes(8 )=ccm_sizes(2 )*2
ccm_kinds(8 )=b16
ccm_names(8 )="quad real"
ccm_sizes(9 )=ccm_sizes(2 )*4
ccm_kinds(9 )=c16
ccm_names(9 )="quad complex"
else
ccm_sizes(8 )=-1
ccm_kinds(8 )=-1
ccm_names(8 )="quad real undefined"
ccm_sizes(9 )=-1
ccm_kinds(9 )=-1
ccm_names(9 )="quad complex undefined"
endif
endif
if(present(info))then
if(info .and. myid .eq. 0)then
write(*,*)"Collective Communications Module"
write(*,*)" MPI reference implementation"
write(*,fmt="(""real size = "",i4)")lr
write(*,fmt="(""complex size = "",i4)")lc
write(*,fmt="(""double size = "",i4)")ld
write(*,fmt="(""integer size = "",i4)")li
write(*,fmt="(""character size = "",i4)")la
write(*,fmt="(""logical size = "",i4)")ll
write(*,fmt="(""myint size = "",i4,"" integer(default) size = "",i4)")int_extent,lint
write(*,fmt="(""myreal size = "",i4,"" real(b4) size = "",i4)")real_extent,lshort
write(*,fmt="(""mydouble size = "",i4,"" real(b8) size = "",i4)")double_extent,llong
write(*,fmt="(""mycomplex size = "",i4,"" complex(c4) size = "",i4)")comp_extent,lcomp
write(*,fmt="(""mydpcomp size = "",i4,"" complex(c8) size = "",i4)")dpcp_extent,ldpcp
if(ccm_ntypes .eq. 9 .and. b16 .gt. 0)then
write(*,fmt="(""myqp size = "",i4,"" real(b16) size = "",i4)")double_extent*2,llong*2
write(*,fmt="(""myqpcomp size = "",i4,"" complex(16) size = "",i4)")double_extent*4,llong*4
endif
if(ccm_ntypes .eq. 9 .and. b16 .le. 0)then
write(*,fmt="(""myqp size = "",i4,"" real(b16) size = "",i4)")-1,-1
write(*,fmt="(""myqpcomp size = "",i4,"" complex(16) size = "",i4)")-1,-1
endif
endif
endif
if((real_extent .ne. lshort) .or. (double_extent .ne. llong) .or. &
(comp_extent .ne. lcomp) .or. (dpcp_extent .ne. ldpcp) .or. &
(li .ne. lint) ) then
write(*,*)"There is a missmatch between data types"
write(*,*)"Adjust the sizes in ccm_numz , ccm_init_mod and recompile"
write(*,fmt="(""real size = "",i4)")lr
write(*,fmt="(""complex size = "",i4)")lc
write(*,fmt="(""double size = "",i4)")ld
write(*,fmt="(""integer size = "",i4)")li
write(*,fmt="(""character size = "",i4)")la
write(*,fmt="(""logical size = "",i4)")ll
write(*,fmt="(""myint size = "",i4,"" integer(default) size = "",i4)")int_extent,lint
write(*,fmt="(""myreal size = "",i4,"" real(b4) size = "",i4)")real_extent,lshort
write(*,fmt="(""mydouble size = "",i4,"" real(b8) size = "",i4)")double_extent,llong
write(*,fmt="(""mycomplex size = "",i4,"" complex(c4) size = "",i4)")comp_extent,lcomp
write(*,fmt="(""mydpcomp size = "",i4,"" complex(c8) size = "",i4)")dpcp_extent,ldpcp
call MPI_FINALIZE(mpi_err)
stop
endif
end subroutine
subroutine ccm_close(the_err)
use ccm_numz
use ccm_error_mod
implicit none
integer, optional,intent (out) :: the_err
if(.not. ccm_open)then
write(err_str1,*)"ccm_init is not open"
call ccm_warning("ccm_close")
if(present(the_err))the_err=-1
return
endif
ccm_open=.false.
if(ccm_i_did_it)then
call mpi_finalize(mpi_err)
if(present(the_err))the_err=mpi_err
if(mpi_err .ne. MPI_ZERO)return
else
call mpi_type_free(myreal,mpi_err)
if(present(the_err))the_err=mpi_err
if(mpi_err .ne. MPI_ZERO)return
call mpi_type_free(mydouble,mpi_err)
if(present(the_err))the_err=mpi_err
if(mpi_err .ne. MPI_ZERO)return
call mpi_comm_free(mycomm,mpi_err)
if(present(the_err))the_err=mpi_err
if(mpi_err .ne. MPI_ZERO)return
endif
end subroutine ccm_close
end module
ccm_mod.f90 0100644 0000765 0000024 00000001025 07601724105 012325 0 ustar tkaiser staff module ccm
use ccm_allreduce_mod
use ccm_alltoall_mod
use ccm_alltoallv_mod
use ccm_barrier_mod
use ccm_bcast_mod
use ccm_gather_mod
use ccm_gatherv_mod
use ccm_init_mod
use ccm_numz, only : ccm_reproducible,ccm_fast,ccm_name,ccm_auto_print, &
ccm_timeout,ccm_time,ccm_testing,ccm_checksize, &
ccm_deadlock,ccm_trace,ccm_internal,ccm_alloff
use ccm_reduce_mod
use ccm_scatter_mod
use ccm_scatterv_mod
use ccm_error_mod
end module
make_mod.f90 0100644 0000765 0000024 00000024732 07601724105 012512 0 ustar tkaiser staff program make_mod
character (len=32) :: mod_name
character (len=132),target :: base1(200),base2(200),base3(200),base4(200)
character (len=132) :: temp1,temp2
character (len=132),pointer:: text(:)
character (len=32) :: rname(10),ftype(10),mpitype(10),shmemtype(10)
character (len=15) :: ranks(0:7)
character (len=15) :: one(0:7)
character (len=3) :: zone(0:7)
character (len=32)infile,outfile
integer :: start1,end1,start2,end2,end_max
character (len=1),allocatable::did_it(:,:)
character (len=1)::tab
logical :: same
ranks = (/" ","(:) ",&
"(:,:) ","(:,:,:) ",&
"(:,:,:,:) ","(:,:,:,:,:) ",&
"(:,:,:,:,:,:) ","(:,:,:,:,:,:,:)"/)
one = (/" ","(:) ",&
"(:,1) ","(:,1,1) ",&
"(:,1,1,1) ","(:,1,1,1,1) ",&
"(:,1,1,1,1,1) ","(:,1,1,1,1,1,1)"/)
zone= (/" ", "(:)",&
"(:)", "(:)",&
"(:)", "(:)",&
"(:)", "(:)"/)
tab=char(9)
read(*,"(a)")temp1
i1=index(temp1," ")
i2=len_trim(temp1)
read(temp1(1:i1-1),"(a)")infile
read(temp1(i1+1:i2),"(a)")outfile
write(*,*)infile,outfile
open(12,file=infile)
open(13,file=outfile)
read(12,*)same
read(12,*)mod_name
read(12,*)start1,end1,start2,end2
i=0
do itmp=1,10
read(12,"(a)")base1(1)
!if(index(base1(1) ,"!") .ne. 0)cycle
i=i+1
if(index(base1(1) ,"subroutine") .ne. 0 .or. &
index(base1(1) ,"function") .ne. 0)then
n=i-1
exit
endif
i0=1
i4=len_trim(base1(1))
i3=i4
k=0
do j=1,i4
if(ichar(base1(1)(j:j)) .eq. ichar(' '))then
if(k .eq. 0)i1=j
if(k .eq. 1)i2=j
if(k .eq. 2)i3=j
k=k+1
endif
enddo
read(base1(1)(i0:i1-1),"(a)")rname(i)
i1=i1+1
read(base1(1)(i1:i2-1),"(a)")ftype(i)
i2=i2+1
if(k .eq. 3)then
read(base1(1)(i2:i3-1),"(a)")mpitype(i)
i3=i3+1
read(base1(1)(i3:i4),"(a)")shmemtype(i)
else
read(base1(1)(i2:i4),"(a)")mpitype(i)
shmemtype(i)=""
endif
! write(*,*)"!",i0,i1,i2,i3,i4
! write(*,*)"!",rname(i),ftype(i),mpitype(i),shmemtype(i)
enddo
!base1 rank input .eq. 0 .and. rank output .eq. 0
do i=2,200
read(12,"(a)")base1(i)
if(index(base1(i) ,"end subroutine") .ne. 0 .or. &
index(base1(i) ,"end function") .ne. 0)then
l1=i
exit
endif
enddo
!base2 rank input .ne. 0 .and. rank output .ne. 0
do i=1,200
read(12,"(a)")base2(i)
if(index(base2(i) ,"end subroutine") .ne. 0 .or. &
index(base2(i) ,"end function") .ne. 0)then
l2=i
exit
endif
enddo
if(index(base1(3),"same") .ne. 0)then
do i=1,l2
base1(i)=base2(i)
enddo
l1=l2
endif
!!base3 rank input .ne. 0 .and. rank output .eq. 0
! do i=1,200
! read(12,"(a)")base3(i)
! if(index(base3(i) ,"end subroutine") .ne. 0)then
! l3=i
! exit
! endif
! enddo
!!base4 rank input .eq. 0 .and. rank output .ne. 0
! do i=1,200
! read(12,"(a)")base4(i)
! if(index(base4(i) ,"end subroutine") .ne. 0)then
! l4=i
! exit
! endif
! enddo
write(13,*)"!************************"
write(13,"(""module ccm_"",a,""_mod"")")trim(mod_name)
write(13,"("" public :: ccm_"",a)")trim(mod_name)
write(13,"("" interface ccm_"",a)")trim(mod_name)
end_max=max(end1,end2)
allocate(did_it(0:end_max,0:end_max))
did_it="n"
do k=1,n
do i=start1,end1
do j=start2,end2
if(same .and. (j .ne. i))cycle
did_it(i,j)="y"
if(index(rname(k),"!").ne. 0)cycle
write(13,"(8x,""module procedure "",a,""_"",a,i1,i1)")trim(mod_name),trim(rname(k)),i,j
enddo
enddo
enddo
write(13,*)" end interface"
write(13,*)" contains"
open(14,file="build_report",status="unknown",position="append")
write(14,*)"
"
write(14,"(a,a,a)",advance="no")"",trim(mod_name)," | "
do j=start2,end2
if(j .ne. end2) then
write(14,"(a,i1,a)",advance="no")"",j," | "
else
write(14,"(a,i1,a)")"",j," |
"
endif
enddo
do i=start1,end1
write(14,"(a,i1,a)",advance="no")"",i," | "
do j=start2,end2
if(j .ne. end2) then
write(14,"(a,a,a)",advance="no")"",did_it(i,j)," | "
else
write(14,"(a,a,a)")"",did_it(i,j)," |
"
endif
enddo
enddo
write(14,*)"
"
write(14,*)""
write(14,"(a,a,a)",advance="no")"",trim(mod_name)," | "
do k=1,n
if(index(rname(k),"!").ne. 0)cycle
write(14,"(a,a,a)",advance="no")"",trim(ftype(k))," | "
enddo
write(14,"(a)")"
"
write(14,*)"
"
close(14)
do k=1,n
if(index(rname(k),"!").ne. 0)cycle
do i=start1,end1
do j=start2,end2
if(same .and. (j .ne. i))cycle
if(i .eq. 0 .and. j .eq. 0)then
text=>base1
nl=l1
else
text=>base2
nl=l2
endif
! if(i .ne. 0 .and. j .ne. 0)then
! text=>base2
! nl=l2
! endif
! if(i .eq. 0 .and. j .ne. 0)then
! text=>base2
! nl=l3
! cycle
! endif
! if(i .ne. 0 .and. j .eq. 0)then
! text=>base2
! nl=l4
! cycle
! endif
do l=1,nl
write(temp1,"(a)")trim(text(l))
if((index(temp1,"! r1=0") .ne. 0) .and. (i .ne. 0))cycle
if((index(temp1,"! r1>0") .ne. 0) .and. (i .eq. 0))cycle
if((index(temp1,"! r2=0") .ne. 0) .and. (j .ne. 0))cycle
if((index(temp1,"! r2>0") .ne. 0) .and. (j .eq. 0))cycle
do while (index(temp1,"$1") .ne. 0)
! $1 == mod_name
in=index(temp1,"$1")
len=len_trim(temp1)
write(temp2,"(a,a,a)")(temp1(1:in-1)),trim(mod_name),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$2") .ne. 0)
! $2 == routine name with ranks
in=index(temp1,"$2")
len=len_trim(temp1)
write(temp2,"(a,a,2i1,a)")(temp1(1:in-1)),trim(rname(k)),i,j,trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$h") .ne. 0)
! $2 == routine name with ranks
in=index(temp1,"$h")
len=len_trim(temp1)
write(temp2,"(a,a,a)")(temp1(1:in-1)),trim(rname(k)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$3") .ne. 0)
! $3 == data type
in=index(temp1,"$3")
len=len_trim(temp1)
write(temp2,"(a,a,a)")(temp1(1:in-1)),trim(ftype(k)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$4") .ne. 0)
! $4 == input data rank
in=index(temp1,"$4")
len=len_trim(temp1)
write(temp2,"(a,a,a)")(temp1(1:in-1)),trim(ranks(i)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$5") .ne. 0)
! $5 == output data rank
in=index(temp1,"$5")
len=len_trim(temp1)
write(temp2,"(a,a,a)")trim(temp1(1:in-1)),trim(ranks(j)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$f") .ne. 0)
! $f == input data rank
in=index(temp1,"$f")
len=len_trim(temp1)
write(temp2,"(a,a,a)")(temp1(1:in-1)),trim(one(i)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$j") .ne. 0)
! $j == pointer rank
in=index(temp1,"$j")
len=len_trim(temp1)
write(temp2,"(a,a,a)")(temp1(1:in-1)),trim(zone(j)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$k") .ne. 0)
! $k == pointer rank
in=index(temp1,"$k")
len=len_trim(temp1)
write(temp2,"(a,a,a)")(temp1(1:in-1)),trim(zone(i)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$g") .ne. 0)
! $g == output data rank
in=index(temp1,"$g")
len=len_trim(temp1)
write(temp2,"(a,a,a)")trim(temp1(1:in-1)),trim(one(j)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$6") .ne. 0)
! $6 == mpitype
in=index(temp1,"$6")
len=len_trim(temp1)
write(temp2,"(a,a,a)")trim(temp1(1:in-1)),trim(mpitype(k)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$i") .ne. 0)
! $i == shmemtype
in=index(temp1,"$i")
len=len_trim(temp1)
write(temp2,"(a,a,a)")trim(temp1(1:in-1)),trim(shmemtype(k)),trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$l") .ne. 0)
! $l == i
in=index(temp1,"$l")
len=len_trim(temp1)
write(temp2,"(a,i1,a)")trim(temp1(1:in-1)),i,trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
do while (index(temp1,"$m") .ne. 0)
! $l == j
in=index(temp1,"$m")
len=len_trim(temp1)
write(temp2,"(a,i1,a)")trim(temp1(1:in-1)),j,trim(temp1(in+2:len))
write(temp1,"(a)")trim(temp2)
enddo
write(13,*)trim(temp1)
enddo
enddo
enddo
enddo
write(13,*)"end module"
end program
ccm_error_mod.f90 0100644 0000765 0000024 00000014473 07601724105 013551 0 ustar tkaiser staff module ccm_error_mod
contains
subroutine ccm_clear_warning()
use ccm_numz
write(err_str1,"(1a1)")" "
write(err_str2,"(1a1)")" "
write(err_str3,"(1a1)")" "
write(err_str4,"(1a1)")" "
end subroutine ccm_clear_warning
subroutine ccm_warning(routine,dtype,rank1,rank2)
use ccm_numz
character(len=*),intent(in):: routine
integer,optional :: dtype
integer,intent(in),optional::rank1,rank2
character(len=25):: the_type
write(the_type,*)"undefined"
if(present(dtype))then
write(the_type,*)"nonstandard"
if(dtype .eq. myreal)write(the_type,*)"real single precision"
if(dtype .eq. mydouble)write(the_type,*)"real double precision"
if(dtype .eq. mycomp)write(the_type,*)"complex single precision"
if(dtype .eq. mydpcomp)write(the_type,*)"complex double precision"
if(dtype .eq. myint)write(the_type,*)"normal integer"
if(dtype .eq. mycharacter)write(the_type,*)"character"
if(dtype .eq. mylogical)write(the_type,*)"logical"
if(dtype .eq. myquad)write(the_type,*)"real quad precision"
if(dtype .eq. myqpcomp)write(the_type,*)"complex quad precision"
if(dtype .eq. mylong)write(the_type,*)"alternate length integer"
endif
if(present(dtype))then
write(err_str0,*)"routine: ",routine," and data type ",the_type
else
write(err_str0,*)"routine: ",routine
endif
if(present(rank1))then
if(rank1 .gt. -1)then
write(err_str3,*)"rank of input data:",rank1
else
write(err_str3,*)"rank of input data: undefined"
endif
else
write(err_str3,*)" "
endif
if(present(rank2))then
if(rank2 .gt. -1)then
write(err_str4,*)"rank of output data:",rank2
else
write(err_str4,*)"rank of output data: undefined"
endif
else
write(err_str4,*)" "
endif
if(ccm_auto_print)then
write(*,"(i5,a,a)")myid,": ","Warning from collective communications module"
write(*,"(i5,a,a)")myid,": ",trim(err_str0)
write(*,"(i5,a,a)")myid,": ",trim(err_str1)
write(*,"(i5,a,a)")myid,": ",trim(err_str2)
write(*,"(i5,a,a)")myid,": ",trim(err_str3)
write(*,"(i5,a,a)")myid,": ",trim(err_str4)
endif
end subroutine
subroutine ccm_print_warning(file_number)
use ccm_numz
integer, optional :: file_number
if(present(file_number))then
write(file_number,*)"Warning from collective communications module"
write(file_number,"(i5,a,a)")myid,": ",trim(err_str0)
write(file_number,"(i5,a,a)")myid,": ",trim(err_str1)
write(file_number,"(i5,a,a)")myid,": ",trim(err_str2)
write(file_number,"(i5,a,a)")myid,": ",trim(err_str3)
write(file_number,"(i5,a,a)")myid,": ",trim(err_str4)
else
write(*,*)"Warning from collective communications module"
write(*,"(i5,a,a)")myid,": ",trim(err_str0)
write(*,"(i5,a,a)")myid,": ",trim(err_str1)
write(*,"(i5,a,a)")myid,": ",trim(err_str2)
write(*,"(i5,a,a)")myid,": ",trim(err_str3)
write(*,"(i5,a,a)")myid,": ",trim(err_str4)
endif
end subroutine ccm_print_warning
subroutine ccm_fatal(routine,dtype,rank1,rank2)
use ccm_numz
character(len=*),intent(in):: routine
integer,optional :: dtype
integer,intent(in),optional::rank1,rank2
character(len=25):: the_type
write(the_type,*)"undefined"
if(present(dtype))then
if(dtype .eq. myreal)write(the_type,*)"real single precision"
if(dtype .eq. mydouble)write(the_type,*)"real double precision"
if(dtype .eq. mycomp)write(the_type,*)"complex single precision"
if(dtype .eq. mydpcomp)write(the_type,*)"complex double precision"
if(dtype .eq. myint)write(the_type,*)"normal integer"
if(dtype .eq. mycharacter)write(the_type,*)"character"
if(dtype .eq. mylogical)write(the_type,*)"logical"
if(dtype .eq. mylong)write(the_type,*)"alternate length integer"
endif
write(*,*)"fatal error from collective communications module"
if(present(dtype))then
write(err_str0,*)"routine: ",routine," and data type ",the_type
else
write(err_str0,*)"routine: ",routine
endif
write(*,"(i5,a,a)")myid,": ",err_str0
if(present(rank1))then
if(rank1 .gt. -1)then
write(*,"(i5,a,a)")myid,": ","rank of input data:",rank1
else
write(*,"(i5,a,a)")myid,": ","rank of input data: undefined"
endif
endif
if(present(rank2))then
if(rank2 .gt. -1)then
write(*,"(i5,a,a)")myid,": ","rank of output data:",rank2
else
write(*,"(i5,a,a)")myid,": ","rank of output data: undefined"
endif
endif
write(*,"(i5,a,a)")myid,": ",trim(err_str1)
write(*,"(i5,a,a)")myid,": ",trim(err_str2)
call mpi_abort(mpi_comm_world,-1,mpi_err)
stop
end subroutine
subroutine ccm_mpi_error(routine,details)
use ccm_numz
character(len=*),intent(in):: routine
character(len=*),optional,intent(in):: details
write(err_str1,*)"underling communications error"
if(present(details))write(err_str2,*)details
call ccm_fatal(routine)
end subroutine ccm_mpi_error
end module
ccm_merge_mod.f90 0100644 0000765 0000024 00000147314 07601724105 013520 0 ustar tkaiser staff module ccm_merge
public :: merge_it
interface merge_it
module procedure merge_in
module procedure merge_sp
module procedure merge_dp
module procedure merge_comp
module procedure merge_dpcomp
module procedure merge_logical
! module procedure merge_qpcomp !qp
! module procedure merge_qp !qp
end interface
public ccm_operations
interface ccm_operations
module procedure list_in
module procedure list_sp
module procedure list_dp
module procedure list_comp
module procedure list_dpcomp
module procedure list_logical
! module procedure list_qpcomp !qp
! module procedure list_qp !qp
end interface
contains
subroutine list_in(x,list,n)
use ccm_numz
integer(def_int) :: x
character(len=4) :: list(:)
integer :: n
n=7
list(1:n)=(/"+ ", "min ", "max ", "* " , "and ", "or " , "xor "/)
end subroutine
subroutine list_sp(x,list,n)
use ccm_numz
real(b4) :: x
character(len=4) :: list(:)
integer :: n
n=4
list(1:n)=(/"+ ", "min ", "max ", "* " /)
end subroutine
subroutine list_dp(x,list,n)
use ccm_numz
real(b8) :: x
character(len=4) :: list(:)
integer :: n
n=4
list(1:n)=(/"+ ", "min ", "max ", "* "/)
end subroutine
subroutine list_comp(x,list,n)
use ccm_numz
complex(c4) :: x
character(len=4) :: list(:)
integer :: n
n=2
list(1:n)=(/"+ ", "* "/)
end subroutine
subroutine list_dpcomp(x,list,n)
use ccm_numz
complex(c8) :: x
character(len=4) :: list(:)
n=2
list(1:n)=(/"+ ", "* "/)
end subroutine
subroutine list_logical(x,list,n)
use ccm_numz
logical :: x
character(len=4) :: list(:)
integer :: n
n=3
list(1:n)=(/"and ", "or " , "xor "/)
end subroutine
subroutine get_oper(oper,intype,mpi_oper,my_oper,illegal)
use ccm_numz
use ccm_error_mod
implicit none
character (len= *),intent (in) :: oper
integer , intent(in) :: intype
integer , intent(out) :: mpi_oper
character (len= *),intent (out) :: my_oper
logical, intent(out) :: illegal
illegal=.false.
if (intype .eq. myreal .or. &
intype .eq. mydouble .or. &
intype .eq. myqp )then
select case (oper)
case ("+","sum","SUM")
mpi_oper=MPI_SUM
my_oper="+"
case ("min","MIN")
mpi_oper=MPI_MIN
my_oper="min"
case ("max","MAX")
mpi_oper=MPI_MAX
my_oper="max"
case ("*","prod","PROD")
mpi_oper=MPI_PROD
my_oper="*"
case default
my_oper="unde"
mpi_oper=-1
illegal=.true.
end select
endif
if (intype .eq. myint)then
select case (oper)
case ("+","sum","SUM")
mpi_oper=MPI_SUM
my_oper="+"
case ("min","MIN")
mpi_oper=MPI_MIN
my_oper="min"
case ("max","MAX")
mpi_oper=MPI_MAX
my_oper="max"
case ("*","prod","PROD")
mpi_oper=MPI_PROD
my_oper="*"
case ("and","AND")
mpi_oper=MPI_BAND
my_oper="and"
case ("or","OR")
mpi_oper=MPI_BOR
my_oper="or"
case ("xor","XOR")
mpi_oper=MPI_BXOR
my_oper="xor"
case default
my_oper="unde"
mpi_oper=-1
illegal=.true.
end select
endif
if (intype .eq. mycomp .or. &
intype .eq. mydpcomp .or. &
intype .eq. myqpcomp )then
select case (oper)
case ("+","sum","SUM")
mpi_oper=MPI_SUM
my_oper="+"
case ("*","prod","PROD")
mpi_oper=MPI_PROD
my_oper="*"
case default
my_oper="unde"
mpi_oper=-1
illegal=.true.
end select
endif
if (intype .eq. mylogical)then
select case (oper)
case ("and","AND")
mpi_oper=MPI_LAND
my_oper="and"
case ("or","OR")
mpi_oper=MPI_LOR
my_oper="or"
case ("xor","XOR")
mpi_oper=MPI_LXOR
my_oper="xor"
case default
my_oper="unde"
mpi_oper=-1
illegal=.true.
end select
endif
end subroutine get_oper
subroutine merge_in(input,n,output,oper,root,THETAG,THECOM,ierr)
use ccm_numz
use ccm_error_mod
implicit none
integer , intent(in) :: n
integer , intent(in) :: input(:)
integer , intent (inout):: output(:)
integer , intent(in) :: root
integer , intent(inout) :: ierr
character (len=*),intent (in) :: oper
integer :: THETAG,THECOM,NTAG,it
integer , allocatable:: rec(:)
integer , allocatable :: the_data(:)
integer :: AN_MPI_TYPE
integer :: p,active,rank,i
integer :: status(MPI_STATUS_SIZE)
logical :: did_get
did_get=.false.
AN_MPI_TYPE = myint
rank=myid
p=numnodes
if(root .ne. 0)then
rank=rank-root
if(rank .lt. 0)rank=(p+rank)
endif
active=1
do while (2*active < p)
active=active*2
enddo
if(rank .ge. active)then
it=rank-active+root
if(it .ge. p)it=p-it
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input
call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
return
endif
allocate(the_data(n))
if(rank + active .lt. p)then
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec
did_get=.true.
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
call merge2_in(input,n,rec,oper)
the_data=rec
endif
do while (active .gt. 1)
active=active/2
if(rank .ge. active) then
it=rank-active+root
if(it .ge. p)it=p-it
if(.not. did_get)the_data(1:n)=input(1:n)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data
call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
else
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(.not. did_get)the_data(1:n)=input(1:n)
call merge2_in(rec,n,the_data,oper)
did_get=.true.
endif
enddo
if(myid .eq. root)then
do i=1,n
output(i)=the_data(i)
enddo
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
end subroutine merge_in
subroutine merge2_in(d1,n,d2,oper)
use ccm_numz
use ccm_error_mod
integer, intent (in) :: n
integer, intent (in):: d1(*)
integer, intent (inout):: d2(*)
character (len=*),intent (in) :: oper
integer :: i
select case (oper)
case ("+")
d2(1:n)=d1(1:n)+d2(1:n)
case ("min")
d2(1:n)=min(d1(1:n),d2(1:n))
case ("max")
d2(1:n)=max(d1(1:n),d2(1:n))
case ("*")
d2(1:n)=d1(1:n)*d2(1:n)
case ("and")
d2(1:n)=iand(d1(1:n),d2(1:n))
case ("or")
d2(1:n)=ior(d1(1:n),d2(1:n))
case ("xor")
d2(1:n)=ieor(d1(1:n),d2(1:n))
case default
write(*,*)"error in merge2_in oper = ",oper
end select
end subroutine merge2_in
subroutine merge_sp(input,n,output,oper,root,THETAG,THECOM,ierr)
use ccm_numz
use ccm_error_mod
implicit none
integer, intent(in) :: n
real(b4), intent(in) :: input(n)
real(b4), intent (inout):: output(n)
integer, intent(in) :: root
integer, intent(inout) :: ierr
character (len=*),intent (in) :: oper
integer :: THETAG,THECOM,NTAG,it
real(b4) , allocatable:: rec(:)
real(b4) , allocatable :: the_data(:)
integer :: AN_MPI_TYPE
integer p,active,rank,i
integer :: status(MPI_STATUS_SIZE)
logical :: did_get
did_get=.false.
AN_MPI_TYPE = myreal
rank=myid
p=numnodes
if(root .ne. 0)then
rank=rank-root
if(rank .lt. 0)rank=(p+rank)
endif
active=1
do while (2*active < p)
active=active*2
enddo
if(rank .ge. active)then
it=rank-active+root
if(it .ge. p)it=p-it
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input
call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
return
endif
allocate(the_data(n))
if(rank + active .lt. p)then
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec
did_get=.true.
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
call merge2_sp(input,n,rec,oper)
the_data=rec
endif
do while (active .gt. 1)
active=active/2
if(rank .ge. active) then
it=rank-active+root
if(it .ge. p)it=p-it
if(.not. did_get)the_data(1:n)=input(1:n)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data
call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
else
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(.not. did_get)the_data(1:n)=input(1:n)
call merge2_sp(rec,n,the_data,oper)
did_get=.true.
endif
enddo
if(myid .eq. root)then
do i=1,n
output(i)=the_data(i)
enddo
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
end subroutine merge_sp
subroutine merge2_sp(d1,n,d2,oper)
use ccm_numz
use ccm_error_mod
integer, intent (in) :: n
real(b4), intent (in):: d1(*)
real(b4), intent (inout):: d2(*)
character (len=*),intent (in) :: oper
integer :: i
select case (oper)
case ("+")
d2(1:n)=d1(1:n)+d2(1:n)
case ("min")
d2(1:n)=min(d1(1:n),d2(1:n))
case ("max")
d2(1:n)=max(d1(1:n),d2(1:n))
case ("*")
d2(1:n)=d1(1:n)*d2(1:n)
case default
write(*,*)"error in merge2_sp oper = ",oper
end select
end subroutine merge2_sp
subroutine merge_dp(input,n,output,oper,root,THETAG,THECOM,ierr)
use ccm_numz
use ccm_error_mod
implicit none
integer, intent(in) :: n
real(b8), intent(in) :: input(n)
real(b8), intent (inout):: output(n)
integer, intent(in) :: root
integer, intent(inout) :: ierr
character (len=*),intent (in) :: oper
integer :: THETAG,THECOM,NTAG,it
real(b8) , allocatable:: rec(:)
real(b8) , allocatable :: the_data(:)
integer :: AN_MPI_TYPE
integer :: p,active,rank,i
integer :: status(MPI_STATUS_SIZE)
logical :: did_get
did_get=.false.
AN_MPI_TYPE = mydouble
rank=myid
p=numnodes
if(root .ne. 0)then
rank=rank-root
if(rank .lt. 0)rank=(p+rank)
endif
active=1
do while (2*active < p)
active=active*2
enddo
if(rank .ge. active)then
it=rank-active+root
if(it .ge. p)it=p-it
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input
call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
return
endif
allocate(the_data(n))
if(rank + active .lt. p)then
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec
did_get=.true.
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
call merge2_dp(input,n,rec,oper)
the_data=rec
endif
do while (active .gt. 1)
active=active/2
if(rank .ge. active) then
it=rank-active+root
if(it .ge. p)it=p-it
if(.not. did_get)the_data(1:n)=input(1:n)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data
call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
else
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(.not. did_get)the_data(1:n)=input(1:n)
call merge2_dp(rec,n,the_data,oper)
did_get=.true.
endif
enddo
if(myid .eq. root)then
do i=1,n
output(i)=the_data(i)
enddo
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
end subroutine merge_dp
subroutine merge2_dp(d1,n,d2,oper)
use ccm_numz
use ccm_error_mod
integer, intent (in) :: n
real(b8), intent (in):: d1(*)
real(b8), intent (inout):: d2(*)
character (len=*),intent (in) :: oper
integer :: i
select case (oper)
case ("+")
d2(1:n)=d1(1:n)+d2(1:n)
case ("min")
d2(1:n)=min(d1(1:n),d2(1:n))
case ("max")
d2(1:n)=max(d1(1:n),d2(1:n))
case ("*")
d2(1:n)=d1(1:n)*d2(1:n)
case default
write(*,*)"error in merge2_dp oper = ",oper
end select
end subroutine merge2_dp
subroutine merge_comp(input,n,output,oper,root,THETAG,THECOM,ierr)
use ccm_numz
use ccm_error_mod
implicit none
integer, intent(in) :: n
complex(c4), intent(in) :: input(n)
complex(c4), intent (inout):: output(n)
integer, intent(in) :: root
integer, intent(inout) :: ierr
character (len=*),intent (in) :: oper
integer :: THETAG,THECOM,NTAG,it
complex(c4) , allocatable:: rec(:)
complex(c4) , allocatable :: the_data(:)
integer :: AN_MPI_TYPE
integer p,active,rank,i
integer :: status(MPI_STATUS_SIZE)
logical :: did_get
did_get=.false.
AN_MPI_TYPE = mycomp
rank=myid
p=numnodes
if(root .ne. 0)then
rank=rank-root
if(rank .lt. 0)rank=(p+rank)
endif
active=1
do while (2*active < p)
active=active*2
enddo
if(rank .ge. active)then
it=rank-active+root
if(it .ge. p)it=p-it
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input
call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
return
endif
allocate(the_data(n))
if(rank + active .lt. p)then
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec
did_get=.true.
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
call merge2_complex(input,n,rec,oper)
the_data=rec
endif
do while (active .gt. 1)
active=active/2
if(rank .ge. active) then
it=rank-active+root
if(it .ge. p)it=p-it
if(.not. did_get)the_data(1:n)=input(1:n)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data
call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
else
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(.not. did_get)the_data(1:n)=input(1:n)
call merge2_complex(rec,n,the_data,oper)
did_get=.true.
endif
enddo
if(myid .eq. root)then
do i=1,n
output(i)=the_data(i)
enddo
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
end subroutine merge_comp
subroutine merge2_complex(d1,n,d2,oper)
use ccm_numz
use ccm_error_mod
integer, intent (in) :: n
complex(c4), intent (in):: d1(*)
complex(c4), intent (inout):: d2(*)
character (len=*),intent (in) :: oper
integer :: i
select case (oper)
case ("+")
d2(1:n)=d1(1:n)+d2(1:n)
case ("*")
d2(1:n)=d1(1:n)*d2(1:n)
case default
write(*,*)"error in merge2_complex oper = ",oper
end select
end subroutine merge2_complex
subroutine merge_dpcomp(input,n,output,oper,root,THETAG,THECOM,ierr)
use ccm_numz
use ccm_error_mod
implicit none
integer, intent(in) :: n
complex(c8), intent(in) :: input(n)
complex(c8), intent (inout):: output(n)
integer, intent(in) :: root
integer, intent(inout) :: ierr
character (len=*),intent (in) :: oper
integer :: THETAG,THECOM,NTAG,it
complex(c8) , allocatable:: rec(:)
complex(c8) , allocatable :: the_data(:)
integer :: AN_MPI_TYPE
integer p,active,rank,i
integer :: status(MPI_STATUS_SIZE)
logical :: did_get
did_get=.false.
AN_MPI_TYPE = mydpcomp
rank=myid
p=numnodes
if(root .ne. 0)then
rank=rank-root
if(rank .lt. 0)rank=(p+rank)
endif
active=1
do while (2*active < p)
active=active*2
enddo
if(rank .ge. active)then
it=rank-active+root
if(it .ge. p)it=p-it
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input
call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
return
endif
allocate(the_data(n))
if(rank + active .lt. p)then
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec
did_get=.true.
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
call merge2_dpcomp(input,n,rec,oper)
the_data=rec
endif
do while (active .gt. 1)
active=active/2
if(rank .ge. active) then
it=rank-active+root
if(it .ge. p)it=p-it
if(.not. did_get)the_data(1:n)=input(1:n)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data
call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
else
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(.not. did_get)the_data(1:n)=input(1:n)
call merge2_dpcomp(rec,n,the_data,oper)
did_get=.true.
endif
enddo
if(myid .eq. root)then
do i=1,n
output(i)=the_data(i)
enddo
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
end subroutine merge_dpcomp
subroutine merge2_dpcomp(d1,n,d2,oper)
use ccm_numz
use ccm_error_mod
integer, intent (in) :: n
complex(c8), intent (in):: d1(*)
complex(c8), intent (inout):: d2(*)
character (len=*),intent (in) :: oper
integer :: i
select case (oper)
case ("+")
d2(1:n)=d1(1:n)+d2(1:n)
case ("*")
d2(1:n)=d1(1:n)*d2(1:n)
case default
write(*,*)"error in merge2_dpcomp oper = ",oper
end select
end subroutine merge2_dpcomp
subroutine merge_logical(input,n,output,oper,root,THETAG,THECOM,ierr)
use ccm_numz
use ccm_error_mod
implicit none
integer, intent(in) :: n
logical, intent(in) :: input(n)
logical, intent (inout):: output(n)
integer, intent(in) :: root
integer, intent(inout) :: ierr
character (len=*),intent (in) :: oper
integer :: THETAG,THECOM,NTAG,it
logical , allocatable:: rec(:)
logical , allocatable :: the_data(:)
integer :: AN_MPI_TYPE
integer p,active,rank,i
integer :: status(MPI_STATUS_SIZE)
logical :: did_get
did_get=.false.
AN_MPI_TYPE = mpi_logical
rank=myid
p=numnodes
if(root .ne. 0)then
rank=rank-root
if(rank .lt. 0)rank=(p+rank)
endif
active=1
do while (2*active < p)
active=active*2
enddo
if(rank .ge. active)then
it=rank-active+root
if(it .ge. p)it=p-it
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input
call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
return
endif
allocate(the_data(n))
if(rank + active .lt. p)then
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec
did_get=.true.
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
call merge2_logical(input,n,rec,oper)
the_data=rec
endif
do while (active .gt. 1)
active=active/2
if(rank .ge. active) then
it=rank-active+root
if(it .ge. p)it=p-it
if(.not. did_get)the_data(1:n)=input(1:n)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data
call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr)
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
else
if(.not.(allocated(rec))) allocate(rec(n))
it=rank+active+root
if(it .ge. p)it=p-it
call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr)
if(iand(do_tests,ccm_internal) .ne. 0) &
write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec
if(ierr .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")ierr
call ccm_fatal("reduce",AN_MPI_TYPE)
endif
if(.not. did_get)the_data(1:n)=input(1:n)
call merge2_logical(rec,n,the_data,oper)
did_get=.true.
endif
enddo
if(myid .eq. root)then
do i=1,n
output(i)=the_data(i)
enddo
endif
if(allocated(the_data))deallocate(the_data)
if(allocated(rec))deallocate(rec)
return
end subroutine merge_logical
subroutine merge2_logical(d1,n,d2,oper)
use ccm_numz
use ccm_error_mod
integer, intent (in) :: n
logical, intent (in):: d1(*)
logical, intent (inout):: d2(*)
character (len=*),intent (in) :: oper
integer :: i
select case (oper)
case ("and")
d2(1:n)=d1(1:n) .and. d2(1:n)
case ("or")
d2(1:n)=d1(1:n) .or. d2(1:n)
case ("xor")
d2(1:n)=d1(1:n) .neqv. d2(1:n)
case default
write(*,*)"error in merge2_logical oper = ",oper
end select
end subroutine merge2_logical
! subroutine merge_qpcomp(input,n,output,oper,root,THETAG,THECOM,ierr) !qp
! use ccm_numz !qp
! use ccm_error_mod !qp
! implicit none !qp
! integer, intent(in) :: n !qp
! complex(c16), intent(in) :: input(n) !qp
! complex(c16), intent (inout):: output(n) !qp
! integer, intent(in) :: root !qp
! integer, intent(inout) :: ierr !qp
! character (len=*),intent (in) :: oper !qp
! integer :: THETAG,THECOM,NTAG,it !qp
! complex(c16) , allocatable:: rec(:) !qp
! complex(c16) , allocatable :: the_data(:) !qp
! integer :: AN_MPI_TYPE !qp
! integer p,active,rank,i !qp
! integer :: status(MPI_STATUS_SIZE) !qp
! logical :: did_get !qp
! did_get=.false. !qp
! AN_MPI_TYPE = mydpcomp !qp
! rank=myid !qp
! p=numnodes !qp
! if(root .ne. 0)then !qp
! rank=rank-root !qp
! if(rank .lt. 0)rank=(p+rank) !qp
! endif !qp
! active=1 !qp
! do while (2*active < p) !qp
! active=active*2 !qp
! enddo !qp
! if(rank .ge. active)then !qp
! it=rank-active+root !qp
! if(it .ge. p)it=p-it !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input !qp
! call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr) !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! return !qp
! endif !qp
! allocate(the_data(n)) !qp
! if(rank + active .lt. p)then !qp
! if(.not.(allocated(rec))) allocate(rec(n)) !qp
! it=rank+active+root !qp
! if(it .ge. p)it=p-it !qp
! call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr) !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec !qp
! did_get=.true. !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! call merge2_qpcomp(input,n,rec,oper) !qp
! the_data=rec !qp
! endif !qp
! do while (active .gt. 1) !qp
! active=active/2 !qp
! if(rank .ge. active) then !qp
! it=rank-active+root !qp
! if(it .ge. p)it=p-it !qp
! if(.not. did_get)the_data(1:n)=input(1:n) !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data !qp
! call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr) !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! if(allocated(the_data))deallocate(the_data) !qp
! if(allocated(rec))deallocate(rec) !qp
! return !qp
! else !qp
! if(.not.(allocated(rec))) allocate(rec(n)) !qp
! it=rank+active+root !qp
! if(it .ge. p)it=p-it !qp
! call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr) !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! if(.not. did_get)the_data(1:n)=input(1:n) !qp
! call merge2_qpcomp(rec,n,the_data,oper) !qp
! did_get=.true. !qp
! endif !qp
! enddo !qp
! if(myid .eq. root)then !qp
! do i=1,n !qp
! output(i)=the_data(i) !qp
! enddo !qp
! endif !qp
! if(allocated(the_data))deallocate(the_data) !qp
! if(allocated(rec))deallocate(rec) !qp
! return !qp
! end subroutine merge_qpcomp !qp
! subroutine merge2_qpcomp(d1,n,d2,oper) !qp
! use ccm_numz !qp
! use ccm_error_mod !qp
! integer, intent (in) :: n !qp
! complex(c16), intent (in):: d1(*) !qp
! complex(c16), intent (inout):: d2(*) !qp
! character (len=*),intent (in) :: oper !qp
! integer :: i !qp
! select case (oper) !qp
! case ("+") !qp
! d2(1:n)=d1(1:n)+d2(1:n) !qp
! case ("*") !qp
! d2(1:n)=d1(1:n)*d2(1:n) !qp
! case default !qp
! write(*,*)"error in merge2_dpcomp oper = ",oper !qp
! end select !qp
! end subroutine merge2_qpcomp !qp
! subroutine merge_qp(input,n,output,oper,root,THETAG,THECOM,ierr) !qp
! use ccm_numz !qp
! use ccm_error_mod !qp
! implicit none !qp
! integer, intent(in) :: n !qp
! real(b16), intent(in) :: input(n) !qp
! real(b16), intent (inout):: output(n) !qp
! integer, intent(in) :: root !qp
! integer, intent(inout) :: ierr !qp
! character (len=*),intent (in) :: oper !qp
! integer :: THETAG,THECOM,NTAG,it !qp
! real(b16) , allocatable:: rec(:) !qp
! real(b16) , allocatable :: the_data(:) !qp
! integer :: AN_MPI_TYPE !qp
! integer :: p,active,rank,i !qp
! integer :: status(MPI_STATUS_SIZE) !qp
! logical :: did_get !qp
! did_get=.false. !qp
! AN_MPI_TYPE = mydouble !qp
! rank=myid !qp
! p=numnodes !qp
! if(root .ne. 0)then !qp
! rank=rank-root !qp
! if(rank .lt. 0)rank=(p+rank) !qp
! endif !qp
! active=1 !qp
! do while (2*active < p) !qp
! active=active*2 !qp
! enddo !qp
! if(rank .ge. active)then !qp
! it=rank-active+root !qp
! if(it .ge. p)it=p-it !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"s 1 myid=",myid,rank," to ",it,n,"x=",input !qp
! call MPI_SEND(input,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr) !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! return !qp
! endif !qp
! allocate(the_data(n)) !qp
! if(rank + active .lt. p)then !qp
! if(.not.(allocated(rec))) allocate(rec(n)) !qp
! it=rank+active+root !qp
! if(it .ge. p)it=p-it !qp
! call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr) !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"r 2 myid=",myid,rank," from ",it,n,"x=",rec !qp
! did_get=.true. !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! call merge2_qp(input,n,rec,oper) !qp
! the_data=rec !qp
! endif !qp
! do while (active .gt. 1) !qp
! active=active/2 !qp
! if(rank .ge. active) then !qp
! it=rank-active+root !qp
! if(it .ge. p)it=p-it !qp
! if(.not. did_get)the_data(1:n)=input(1:n) !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"s 3 myid=",myid,rank," to ",it,n,"x=",the_data !qp
! call MPI_SEND(the_data,n,AN_MPI_TYPE,it,THETAG,THECOM,ierr) !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! if(allocated(the_data))deallocate(the_data) !qp
! if(allocated(rec))deallocate(rec) !qp
! return !qp
! else !qp
! if(.not.(allocated(rec))) allocate(rec(n)) !qp
! it=rank+active+root !qp
! if(it .ge. p)it=p-it !qp
! call MPI_RECV(rec,n,AN_MPI_TYPE,it,THETAG,THECOM,status,ierr) !qp
! if(iand(do_tests,ccm_internal) .ne. 0) & !qp
! write(*,*)"r 4 myid=",myid,rank," from ",it,n,"x=",rec !qp
! if(ierr .ne. 0)then !qp
! write(err_str1,"(""low level communication error:"",i5)")ierr !qp
! call ccm_fatal("reduce",AN_MPI_TYPE) !qp
! endif !qp
! if(.not. did_get)the_data(1:n)=input(1:n) !qp
! call merge2_qp(rec,n,the_data,oper) !qp
! did_get=.true. !qp
! endif !qp
! enddo !qp
! if(myid .eq. root)then !qp
! do i=1,n !qp
! output(i)=the_data(i) !qp
! enddo !qp
! endif !qp
! if(allocated(the_data))deallocate(the_data) !qp
! if(allocated(rec))deallocate(rec) !qp
! return !qp
! end subroutine merge_qp !qp
! !qp
! subroutine merge2_qp(d1,n,d2,oper) !qp
! use ccm_numz !qp
! use ccm_error_mod !qp
! integer, intent (in) :: n !qp
! real(b16), intent (in):: d1(*) !qp
! real(b16), intent (inout):: d2(*) !qp
! character (len=*),intent (in) :: oper !qp
! integer :: i !qp
! select case (oper) !qp
! case ("+") !qp
! d2(1:n)=d1(1:n)+d2(1:n) !qp
! case ("min") !qp
! d2(1:n)=min(d1(1:n),d2(1:n)) !qp
! case ("max") !qp
! d2(1:n)=max(d1(1:n),d2(1:n)) !qp
! case ("*") !qp
! d2(1:n)=d1(1:n)*d2(1:n) !qp
! case default !qp
! write(*,*)"error in merge2_qp oper = ",oper !qp
! end select !qp
! end subroutine merge2_qp !qp
! !qp
! subroutine list_qp(x,list,n) !qp
! use ccm_numz !qp
! real(b16) :: x !qp
! character(len=4) :: list(:) !qp
! integer :: n !qp
! n=4 !qp
! list(1:n)=(/"+ ", "min ", "max ", "* "/) !qp
! end subroutine !qp
! !qp
! subroutine list_qpcomp(x,list,n) !qp
! use ccm_numz !qp
! complex(c16) :: x !qp
! character(len=4) :: list(:) !qp
! n=2 !qp
! list(1:n)=(/"+ ", "* "/) !qp
! end subroutine !qp
end module
ccm_numz_mod.f90 0100644 0000765 0000024 00000006521 07601724105 013404 0 ustar tkaiser staff module ccm_numz
! basic real types
integer, parameter:: b8 = selected_real_kind(10)
integer, parameter:: b4 = selected_real_kind(5)
integer, parameter:: c8 = selected_real_kind(10)
integer, parameter:: c4 = selected_real_kind(5)
! integer, parameter:: b8 = selected_real_kind(20)
! integer, parameter:: b4 = selected_real_kind(10)
! integer, parameter:: c8 = selected_real_kind(20)
! integer, parameter:: c4 = selected_real_kind(10)
integer, parameter:: b16= selected_real_kind(20)
integer, parameter:: c16= selected_real_kind(20)
integer, parameter:: alt_int = selected_int_kind(16)
integer, parameter:: def_int = kind(iccm_dummy_int)
! other values used by the api
include "mpif.h"
integer mpi_err
integer, parameter::MPI_ROOT=0
integer, parameter::MPI_ZERO=0
integer,save :: myreal,mydouble
integer,save :: mycomp,mydpcomp
integer,save :: myqp,myqpcomp
integer,save :: myint,mycharacter,mylogical,mylong
integer,save :: mycomm,myid,numnodes
integer :: do_tests,test_ray1(8),test_ray2(8),test_ray3(8)
logical :: passed_test
! public values for the API
integer, parameter :: ccm_alloff=0
integer, parameter :: ccm_checksize=1
integer, parameter :: ccm_deadlock=2
integer, parameter :: ccm_trace=4
integer, parameter :: ccm_internal=8
integer, parameter :: ccm_reproducible=0
integer, parameter :: ccm_fast=1
character (len=12) :: ccm_name
logical,save :: ccm_auto_print
real,save :: ccm_timeout
! error information
character(len=128),save:: err_str0,err_str1,err_str2,err_str3,err_str4
! machine specific values
integer :: ccm_iounits(1) = (/0/)
! if force_four is true then we will define
! our mpi_real in term of mpi_character
logical, parameter :: force_four= .true.
integer :: ccm_start_time(8)
interface
subroutine ccm_check_sizes(routine,test1,test2,test3,root,passed)
character(len=*) :: routine
integer :: test1(:),test2(:),test3(:)
integer :: root
logical :: passed
end subroutine
end interface
contains
function ccm_time()
implicit none
real(b8) :: ccm_time,tmp
integer,parameter :: norm(13)=(/ &
0, 2678400, 5097600, 7776000,10368000,13046400,&
15638400,18316800,20995200,23587200,26265600,28857600,31536000/)
integer,parameter :: leap(13)=(/ &
0, 2678400, 5184000, 7862400,10454400,13132800,&
15724800,18403200,21081600,23673600,26352000,28944000,31622400/)
integer :: values(8),m,sec
call date_and_time(values=values)
if(mod(values(1),4) .eq. 0)then
m=leap(values(2))
else
m=norm(values(2))
endif
sec=((values(3)*24+values(5))*60+values(6))*60+values(7)
tmp=real(m,b8)+real(sec,b8)+real(values(8),b8)/1000.0_b8
!write(*,*)"vals ",values
if(values(1) .ne. ccm_start_time(1))then
if(mod(ccm_start_time(1),4) .eq. 0)then
tmp=tmp+real(leap(13),b8)
else
tmp=tmp+real(norm(13),b8)
endif
endif
ccm_time=tmp
end function
subroutine ccm_testing(get_test,set_test)
implicit none
integer,optional :: get_test,set_test
if(present(get_test))get_test=do_tests
if(present(set_test))do_tests=set_test
end subroutine
end module
ccm_check_sizes.f90 0100644 0000765 0000024 00000020617 07611646475 014066 0 ustar tkaiser staff subroutine ccm_check_sizes(routine,test1,test2,test3,root,passed)
use ccm_numz, only :ccm_testing,err_str1,err_str2,myid,numnodes
use ccm_bcast_mod
use ccm_allreduce_mod
use ccm_alltoall_mod
use ccm_scatter_mod
use ccm_gather_mod
implicit none
character(len=*) :: routine
integer :: test1(:),test2(:),test3(:)
integer :: root
logical :: passed
logical :: tmp
integer,allocatable :: tosend(:),toget(:)
integer :: old_test,dummy,i,dummy4(8)
save tosend,toget
if(.not.allocated(tosend))allocate(tosend(0:numnodes-1),toget(0:numnodes-1))
call ccm_testing(get_test=old_test,set_test=0)
tmp=.true.
select case (routine)
case ("bcast")
tosend=test1(1)
call ccm_alltoall(tosend,toget)
passed=.true.
do i=0,numnodes-1
if(toget(i).ne.test1(1))then
passed=.false.
exit
endif
enddo
if(.not. passed)then
write(err_str1,"(""size of input / output array "",i8)")test1(1)
write(err_str2,"(""needs to be the same on all processors"")")
endif
case ("gather")
dummy=test2(1)
call ccm_bcast(dummy,root=root)
if(dummy .ne. test1(1))tmp=.false.
call ccm_allreduce(tmp,passed,"and")
if(.not. passed)then
if(myid .eq. root)then
write(err_str1,*)"output array size/processor: ",dummy
else
write(err_str1,*)" "
endif
write(err_str2,*)"input array size: ",test1(1)
endif
case ("scatter")
dummy=test2(1)
call ccm_bcast(dummy,root=root)
if(dummy .ne. test1(1))tmp=.false.
call ccm_allreduce(tmp,passed,"and")
if(.not. passed)then
if(myid .eq. root)then
write(err_str1,*)"input array size/processor: ",dummy
else
write(err_str1,*)" "
endif
write(err_str2,*)"output array size: ",test1(1)
endif
case ("gatherv")
! test2(1)=size(xin)
! test2(2)=size(xout)
! test2(3)=size(to_get)
! test2(4)=size(offset)
! test1=to_send
if(myid .eq. root)then
i=0
if(test2(3) .ne. numnodes)then !to_get wrong size
tmp=.false.
tosend=-1
i=1
else
tosend(0:numnodes-1)=test1(1:numnodes)
endif
if(tmp)then
if(sum(tosend) .ne. test2(2))then
i=2
tmp=.false. ! out wrong size
endif
endif
if(tmp)then
if(test2(4) .ne. numnodes)then
i=3
tmp=.false. ! offset wrong size
endif
endif
endif
call ccm_scatter(tosend,dummy,root=root)
if(dummy .ne. test2(1))tmp=.false.
call ccm_allreduce(tmp,passed,"and")
if(.not. passed)then
if(myid .eq. root)then
if(i .eq. 0)write(err_str1,120)
120 format("mismatch between to_get and size of input array")
if(i .eq. 1)write(err_str1,121)test2(3)
121 format("size of to_get is wrong, to_get size=",i8)
if(i .eq. 2)write(err_str1,122)
122 format("mismatch between to_get and size of output array")
if(i .eq. 3)write(err_str1,123)test2(4)
123 format("size of offset is wrong, offset size=",i8)
write(err_str2,"(""expecting to send and get "",2i8)")test2(1),test2(2)
else
write(err_str1,*)" "
write(err_str2,"(""expecting to send"",i8)")test2(1)
endif
endif
case ("scatterv")
! test2(1)=size(xin)
! test2(2)=size(xout)
! test2(3)=size(to_send)
! test2(4)=size(offset)
! test1=to_send
if(myid .eq. root)then
i=0
if(test2(3) .ne. numnodes)then !to_send wrong size
tmp=.false.
tosend=-1
i=1
else
tosend(0:numnodes-1)=test1(1:numnodes)
endif
if(tmp)then
if(sum(tosend) .ne. test2(1))then
i=2
tmp=.false. ! xin wrong size
endif
endif
if(tmp)then
if(test2(4) .ne. numnodes)then
i=3
tmp=.false. ! offset wrong size
endif
endif
endif
call ccm_scatter(tosend,dummy,root=root)
if(dummy .ne. test2(2))tmp=.false.
call ccm_allreduce(tmp,passed,"and")
if(.not. passed)then
if(myid .eq. root)then
if(i .eq. 0)write(err_str1,20)
20 format("mismatch between to_send and size of output array")
if(i .eq. 1)write(err_str1,21)test2(3)
21 format("size of to_send is wrong, to_send size=",i8)
if(i .eq. 2)write(err_str1,22)
22 format("mismatch between to_send and size of input array")
if(i .eq. 3)write(err_str1,23)test2(4)
23 format("size of offset is wrong, offset size=",i8)
write(err_str2,"(""expecting to send and get "",2i8)")test2(2),test2(2)
else
write(err_str1,*)" "
write(err_str2,"(""expecting to get"",i8)")test2(2)
endif
endif
case ("alltoall")
dummy=test2(2)
call ccm_bcast(dummy)
if(dummy*numnodes .ne. test1(1))tmp=.false.
if(dummy*numnodes .ne. test2(1))tmp=.false.
call ccm_allreduce(tmp,passed,"and")
if(.not. passed)then
write(err_str1,"(""size of input and output arrays "",2i8)")test1(1),test2(1)
if(mod(test1(1),numnodes) .ne. 0 .or. mod(test2(1),numnodes) .ne. 0)then
write(err_str2,"(""need to be = and multiple of number of tasks : "",i4)")numnodes
else
write(err_str2,"(""need to be equal"")")
endif
endif
case ("alltoallv")
tmp=.true.
!check for match of to_send and to_get arrays between processors
if(test3(1) .eq. numnodes .and. test3(2) .eq. numnodes)then
tosend(0:numnodes-1)=test1(1:numnodes)
else
tosend=-1
tmp=.false.
write(err_str1,*)"to_send and/or to_get array size is wrong"
write(err_str2,1)test3(1),test3(2)
1 format("size(to_send)= ",i8," size(to_get)= ",i8)
endif
if(tmp)then
if(test3(3) .ne. numnodes)then
write(err_str1,3)test3(3)
3 format("pid ",i8," get_off array wrong size ",i8)
write(err_str2,*)"should be ",numnodes
tmp=.false.
endif
endif
if(tmp)then
if(test3(4) .ne. numnodes)then
write(err_str1,4)test3(4)
4 format("pid ",i8," send_off array wrong size ",i8)
write(err_str2,*)"should be ",numnodes
tmp=.false.
endif
endif
if(tmp)then
if(test3(5) .ne. test3(7))then
write(err_str1,5)test3(5),test3(7)
5 format("pid ",i8," sum(to_send) ",i8," .ne. size(xin) ",i8)
write(err_str2,*)"should be the same"
tmp=.false.
endif
endif
if(tmp)then
if(test3(6) .ne. test3(8))then
write(err_str1,6)test3(6),test3(8)
6 format("pid ",i8," sum(to_get) ",i8," .ne. size(xout) ",i8)
write(err_str2,*)"should be the same"
tmp=.false.
endif
endif
call ccm_alltoall(tosend,toget)
if(tmp)then
do i=0,min(test3(1),test3(2))-1
if(toget(i).ne.test2(i+1))then
tmp=.false.
write(err_str1,*)"mismatch between number being received and sent"
write(err_str2,2)myid,toget(i),test2(i+1)
2 format("pid ",i8," sending, pid ",i8," getting",i8)
exit
endif
enddo
endif
call ccm_allreduce(tmp,passed,"and")
case ("allreduce")
dummy4(1:8)=test1(1:8)
call ccm_bcast(dummy4)
dummy=0
! do i=1,8
! if(dummy4(i) .ne. test2(i) .or. test1(i) .ne. test2(i))dummy=1
! enddo
if(product(dummy4(1:8),mask=dummy4 .ne. 0) .ne. product(test1(1:8),mask=test1 .ne. 0))dummy=1
if(product(test2(1:8),mask=test2 .ne. 0) .ne. product(test1(1:8),mask=test1 .ne. 0))dummy=1
tosend=0
call ccm_gather(dummy,tosend)
if(myid .eq. 0)then
if(sum(tosend) .gt. 0)tmp=.false.
endif
call ccm_bcast(tmp)
passed=tmp
if(.not. passed)then
write(err_str1,*)"shape of input array: ",test1," shape of output array: ",test2
write(err_str2,*)"sizes should be the same on all processors"
endif
case ("reduce")
dummy4(1:8)=test2(1:8)
call ccm_bcast(dummy4,root=root)
dummy=0
! do i=1,8
! if(dummy4(i) .ne. test1(i))dummy=1
! enddo
if(product(dummy4(1:8),mask=dummy4 .ne. 0) .ne. product(test1(1:8),mask=test1 .ne. 0))dummy=1
tosend=0
call ccm_gather(dummy,tosend)
if(myid .eq. 0)then
if(sum(tosend) .gt. 0)tmp=.false.
endif
call ccm_bcast(tmp)
passed=tmp
if(.not. passed)then
if(myid .eq. root)then
write(err_str1,*)"shape of input array: ",test1," shape of output array: ",test2
else
write(err_str1,*)"shape of input array: ",test1
endif
write(err_str2,*)"sizes should be the same arcoss all processors"
endif
case default
end select
call ccm_testing(set_test=old_test)
end subroutine
ccm_rs_mod.f90 0100644 0000765 0000024 00000000077 07601724105 013037 0 ustar tkaiser staff module ccm_rs_mod
use ccm_rank_mod
use ccm_size_mod
end module
ccm_allreduce_mod.input 0100644 0000765 0000024 00000015356 07601724105 015122 0 ustar tkaiser staff .false.
allreduce
0 3 0 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,oper,constraint,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
same
end subroutine $1_$2
subroutine $1_$2(xin,xout,oper,constraint,the_err)
!base2 rank input .ne. 0 .and./.or. rank output .ne. 0
use ccm_merge
use ccm_numz
use ccm_error_mod
use ccm_bcast_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input
$3,intent (out) :: xout$5 ! == output to root
integer,intent(in), optional :: constraint ! == force reproduceability
character (len= *),intent (in) :: oper ! == operator +, *, min, max...
! or mpi like names sum, prod
! assumed mpi_comm_world & root
integer, optional, intent(out) :: the_err ! == error value
integer, parameter :: local_root=0
integer :: n
integer :: shape_1d(1)
integer ,allocatable :: shape_in(:),shape_out(:)
$3,allocatable :: temp(:)
$3 :: temp2(1) ! r1=0
integer :: s1,s2
integer :: i,i1,i2
logical :: illegal
integer :: method
integer :: mpi_oper
character(len=4) :: my_oper
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
shape_1d(1)=size(xin) ! r1>0
shape_1d(1)=1 ! r1=0
n=min(shape_1d(1),size(xout)) ! r2>0
n=min(shape_1d(1),1) ! r2=0
!start of error tests
call get_oper(oper,mytype,mpi_oper,my_oper,illegal)
if(illegal)then
write(err_str1,*)"illegal operation for reduction: ",oper
write(err_str2,*)" "
call ccm_warning("allreduce",mytype,r1,r2)
if(present(the_err))the_err=1
return
endif
!end of error tests
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
s1=max(r1,1)
s2=max(r2,1)
allocate(shape_in(s1),shape_out(s2))
test_ray1=0
test_ray2=0
shape_in=1
shape_out=1
shape_in=shape(xin) ! r1>0
shape_out=shape(xout) ! r2>0
test_ray1(1:s1)=shape_in(1:s1)
test_ray2(1:s2)=shape_out(1:s2)
call ccm_check_sizes("allreduce",test_ray1,test_ray2,test_ray3,local_root,passed_test)
if(.not.passed_test)then
call ccm_warning("allreduce",mytype,r1,r2)
if(present(the_err))the_err=1
deallocate(shape_in,shape_out)
return
endif
deallocate(shape_in,shape_out)
endif
!end of error tests
if(present(constraint))then
method=constraint
else
method=ccm_reproducible
endif
if(method .eq. ccm_fast)then
if(mytype .eq. myint )mytype=mpi_integer
if(mytype .eq. mylogical)mytype=mpi_logical
if(mytype .eq. myreal)then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_real,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_real
else
method=ccm_reproducible
endif
endif
if(mytype .eq. mydouble)then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_double_precision,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_double_precision
else
method=ccm_reproducible
endif
endif
if(mytype .eq. mycomp )then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_complex,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_complex
else
method=ccm_reproducible
endif
endif
if(mytype .eq. myqp)then
method=ccm_reproducible
endif
if(mytype .eq. myqpcomp)then
method=ccm_reproducible
endif
if(mytype .eq. mydpcomp)then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_complex,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_complex
else
method=ccm_reproducible
endif
endif
endif
if(n .gt. 0)then
select case (method)
case (ccm_fast)
call mpi_allreduce(xin,xout,n,mytype,mpi_oper,mycomm,mpi_err)
case default
allocate(temp(n))
call merge_it(reshape(xin,shape_1d),n,temp,my_oper,local_root,1234,mycomm,mpi_err) ! r1>0
temp2=xin ! r1=0
call merge_it(temp2,n,temp,my_oper,local_root,1234,mycomm,mpi_err) ! r1=0
call ccm_bcast(temp,root=local_root,the_err=mpi_err)
xout=reshape(temp,shape(xout)) ! r2>0
xout=temp(1) ! r2=0
deallocate(temp)
end select
endif
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("allreduce",mytype,r1,r2)
endif
end subroutine $1_$2
ccm_alltoall_mod.input 0100644 0000765 0000024 00000004562 07601724105 014763 0 ustar tkaiser staff .false.
alltoall
1 3 1 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
character character mycharacter
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,the_err) !(xin,xout,to_get,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
end subroutine $1_$2
subroutine $1_$2(xin,xout,the_err) !(xin,xout,to_get,the_err)
!base2 rank input .ne. 0 .and. rank output .ne. 0
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input at root
$3,intent (out) :: xout$5 ! == output to other processors
! integer,intent(in),optional :: to_get ! == number of values to send to each processor
integer, intent (out),optional :: the_err ! == error value
integer :: get_local,n
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
get_local=size(xin)/numnodes
n=min(size(xin),size(xout))
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
test_ray1(1)=size(xin)
test_ray2(1)=size(xout)
test_ray2(2)=get_local
passed_test=.true.
call ccm_check_sizes("alltoall",test_ray1,test_ray2,test_ray3,0,passed_test)
if(.not. passed_test)then
call ccm_warning("alltoall",mytype,r1,r2)
if(present(the_err))the_err=1
return
endif
endif
!end of error tests
call mpi_alltoall(xin, get_local,mytype,&
xout,get_local,mytype,&
mycomm,mpi_err)
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("alltoallv",mytype,r1,r2)
endif
end subroutine $1_$2
ccm_alltoallv_mod.input 0100644 0000765 0000024 00000007005 07601724105 015144 0 ustar tkaiser staff .false.
alltoallv
1 3 1 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
character character mycharacter
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,to_send,to_get,get_off,send_off,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
end subroutine $1_$2
subroutine $1_$2(xin,xout,to_send,to_get,send_off,get_off,the_err)
!base2 rank input .ne. 0 .and. rank output .ne. 0
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input at root
$3,intent (out) :: xout$5 ! == output to other processors
integer,intent(in) :: to_send(:) ! == number of values to send to each processor
integer,intent(in) :: to_get(:) ! == number of values to get from each processor
integer,optional :: get_off(:) ! == offset of values to get from each processor
integer,optional :: send_off(:) ! == offset of values to send to each processor
integer, intent (out),optional :: the_err ! == error value
integer,allocatable :: sdisp(:),rdisp(:)
integer :: i
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
allocate(sdisp(numnodes))
allocate(rdisp(numnodes))
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
test_ray3(1)=size(to_send)
test_ray3(2)=size(to_get)
test_ray3(3)=numnodes
test_ray3(4)=numnodes
if(present(get_off))test_ray3(3)=size(get_off)
if(present(send_off))test_ray3(4)=size(send_off)
test_ray3(5)=sum(to_send)
test_ray3(6)=sum(to_get)
test_ray3(7)=size(xin)
test_ray3(8)=size(xout)
call ccm_check_sizes("alltoallv",to_send,to_get,test_ray3,0,passed_test)
if(.not. passed_test)then
call ccm_warning("alltoallv",mytype,r1,r2)
if(present(the_err))the_err=1
return
endif
endif
!end of error tests
if(present(get_off))then
rdisp=get_off(1:numnodes)
else
rdisp(1)=0
do i=2,numnodes
rdisp(i)=rdisp(i-1)+to_get(i-1)
enddo
endif
if(present(send_off))then
sdisp=send_off(1:numnodes)
else
sdisp(1)=0
do i=2,numnodes
sdisp(i)=sdisp(i-1)+to_send(i-1)
enddo
endif
call mpi_alltoallv(xin,to_send,sdisp,mytype,&
xout, to_get,rdisp,mytype,&
mycomm,mpi_err)
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("alltoallv",mytype,r1,r2)
endif
deallocate(sdisp,rdisp)
end subroutine $1_$2
ccm_bcast_mod.input 0100644 0000765 0000024 00000004256 07601724105 014253 0 ustar tkaiser staff .true.
bcast
0 3 0 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
character character mycharacter
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(x,root,the_err) !(x,to_send,root,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
same
end subroutine $1_$2
subroutine $1_$2(x,root,the_err) !(x,to_send,root,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (inout) :: x$4
integer, optional, intent(in) :: root
integer, optional, intent(out) :: the_err
integer :: local_root,local_size
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(x)
r2=r1
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
if(present(root))then
local_root=root
else
local_root=MPI_ROOT
endif
local_size=1 ! r1=0
local_size=size(x) ! r1>0
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
test_ray1(1)=local_size
call ccm_check_sizes("bcast",test_ray1,test_ray2,test_ray3,local_root,passed_test)
if(.not. passed_test)then
call ccm_warning("bcast",mytype,r1)
if(present(the_err))the_err=1
return
endif
endif
!end of error tests
call MPI_BCAST(x,local_size,mytype,local_root,mycomm,MPI_ERR)
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("bcast",mytype,r1)
endif
end subroutine $1_$2
ccm_gather_mod.input 0100644 0000765 0000024 00000005354 07602707302 014432 0 ustar tkaiser staff .false.
gather
0 3 1 3
sp real(b4) myreal put32
dp real(b8) mydouble put64
in integer(def_int) myint integer_put
comp complex(c4) mycomp put64
dpcomp complex(c8) mydpcomp put128
logical logical mylogical logical_put
character character mycharacter character_put
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,root,the_err) !(xin,xout,to_get,root,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
end subroutine $1_$2
subroutine $1_$2(xin,xout,root,the_err) !(xin,xout,to_get,root,the_err)
!base2 rank input .ne. 0 .or. rank output .ne. 0
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input at root
$3,intent (out) :: xout$5 ! == output to other processors
integer,intent (in), optional :: root ! == root processor
integer, intent (out),optional :: the_err ! == error value
$3 :: xdummy
integer :: local_root,get_local
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
if(present(root))then
local_root=root
else
local_root=0
endif
get_local=size(xin) ! r1>0
get_local=1 ! r1=0
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
test_ray1(1)=get_local
if(myid .eq. local_root)then
test_ray2(1)=size(xout)/numnodes
else
test_ray2(1)=-1
endif
call ccm_check_sizes("gather",test_ray1,test_ray2,test_ray3,local_root,passed_test)
if(.not. passed_test)then
call ccm_warning("gather",mytype,r1,r2)
if(present(the_err))the_err=1
return
endif
endif
!end of error tests
if(myid .eq. local_root)then
call mpi_gather(xin, get_local,mytype,&
xout,get_local,mytype,&
local_root,mycomm,mpi_err)
else
call mpi_gather(xin, get_local,mytype,&
xdummy,get_local,mytype,&
local_root,mycomm,mpi_err)
endif
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("gather",mytype,r1,r2)
endif
end subroutine $1_$2
ccm_gatherv_mod.input 0100644 0000765 0000024 00000007763 07602707006 014627 0 ustar tkaiser staff .false.
gatherv
1 3 1 3
sp real(b4) myreal put32
dp real(b8) mydouble put64
in integer(def_int) myint integer_put
comp complex(c4) mycomp put64
dpcomp complex(c8) mydpcomp put128
logical logical mylogical logical_put
character character mycharacter character_put
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,to_get,offset,root,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
end subroutine $1_$2
subroutine $1_$2(xin,xout,to_get,offset,root,the_err)
!base2 rank input .ne. 0 .and. rank output .ne. 0
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input on all processors
$3,intent (out) :: xout$5 ! == output at root
integer,intent(in) :: to_get(:) ! == number of values to get from each processor
integer,optional,intent(in) :: offset(:) ! == offsets where to put values
! relative to xout(1)
integer,intent (in), optional :: root ! == root processor
integer, intent (out),optional :: the_err ! == error value
$3 :: xdummy
integer idummy(1)
integer :: local_root,local1,i
integer,allocatable :: disp(:)
integer :: local_get
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
if(present(root))then
local_root=root
else
local_root=0
endif
local1=size(xin)
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
passed_test=.true.
test_ray2(1)=local1
if(myid .eq. local_root)then
test_ray2(2)=size(xout)
test_ray2(3)=size(to_get)
else
test_ray2(2)=-2
test_ray2(3)=-1
endif
test_ray2(4)=numnodes
if(myid .eq. local_root .and. present(offset))test_ray2(4)=size(offset)
call ccm_check_sizes("gatherv",to_get,test_ray2,test_ray3,local_root,passed_test)
if(.not. passed_test)then
call ccm_warning("gatherv",mytype,r1,r2)
if(present(the_err))the_err=-1
return
endif
endif
!end of error tests
if(myid .eq. local_root)then
allocate(disp(numnodes))
if(present(offset))then
if(size(offset) .eq. numnodes)then
disp(1:numnodes)=offset(1:numnodes)
else
disp=0
endif
else
disp(1)=0
do i=2,numnodes
disp(i)=disp(i-1)+to_get(i-1)
enddo
endif
endif
if(myid .eq. local_root)then
call mpi_gatherv(xin, local1,mytype,&
xout, to_get,disp,mytype,&
local_root,mycomm,mpi_err)
else
call mpi_gatherv(xin, local1,mytype,&
xdummy, idummy,idummy,mytype,&
local_root,mycomm,mpi_err)
endif
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("gather",mytype,r1,r2)
endif
if(allocated(disp))deallocate(disp)
end subroutine $1_$2
ccm_rank_mod.input 0100644 0000765 0000024 00000000660 07601724105 014105 0 ustar tkaiser staff .true.
rank
0 3 0 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
character character mycharacter
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
function $1_$2(x)
use ccm_numz
integer :: $1_$2
$3 :: x
$1_$2=$m
end function $1_$2
function $1_$2(x)
use ccm_numz
integer :: $1_$2
$3 :: x$4
$1_$2=$m
end function $1_$2
ccm_reduce_mod.input 0100644 0000765 0000024 00000015661 07601724105 014430 0 ustar tkaiser staff .false.
reduce
0 3 0 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,oper,constraint,root,the_err)
!base2 rank input .ne. 0 .and. rank output .ne. 0
same
end subroutine $1_$2
subroutine $1_$2(xin,xout,oper,constraint,root,the_err)
!base2 rank input .ne. 0 .and. rank output .ne. 0
use ccm_merge
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input
$3,intent (out) :: xout$5 ! == output to root
integer,intent(in), optional :: constraint ! == force reproduceability
character (len= *),intent (in) :: oper ! == operator +, *, min, max...
! or mpi like names sum, prod
integer,intent (in), optional :: root ! == root for communication
integer, intent (out),optional :: the_err ! == error value
integer :: local_root,n,method
logical :: illegal
integer :: mpi_oper
character(len=4) :: my_oper
integer :: shape_1d(1)
integer ,allocatable :: shape_in(:),shape_out(:)
$3,allocatable :: temp(:)
$3 :: xpass(1)
integer :: i,i1,i2
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
if(present(constraint))then
method=constraint
else
method=ccm_reproducible
endif
if(present(root))then
local_root=root
else
local_root=0
endif
n=size(xin) ! r1>0
n=1 ! r1=0
shape_1d(1)=n
!start of error tests
call get_oper(oper,mytype,mpi_oper,my_oper,illegal)
if(illegal)then
write(err_str1,*)"illegal operation for reduction: ",oper
write(err_str2,*)" "
call ccm_warning("reduce",mytype,r1,r2)
if(present(the_err))the_err=1
return
endif
!end of error tests
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
allocate(shape_in(max(r1,1)))
allocate(shape_out(max(r2,1)))
shape_in=shape(xin) ! r1>0
shape_in=1 ! r1=0
if(myid .eq. local_root)then
shape_out=shape(xout) ! r2>0
shape_out=1 ! r2=0
else
shape_out=1
endif
test_ray1=0
test_ray2=0
if(r1.gt.0)then
test_ray1(1:r1)=shape_in(1:r1)
else
test_ray1(1)=1
endif
if(r2.gt.0)then
test_ray2(1:r2)=shape_out(1:r2)
else
test_ray2(1)=1
endif
call ccm_check_sizes("reduce",test_ray1,test_ray2,test_ray3,local_root,passed_test)
deallocate(shape_in,shape_out)
if(.not.passed_test)then
call ccm_warning("reduce",mytype,r1,r2)
if(present(the_err))the_err=1
return
endif
endif
!end of error tests
if(myid .eq. local_root)then
allocate(temp(size(xout))) ! r2>0
allocate(temp(1)) ! r2=0
else
allocate(temp(1))
endif
if(method .eq. ccm_fast)then
if(mytype .eq. myint )mytype=mpi_integer
if(mytype .eq. mylogical)mytype=mpi_logical
if(mytype .eq. myreal)then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_real,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_real
else
method=ccm_reproducible
endif
endif
if(mytype .eq. mydouble)then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_double_precision,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_double_precision
else
method=ccm_reproducible
endif
endif
if(mytype .eq. mycomp )then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_complex,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_complex
else
method=ccm_reproducible
endif
endif
if(mytype .eq. myqp)then
method=ccm_reproducible
endif
if(mytype .eq. myqpcomp)then
method=ccm_reproducible
endif
if(mytype .eq. mydpcomp)then
call MPI_TYPE_EXTENT(mytype,i1,mpi_err)
call MPI_TYPE_EXTENT(mpi_complex,i2,mpi_err)
if(i1 .eq. i2)then
mytype=mpi_complex
else
method=ccm_reproducible
endif
endif
endif
! if(n .lt. size(xout))temp=reshape(xout,shape(temp))
select case (method)
case (ccm_fast)
xpass=xin ! r1=0
call mpi_reduce(xpass , & ! r1=0
call mpi_reduce(reshape(xin,shape_1d), & ! r1>0
temp,n,mytype,mpi_oper,local_root,mycomm,mpi_err)
case default
xpass=xin ! r1=0
call merge_it(xpass , & ! r1=0
call merge_it(reshape(xin,shape_1d), & ! r1>0
n,temp,my_oper,local_root,1234,mycomm,mpi_err)
end select
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("reduce",mytype,r1,r2)
endif
if(myid .eq. local_root)then
xout=reshape(temp,shape(xout)) ! r2>0
xout=temp(1) ! r2=0
endif
deallocate(temp)
end subroutine $1_$2
ccm_scatter_mod.input 0100644 0000765 0000024 00000005704 07602703076 014630 0 ustar tkaiser staff .false.
scatter
1 3 0 3
sp real(b4) myreal put32
dp real(b8) mydouble put64
in integer(def_int) myint integer_put
comp complex(c4) mycomp put64
dpcomp complex(c8) mydpcomp put128
logical logical mylogical logical_put
character character mycharacter character_put
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,root,the_err) !(xin,xout,to_send,root,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
end subroutine $1_$2
subroutine $1_$2(xin,xout,root,the_err) !(xin,xout,to_send,root,the_err)
!base1 rank input .ne. 0 .and. rank output .ne. 0
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input at root
$3,intent (out) :: xout$5 ! == output to other processors
integer,intent (in), optional :: root ! == root processor
integer, intent (out),optional :: the_err ! == error value
$3 :: xdummy
integer :: local_root
integer :: local_send
integer :: size_xout
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
if(present(root))then
local_root=root
else
local_root=0
endif
local_send=size(xout) ! r2>0
local_send=1 ! r2=0
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
test_ray1(1)=local_send
if(myid .eq. local_root)then
test_ray2(1)=size(xin)/numnodes
else
test_ray2(1)=-1
endif
call ccm_check_sizes("scatter",test_ray1,test_ray2,test_ray3,local_root,passed_test)
if(.not. passed_test)then
call ccm_warning("scatter",mytype,r1,r2)
if(present(the_err))the_err=1
return
endif
endif
!end of error tests
if(myid .eq. local_root)then
call mpi_scatter(xin, local_send,mytype,&
xout,local_send,mytype,&
local_root,mycomm,mpi_err)
else
call mpi_scatter(xdummy, local_send,mytype,&
xout,local_send,mytype,&
local_root,mycomm,mpi_err)
endif
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("scatter",mytype,r1,r2)
endif
end subroutine $1_$2
ccm_scatterv_mod.input 0100644 0000765 0000024 00000007724 07602706177 015027 0 ustar tkaiser staff .false.
scatterv
1 3 1 3
sp real(b4) myreal put32
dp real(b8) mydouble put64
in integer(def_int) myint integer_put
comp complex(c4) mycomp put64
dpcomp complex(c8) mydpcomp put128
logical logical mylogical logical_put
character character mycharacter logical_put
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
subroutine $1_$2(xin,xout,to_send,offset,root,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
end subroutine $1_$2
subroutine $1_$2(xin,xout,to_send,offset,root,the_err)
!base2 rank input .ne. 0 .and. rank output .ne. 0
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
use ccm_barrier_mod, only : ccm_checkin
implicit none
$3,intent (in) :: xin$4 ! == input at root
$3,intent (out) :: xout$5 ! == output to other processors
integer,intent(in) :: to_send(:) ! == number of values to send to each processor
integer,optional,intent(in) :: offset(:) ! == offsets where to put values
! relative to xout(1)
integer,intent (in), optional :: root ! == root processor
integer, intent (out),optional :: the_err ! == error value
$3 :: xdummy
integer idummy(1)
integer :: local_root,local1,i
integer,allocatable :: disp(:)
integer :: local_send
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(xin)
r2=ccm_rank(xout)
mytype=$6
if(iand(do_tests,ccm_deadlock) .ne. 0) then
call ccm_checkin(60.0,"$1",mpi_err)
if(mpi_err .ne. 0)call ccm_fatal("$1",mytype,r1,r2)
endif
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
if(present(root))then
local_root=root
else
local_root=0
endif
local1=size(xout)
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
passed_test=.true.
if(myid .eq. local_root)then
test_ray2(1)=size(xin)
else
test_ray2(1)=-1
endif
test_ray2(2)=local1 ! size(xout)
if(myid .eq. local_root)then
test_ray2(3)=size(to_send)
else
test_ray2(3)=-1
endif
test_ray2(4)=numnodes
if(myid .eq. local_root .and. present(offset))test_ray2(4)=size(offset)
call ccm_check_sizes("scatterv",to_send,test_ray2,test_ray3,local_root,passed_test)
if(.not. passed_test)then
call ccm_warning("scatterv",mytype,r1,r2)
if(present(the_err))the_err=-1
return
endif
endif
!end of error tests
if(myid .eq. local_root)then
allocate(disp(numnodes))
if(present(offset))then
disp(1:numnodes)=offset(1:numnodes)
else
disp(1)=0
do i=2,numnodes
disp(i)=disp(i-1)+to_send(i-1)
enddo
endif
endif
if(myid .eq. local_root)then
call mpi_scatterv(xin, to_send,disp,mytype,&
xout,local1,mytype,&
local_root,mycomm,mpi_err)
else
call mpi_scatterv(xdummy,idummy,idummy,mytype,&
xout,local1,mytype,&
local_root,mycomm,mpi_err)
endif
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("scatterv",mytype,r1,r2)
endif
if(allocated(disp))deallocate(disp)
end subroutine $1_$2
ccm_size_mod.input 0100644 0000765 0000024 00000000756 07601724105 014132 0 ustar tkaiser staff .true.
size
0 3 0 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
character character mycharacter
!qp real(b16) myqp
!qpcomp complex(b16) myqpcomp
function $1_$2(x)
use ccm_numz
integer :: $1_$2
$3 :: x
$1_$2=1
end function $1_$2
function $1_$2(x)
use ccm_numz
integer :: $1_$2
$3, pointer :: x$4
if(associated(x))then
$1_$2=size(x)
else
$1_$2=0
endif
end function $1_$2
make_mod_script 0100755 0000765 0000024 00000000054 07601724105 013473 0 ustar tkaiser staff #!/bin/csh -f
./make_mod << here
$1 $2
here
makefile 0100644 0000765 0000024 00000011717 07601724105 012115 0 ustar tkaiser staff default : libccm
include $(CCM_COM)_include
OS= ccm_barrier_mod.o ccm_bcast_mod.o ccm_init_mod.o\
ccm_numz_mod.o ccm_reduce_mod.o ccm_merge_mod.o\
ccm_allreduce_mod.o ccm_scatter_mod.o ccm_scatterv_mod.o\
ccm_gatherv_mod.o ccm_gather_mod.o ccm_alltoall_mod.o\
ccm_alltoallv_mod.o ccm_error_mod.o ccm_check_sizes.o \
ccm_rank_mod.o ccm_size_mod.o ccm_rs_mod.o
make_mod: make_mod.f90
$(F90) -o make_mod make_mod.f90
ccm_allreduce_mod.f90:ccm_allreduce_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_allreduce_mod.input ccm_allreduce_mod.f90
ccm_alltoall_mod.f90:ccm_alltoall_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_alltoall_mod.input ccm_alltoall_mod.f90
ccm_alltoallv_mod.f90:ccm_alltoallv_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_alltoallv_mod.input ccm_alltoallv_mod.f90
ccm_bcast_mod.f90:ccm_bcast_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_bcast_mod.input ccm_bcast_mod.f90
ccm_gather_mod.f90:ccm_gather_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_gather_mod.input ccm_gather_mod.f90
ccm_gatherv_mod.f90:ccm_gatherv_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_gatherv_mod.input ccm_gatherv_mod.f90
ccm_reduce_mod.f90:ccm_reduce_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_reduce_mod.input ccm_reduce_mod.f90
ccm_scatter_mod.f90:ccm_scatter_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_scatter_mod.input ccm_scatter_mod.f90
ccm_scatterv_mod.f90:ccm_scatterv_mod.input make_mod ccm_barrier_mod.o
make_mod_script ccm_scatterv_mod.input ccm_scatterv_mod.f90
ccm_size_mod.f90:ccm_size_mod.input make_mod
make_mod_script ccm_size_mod.input ccm_size_mod.f90
ccm_rank_mod.f90:ccm_rank_mod.input make_mod
make_mod_script ccm_rank_mod.input ccm_rank_mod.f90
ccm_mod.o: ccm_mod.f90 $(OS)
$(PF90) -c ccm_mod.f90
ccm_barrier_mod.o: ccm_barrier_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_barrier_mod.f90
ccm_bcast_mod.o: ccm_bcast_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_bcast_mod.f90
ccm_scatter_mod.o: ccm_scatter_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_scatter_mod.f90
ccm_scatter2_mod.o: ccm_scatter2_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_scatter2_mod.f90
ccm_scatterv_mod.o: ccm_scatterv_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_scatterv_mod.f90
ccm_gatherv_mod.o: ccm_gatherv_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_gatherv_mod.f90
ccm_gather_mod.o: ccm_gather_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_gather_mod.f90
ccm_alltoall_mod.o: ccm_alltoall_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_alltoall_mod.f90
ccm_alltoallv_mod.o: ccm_alltoallv_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_alltoallv_mod.f90
ccm_allreduce_mod.o: ccm_allreduce_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_merge_mod.o ccm_bcast_mod.o ccm_rs_mod.o
$(PF90) -c ccm_allreduce_mod.f90
ccm_merge_mod.o: ccm_merge_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_rs_mod.o
$(PF90) -c ccm_merge_mod.f90
ccm_numz_mod.o: ccm_numz_mod.f90
$(PF90) -c ccm_numz_mod.f90 $(INC_DIR)
ccm_error_mod.o: ccm_error_mod.f90 ccm_numz_mod.o
$(PF90) -c ccm_error_mod.f90
ccm_check_sizes.o: ccm_check_sizes.f90 ccm_numz_mod.o ccm_bcast_mod.o ccm_allreduce_mod.o ccm_alltoall_mod.o ccm_scatter_mod.o ccm_gather_mod.o
$(PF90) -c ccm_check_sizes.f90
#-I$(INC_DIR)
ccm_reduce_mod.o: ccm_reduce_mod.f90 ccm_numz_mod.o ccm_error_mod.o ccm_merge_mod.o ccm_rs_mod.o
$(PF90) -c ccm_reduce_mod.f90
ccm_init_mod.o: ccm_init_mod.f90 ccm_numz_mod.o ccm_error_mod.o
$(PF90) -c ccm_init_mod.f90
ccm_rs_mod.o: ccm_rs_mod.f90 ccm_rank_mod.o ccm_size_mod.o
$(PF90) -c ccm_rs_mod.f90
ccm_rank_mod.o: ccm_rank_mod.f90 ccm_numz_mod.o
$(PF90) -c ccm_rank_mod.f90
ccm_size_mod.o: ccm_size_mod.f90 ccm_numz_mod.o
$(PF90) -c ccm_size_mod.f90
clean:
/bin/rm -f *mod *o \
ccm_allreduce_mod.f90 ccm_alltoallv_mod.f90 ccm_gather_mod.f90 \
ccm_reduce_mod.f90 ccm_scatterv_mod.f90 ccm_alltoall_mod.f90 \
ccm_bcast_mod.f90 ccm_gatherv_mod.f90 ccm_scatter_mod.f90 \
allreduce_test alltoallv_test bcast_test reduce_test \
scat_gatv_test alltoall_test scat_gat_test \
allreduce_test.f90 alltoallv_test.f90 bcast_test.f90 reduce_test.f90 \
scat_gatv_test.f90 alltoall_test.f90 scat_gat_test.f90 \
ccm_rank_mod.f90 ccm_size_mod.f90 \
make_test make_mod libccm.a build_report
libccm:ccm_mod.o
ar -r libccm.a $(OS) ccm_mod.o
$(RANLIB) libccm.a
f9t= ccm_barrier_mod.f90 ccm_init_mod.f90 ccm_mod.f90 make_mod.f90 \
ccm_error_mod.f90 ccm_merge_mod.f90 ccm_numz_mod.f90 ccm_check_sizes.f90 \
ccm_rs_mod.f90
FILES= $(f9t) *input *script makefile *include README build.txt build.html to_quad from_quad
archive:
tar -cf `date +%y%m%d`source.tar $(FILES)
tar:
tar -cf mpi_src.tar $(FILES)
_include 0100644 0000765 0000024 00000000306 07607556500 012122 0 ustar tkaiser staff # generic include file
FFLAGS=
MPICH=
F90= echo "you must setenv CCM_COM to compile" ; echo $<
PF90=echo "you must setenv CCM_COM to compile" ; echo $<
INC_DIR=
LIB_DIR=
LIB=
RANLIB=ranlib
aix_include 0100644 0000765 0000024 00000000172 07601724105 012615 0 ustar tkaiser staff # for MPI on AIX
FFLAGS= -qsuffix=f=f90
PF90=mpxlf90 $(FFLAGS)
F90=xlf90 $(FFLAGS)
INC_DIR=
LIB_DIR=
LIB=
RANLIB=ranlib
darwin_include 0100644 0000765 0000024 00000000314 07601724105 013316 0 ustar tkaiser staff # for MPICH on my OSX box
FFLAGS=
MPICH=/Volumes/unix/mpich
F90=f90 $(FFLAGS)
PF90=f90 $(FFLAGS)
INC_DIR= -I$(MPICH)/include
LIB_DIR= -L$(MPICH)/lib
LIB=-lfmpich -lmpich -lpmpich -lU77 -lc
RANLIB=ranlib
sgi_mpi_include 0100644 0000765 0000024 00000000123 07607661653 013475 0 ustar tkaiser staff # for the sgi
FFLAGS=
F90=f90
PF90=f90
INC_DIR=
LIB_DIR=
LIB=
RANLIB=/bin/ls -lt
t3e_mpi_include 0100644 0000765 0000024 00000000403 07601724105 013371 0 ustar tkaiser staff # for the cray t3e
# (-M 1110) == suppress the warning message about double precision mapping to real
# this effect mainly the tests but also one time in ccm_init_mod
FFLAGS= -M 1110
F90=f90 $(FFLAGS)
PF90=f90 $(FFLAGS)
INC_DIR=
LIB_DIR=
LIB=
RANLIB=ranlib
README 0100644 0000765 0000024 00000001316 07601724105 011267 0 ustar tkaiser staff If you are running csh or similar shell then
before doing a make do one of the following:
setenv CCM_COM sgi_mpi
setenv CCM_COM darwin
setenv CCM_COM aix
setenv CCM_COM t3e_mpi
depending on your platform.
If you are running sh or similar shell then
before doing a make do one of the following:
export CCM_COM=sgi_mpi
export CCM_COM=darwin
export CCM_COM=aix
export CCM_COM=t3e_mpi
This sets the make include file that is for the build.
Include files for other platforms can be created based
on these files. This version has not recently been
tested on the T3e and most likely will require minor
changes to work correctly on that machine.
See the file build.html or build.txt for additional information
build.txt 0100644 0000765 0000024 00000045226 07610624751 012265 0 ustar tkaiser staff Building the
Collective Communications Module
from the reference source
This document discusses building the Collective Communications Module from the reference source. In particular, it discusses building CCM from the MPI and Shmem reference implementations on the platforms listed below. It also discusses building for various sets of array ranks and porting the module to other platforms.
Versions and platforms
There are two primary versions and reference implementations of the Collective Communications Module. The first is based on MPI or the Message Passing Interface. A good discussion on MPI can be found in www-unix.mcs.anl.gov/mpi/. The second reference implementation is based on the Shared Memory or shmem routines defined by Cray and adapted by SGI. The company's links for information on the shmem routines are www.cray.com/products/software/mpt.html and www.sgi.com/software/mpt/.
The make files distributed with the reference implementations will compile for the platforms listed below. There are no differences in MPI source for the various platforms for which it is compiled. Except for some minor differences, the shmem implementation has the same source for each platform. Obviously, there are different compilers and library settings for each machine and implementation. These differences are isolated into a small make include file.
* MPI
o MPICH on Apple OSX, Absoft compiler
o IBM SP
o Cray T3e
o SGI Origin
* SHMEM
o T3e
o SGI Origin
o Cray SV1
Making using a predefined make include file
To build the reference implementations do the following.
1. Untar the source file in its own directory. For the MPI implementation:
mkdir mpi_src
mv mpi_src.tar mpi_src
cd mpi_src
tar -xf mpi_src.tar
or
cd mpi_src
mkdir shmem_src
mv shmem_src.tar shmem_src
cd shmem_src
tar -xf shmem_src.tar
2. Setenv
There is an environmental variable referenced in the make file that points to a make include file. The variable is CCM_COM. It needs to be set as follows:
For example on an IBM SP running tcsh you would type:
setenv CCM_COMM aix
3. Make
Type:
make make_mod
make
and the Collective Communications Module will be built.
What gets built?
The final products of the make are a library, libccm.a and a module file. The name of the module file is system dependent but is something like ccm.mod or CCM.mod. On most systems, both the library and mod files are required to run a program using the module.
There are a large number of intermediate of "*.o" or object files created. These are made from "*.f90" source files. Most of the "*.f90" files are created by a preprocessor make_mod. The Fortran source for make_mod is included with the distribution. Make_mod is actually run by the make file using a script make_mod_script. The script/program take as input "*.input" files and produce the "*.f90" files. The preprocessor is discussed in more detail below.
There is also a file created, build_report. Build_report contains a collection of html formatted tables. The tables show the data types and ranks of arrays that are supported by different routines in the module. For example for ccm_gather we have the entries:
This indicates that this routine was built for input arrays of rank 0 to 3 and output ranks 1 to 3, for two real sizes, integer, two complex sizes, logical and character variables. The methodology for adding higher array ranks is discussed in the "Preprocessor" section below.
What is in the make include file?
The make include file gives flags and paths required to build the module. The one shown below is for compiling using the MPICH version of MPI on Apple OSX
# for MPICH on Apple OSX
FFLAGS=
MPICH=/usr/local/mpich
F90=f90 $(FFLAGS)
PF90=f90 $(FFLAGS)
INC_DIR= -I$(MPICH)/include
LIB_DIR= -L$(MPICH)/lib
LIB=-lfmpich -lmpich -lpmpich -lU77 -lc
RANLIB=ranlib
There are no special flags required to build using MPICH so the FFLAGS line is blank. The next line points to the base MPICH directory. The lines F90 and PF90 are the serial and parallel Fortran 90 compile lines. The next two lines are the "-I" and "-L" options for the MPICH include files and library. The LIB line gives all of the libraries required to build an application using MPICH. The final line points to ranlib. Note on the SGI and Sv1 ranlib is not required so it is replaced with a dummy call to /bin/ls.
Preprocessor
Purpose
As discussed above, each routine is built for a collection of data types and array ranks. We must have a separate routine for each data type and input/output array rank pair so that we can take advantage of Fortran's capability to determine an array size for arguments passed to subroutines. This leads to a larger number of routines. The ideal way to "handle" the large number of routines would be to use templates. We would write a generic routine once and let the compiler create the various instances that we need. Unfortunately, Fortran 90 lacks the template capability. The preprocessor make_mod is used instead of templates.
Make_mod takes as input, data types, array ranks and a generic source file, *.input. It produces an output file that contains routines for the requested data types and array ranks.
Source data
The input files have a header and generic source. Consider the input file for the MPI version of CCM_BCAST.
.true.
bcast
0 3 0 3
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
character character mycharacter
subroutine $1_$2(x,root,the_err) !(x,to_send,root,the_err)
!base1 rank input .eq. 0 .and. rank output .eq. 0
same
end subroutine $1_$2
subroutine $1_$2(x,root,the_err) !(x,to_send,root,the_err)
!base1 rank other than zero
use ccm_numz
use ccm_error_mod
use ccm_rs_mod
implicit none
$3,intent (inout) :: x$4
integer, optional, intent(in) :: root
integer, optional, intent(out) :: the_err
integer :: local_root,local_size
integer :: r1,r2,mytype
if(present(the_err))the_err=0
r1=ccm_rank(x)
r2=r1
mytype=$6
if(iand(do_tests,ccm_trace) .ne. 0) &
write(*,*)myid," entered $1 with in/out ranks ",r1,r2," data type $3"
if(present(root))then
local_root=root
else
local_root=MPI_ROOT
endif
local_size=1 ! r1=0
local_size=size(x) ! r1>0
!start of error tests
if(iand(do_tests,ccm_checksize) .ne. 0)then
test_ray1(1)=local_size
call ccm_check_sizes("bcast",test_ray1,test_ray2,test_ray3,local_root,passed_test)
if(.not. passed_test)then
call ccm_warning("bcast",mytype,r1)
if(present(the_err))the_err=1
return
endif
endif
!end of error tests
call MPI_BCAST(x,local_size,mytype,local_root,mycomm,MPI_ERR)
if(mpi_err .ne. 0)then
write(err_str1,"(""low level communication error:"",i5)")mpi_err
call ccm_fatal("bcast",mytype,r1)
endif
end subroutine $1_$2
The first line .true. indicates that we want to define this routine only for input and output arrays of the same rank. (Note that this routine could be compiled with mixed rank arrays but for bcast this is not normally required since the input and output arrays are the same.)
The second line bcast gives the name of the routine. When the preprocessor creates the actual source this will be prepended with "ccm_" so the generic routine is actually called as
call ccm_bcast(....
The next line gives the ranks of the arrays for which this routine is defined, input ranks of 0 to 3 and output ranks of 0 to 3.
The next lines:
sp real(b4) myreal
dp real(b8) mydouble
in integer(def_int) myint
comp complex(c4) mycomp
dpcomp complex(c8) mydpcomp
logical logical mylogical
character character mycharacter
give information about the specific instances of the generic routine. The text in the first field is appended to the generic routine name to give base specific routine names. The base specific routine names have the ranks of the arrays appended to them to give a collection of specific routine names for a given generic routine.
The next field gives the data type for the routine. The kinds are defined in ccm_numz_mod.f90 to map to the two normal real and complex types, and the default integer type.
The third field gives an MPI data type. These are set in ccm_init_mod.f90. For the shmem version of the source these are dummy values but there is an additional field that indicates the data type for the shmem_put routines.
The rest of the file is the generic source for the routine. There are actually two blocks of text delineated by the lines:
subroutine $1_$2
end subroutine $1_$2
The first "subroutine" is for sending scalars only. The second subroutine is for all other cases. The line same in the first block tells the preprocessor to use the same source for the scalar and vector versions of the routine.
You will notice that the source contains $1, $2, $3, and so on. These are replaced by the preprocessor by other text as indicated in the table below. Note that many of the replacements are dependent on input and output array ranks:
$1 mod_name, ccm_bcast in this case
$2 routine name with ranks replaced with 00, 01, ... nm
$3 data type
$4 input data rank, replaced with one of the following
" " "(:) "
"(:,:) " "(:,:,:) "
"(:,:,:,:) " "(:,:,:,:,:) "
"(:,:,:,:,:,:) " "(:,:,:,:,:,:,:)"
$5 output data rank, replaced with one of the following
" " "(:) "
"(:,:) " "(:,:,:) "
"(:,:,:,:) " "(:,:,:,:,:) "
"(:,:,:,:,:,:) " "(:,:,:,:,:,:,:)"
$6 mpitype
$f input data rank, replaced with one of the following
" " "(:) "
"(:,1) " "(:,1,1) "
"(:,1,1,1) " "(:,1,1,1,1) "
"(:,1,1,1,1,1) " "(:,1,1,1,1,1,1)"
$g output data rank, replaced with one of the following
" " "(:) "
"(:,1) " "(:,1,1) "
"(:,1,1,1) " "(:,1,1,1,1) "
"(:,1,1,1,1,1) " "(:,1,1,1,1,1,1)"
$h routine name without ranks
$i shmem data type
$j pointer rank, replaced with " " for scalar, "(:)" for arrays
$k pointer rank,replaced with " " for scalar, "(:)" for arrays
$l i, numeric value for input rank 0 - n
$m j, numeric value for output rank 0 - n
The preprocessing program also has some additional filters that allow conditional compilation. Lines that contain the text in the table shown below are only added to the final source if the routine that is being generated is of the prescribed rank.
Specifying array ranks and data types
The routines in the reference implementation can be built for array ranks up to 7 by using the preprocessor. All that is required is to change the third line of the *.input file. However, this is not advisable unless absolutely needed. The size of the module is proportional to the square of the maximum array rank. For a single generic routine that is defined for array ranks 0-7 there are (8 input array ranks)*(8 output array ranks)*(7 data types) = 448 subroutines. Note that the need for higher ranks can be reduced by using the Fortran 90 reshape function. Also, the number of routines can be reduced by removing data type lines from the *.input files by using the standard Fortran 90 comment indicator "!".
Minimum lower bound values for array ranks
In order for an implementation to work correctly, the lower bounds for the input and output array ranks must have the values given in the table below. That is, the first and third values of the third line of the *input files must have the following values. Note that ccm_send and ccm_recv are only defined for rank 1 arrays.
Portability of the MPI reference implementation
Moving to other platforms
An effort was made to make the MPI reference implementation portable. The source should require only a few, if any, changes to move to a new platform. The known primary potential problem areas are related to data sizes. The data types used within the implementation are real(b8), real(b4), complex(c8), complex(c4), and integer(def_int) where we have b8, b4, c8, c4, and def_int defined in ccm_numz_mod.f90 as:
integer, parameter:: b8 = selected_real_kind(10)
integer, parameter:: b4 = selected_real_kind(5)
integer, parameter:: c8 = selected_real_kind(10)
integer, parameter:: c4 = selected_real_kind(5)
integer, parameter:: def_int = kind(iccm_dummy_int)
On most platforms these map to 4 and 8 byte reals, 8 and 16 byte complex values and the default integer which is either 2, 4, or 8 bytes. For some platforms the selected_real_kind parameters might need to be changed.
For MPI routines we create the types: myreal, mydouble, mycomp, mydpcomp and myint. These MPI types are defined in the routine ccm_init which is in the file ccm_init_mod.f90. The case statement
case(200)
! use mpi_real4 and mpi_real8
call MPI_TYPE_CONTIGUOUS(1,mpi_real4,myreal,mpi_err)
call MPI_TYPE_COMMIT(myreal,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
call MPI_TYPE_CONTIGUOUS(1,mpi_real8,mydouble,mpi_err)
call MPI_TYPE_COMMIT(mydouble,mpi_err)
if(mpi_err .ne. 0)call ccm_mpi_error("ccm_init")
case default
defines the first two types. The complex types are defined in terms of these. For other platforms selecting another block of the case statement might be required to get the correct mapping between Fortran and MPI data types.
Some Fortran compilers support three real and complex data types, the third is sometimes called quad precision. Quad precision real values use 16 bytes. (On the Cray SV1 double precision values use 16 bytes, normal reals 8 bytes, but the SV1 also has a 4 byte real.)
Not all Fortran compilers have a third real data type and MPI does not have direct support for quad precision reals. So by default, the MPI reference implementation builds for two real data types. To build the MPI implementation for quad precision do the following:
(1) In the *input files remove ! from the lines
!qp real(b16)
!qpcomp complex(c16)
The script to_quad can be used to perform these edits
to_quad *input
(2) In the file ccm_merge_mod.f90 remove the initial ! from lines containing !qp
The script to_quad can be used to perform these edits
to_quad ccm_merge_mod.f90
(3) Make as discussed above
Portability of the shmem implementation
There is a constant
integer, parameter :: maxprocs=64
defined in ccm_numz_sgi.f90 and ccm_numz_sv1.f90 that sets the maximum number of processors that can be used by the module. This parameter sets the sizes of two synchronization arrays
integer :: sync_block
common /ccm_p2p/ sync_block(-1:maxprocs*2)
in the same file and
integer(selected_int_kind(18)) :: mycray(4,maxprocs)
in the ccm_checkin routine. To run on a larger number of processors maxprocs must be increased. If you try to run on a number of processors greater than maxprocs a warning message will be printed and the job will stop. Maxprocs is a constant, instead of variable, to avoid the use of Cray style pointers.
Unfortunately, the Cray/SGI shmem library routines are not portable across on a large number of platforms. Indeed, there are significant difference in the implementation of the shmem routines across various SGI and Cray machines. Thus a different philosophy was adopted for the shmem implementation.
The routines that are part of the API are defined in terms of generic send and receive operations. The shmem ccm_bcast routine, for example, does not contain a reference to shmem routines. It does call ccm_send and ccm_recv. Ccm_send and ccm_recv are defined for each data type in terms of shmem routines. These routines are only needed and defined for rank 1 arrays.
The shmem implementation is a good place to start to create a new implementation of the API. Replacing the routines Ccm_send and ccm_recv would constitute most of the work.
There is a coding practice used in the shmem implementation routines that could cause problems when moving to a new platform. Several of the routines make use of single dimensional pointers to access whole arrays of higher ranks. This works on most platforms but it is not universally portable.
A simple example is, say we have a two dimensional array, x(10,10), and a pointer, y(:). Some of the routines do things similar to following to set values for the 2d array.
y=>x(:)
do i=1,100
y(i)= ....
enddo
Although this works on most platforms, the Fortran standard dictates that we should only reference ten elements of the array using this scheme. For the shmem routines to be Fortran compliant, these types of operations with pointers should be replaced with explicit array copies.
By default, the shmem reference implementation builds for two real and complex data types, 4 and 8 bytes. To build the MPI implementation for 16 byte data types do the following:
(1) In the *input files remove ! from the lines
!qp real(b16)
!qpcomp complex(c16)
The script to_quad can be used to perform these edits
to_quad *input
(2) In the file ccm_merge_mod.f90 remove the initial ! from lines containing !qp
The script to_quad can be used to perform these edits
to_quad ccm_merge_mod.f90
(3) Make as discussed above
Finally, it was discovered that some shmem implementations have problems sending complex values in some instances. To get around this problem complex values are sent as a pair of reals. Contrary to what the online documentaion says, the SV1 does not have a put operation for character values. One was written in terms of integers. build.html 0100755 0000765 0000024 00000061732 07610624456 012417 0 ustar tkaiser staff
Building the Collective Communications Module
Building the
Collective Communications Module
from the reference source
This document discusses building the Collective Communications Module from the reference source. In particular, it discusses building CCM from the MPI and Shmem reference implementations on the platforms listed below. It also discusses building for various sets of array ranks and porting the module to other platforms.
Versions and platforms
There are two primary versions and reference implementations of the Collective Communications Module. The first is based on MPI or the Message Passing Interface. A good discussion on MPI can be found in www-unix.mcs.anl.gov/mpi/. The second reference implementation is based on the Shared Memory or shmem routines defined by Cray and adapted by SGI. The company's links for information on the shmem routines are
www.cray.com/products/software/mpt.html and www.sgi.com/software/mpt/.
The make files distributed with the reference implementations will compile for the platforms listed below. There are no differences in MPI source for the various platforms for which it is compiled. Except for some minor differences, the shmem implementation has the same source for each platform. Obviously, there are different compilers and library settings for each machine and implementation. These differences are isolated into a small make include file.
- MPI
- MPICH on Apple OSX, Absoft compiler
- IBM SP
- Cray T3e
- SGI Origin
- SHMEM
Making using a predefined make include file
To build the reference implementations do the following.
1. Untar the source file in its own directory. For the MPI implementation:
mkdir mpi_src
mv mpi_src.tar mpi_src
cd mpi_src
tar -xf mpi_src.tar
or
cd mpi_src
mkdir shmem_src
mv shmem_src.tar shmem_src
cd shmem_src
tar -xf shmem_src.tar
2. Setenv
There is an environmental variable referenced in the make file that points to a make include file. The variable is CCM_COM. It needs to be set as follows:
setenv CCM_COM | Communications Library |
MPI |
Shmem |
Platform |
Apple OSX | darwin | NA |
IBM SP | aix | NA |
SGI | sgi_mpi | sgi_shmem |
Cray SV1 | NA | sv1_shmem |
Cray T3e | t3e_mpi | t3e_shmem |
For example on an IBM SP running tcsh you would type:
setenv CCM_COMM aix
3. Make
Type:
make make_mod
make
and the Collective Communications Module will be built.
What gets built?
The final products of the make are a library, libccm.a and a module file. The name of the module file is system dependent but is something like ccm.mod or CCM.mod. On most systems, both the library and mod files are required to run a program using the module.
There are a large number of intermediate of "*.o" or object files created. These are made from "*.f90" source files. Most of the "*.f90" files are created by a preprocessor make_mod. The Fortran source for make_mod is included with the distribution. Make_mod is actually run by the make file using a script make_mod_script. The script/program take as input "*.input" files and produce the "*.f90" files. The preprocessor is discussed in more detail below.
There is also a file created called build_report. Build_report contains a collection of html formatted tables. The tables show the data types and ranks of arrays that are supported by different routines in the module. For example for ccm_gather we have the entries:
gather | 1 | 2 | 3 |
0 | y | y | y |
1 | y | y | y |
2 | y | y | y |
3 | y | y | y |
gather | real(b4) | real(b8) | integer(def_int) | complex(c4) | complex(c8) | logical | |