#include #define PRECISION float /*************************************************************************** * * * DATA PARALLEL BLAS based on MPL * * * * Internal routine, this routine is not supposed to be * * called by user programs. * * * * Version 1.0 1/4-92 , * * For MasPar MP-1 computers * * * * para//ab, University of Bergen, NORWAY * * * * The calling sequence may be changed in a future version. * * Please report any BUGs, ideas for improvement or other * * comments to * * adm@parallab.uib.no * * * * Future versions may then reflect your suggestions. * * The most current version of this software is available * * from netlib@nac.no , send the message `send index from maspar' * * * * REVISIONS: * * * ***************************************************************************/ #ifdef __STDMPL__ void mpl_rec_spsW( register plural PRECISION a0, register plural PRECISION a1, register plural PRECISION *aps0, register plural PRECISION *aps1, int m, int n ) #else void mpl_rec_spsW(a0,a1,aps0,aps1,m,n) register plural PRECISION a0,a1,*aps0,*aps1; int m,n; #endif /* block preskewing west on a rectangular machine */ { register plural int ix=ixproc,iy=iyproc; register int nx=nxproc,ny=nyproc; /* set elements outside m*n block to zero */ if ((m=m) || (ix>=n)) a0 = 0.0; if ((iy+ny >=m) || (ix>=n)) a1 = 0.0; } /* use the router to preskew A west */ /* shift row i by 2*i places */ *aps0 = router[iy*nx + ((ix+(iy<<1))&(nx-1))].a0; *aps1 = router[iy*nx + ((ix+(iy<<1)+1)&(nx-1))].a1; } #ifdef __STDMPL__ void mpl_rec_spsN( plural PRECISION b0, plural PRECISION b1, plural PRECISION *bps0, plural PRECISION *bps1, int m, int n ) #else void mpl_rec_spsN(b0,b1,bps0,bps1,m,n) plural PRECISION b0,b1; plural PRECISION *bps0,*bps1; int m,n; #endif /* block preskewing west on a rectangular machine preskewing B north and reshuffling to new storage scheme where two adjacent elements in the N-S direction reside on the same processor */ /* written by Erik Boman, 90/07/17 */ /* this routine uses 4 router calls, this can be reduced to 2 by doing the "reshuffling" between different storage schemes together with the preskewing in one phase */ { register plural PRECISION btmp0,btmp1; register plural PRECISION tmp0; register plural int to0,to1, tmp1; register plural int ix=ixproc,iy=iyproc; register int nx=nxproc,ny=nyproc; /* set elements outside m*n block to zero */ if ((m=m) || (ix>=n)) b0 = 0.0; if ((iy+ny >=m) || (ix>=n)) b1 = 0.0; } /* preskewing B north */ btmp0 = router[ix + ((ix+iy)&(ny-1))*nx].b0; btmp1 = router[ix + ((ix+iy)&(ny-1))*nx].b1; if ((ix+iy >= ny) && (ix+iy < nx) ) { tmp0 = btmp0; btmp0 = btmp1; btmp1 = tmp0; } /* reshuffle B to the new storage scheme */ /* before : b(i,j) and b(i+ny,j) are on proc(i,j) after : b(2i,j) and b(2i+1,j) are on proc(i,j) */ to0 = (iy>>1)*nx + ix; to1 = ((iy+ny)>>1)*nx + ix; if (iy&1){ /* iy is odd */ tmp0 = btmp0; btmp0 = btmp1; btmp1 = tmp0; tmp1 = to0; to0 = to1; to1 = tmp1; } router[to0].btmp0 = btmp0; router[to1].btmp1 = btmp1; if (iy >= ny>>1){ tmp0 = btmp0; btmp0 = btmp1; btmp1 = tmp0; } *bps0 = btmp0; *bps1 = btmp1; }