/* Written by: Hans Munthe-Kaas Assoc. Prof. Dept. of Computer Science University of Bergen N-5020 Bergen Norway Email: hans@eik.ii.uib.no SPFFT Version: 1.0 Last revised: March 29, 1993 */ /* MasPar Fortran version by Erik Boman (erikb@ii.uib.no). */ #include "spfft.h" /* The following file contains code segments that should be tuned carefully */ #include "spfftOptim.h" /*****************************************************************************/ void fft_srt(arr,mode,ctrl) PLCPLX *arr; int mode; struct FFTCTL *ctrl; { if (ctrl->final_rout==NULL) ctrl->final_rout = blp_rout(ctrl->final_blp); if ((mode == 1) || (mode == -1)) { #if (float_type == 1) permut( arr, 8, 1, mode, ctrl->final_rout); #else permut( arr, 8, 2, mode, ctrl->final_rout); #endif } if ((mode == 2) || (mode == -2)) { int asiz; asiz = 1<<(ctrl->mbits); #if (float_type == 1) permut( arr, 8, 1, mode, ctrl->final_rout); permut( &(arr[asiz]), 8, 1, mode, ctrl->final_rout); #else permut( arr, 8, 2, mode, ctrl->final_rout); permut( &(arr[asiz]), 8, 2, mode, ctrl->final_rout); #endif } return; } /*****************************************************************************/ void fft_free(ctrl) struct FFTCTL *ctrl; { P_FREE(ctrl->twdfac); P_FREE(ctrl->twdpt); FREE(ctrl->actgr); FREE(ctrl->agrbits); P_FREE(ctrl->fc); P_FREE(ctrl->ifc); P_FREE(ctrl->sPackRout1); P_FREE(ctrl->sPackRout2); if (ctrl->unscr_blp != NULL) blp_free(ctrl->unscr_blp); if (ctrl->scr_blp != NULL) blp_free(ctrl->scr_blp); if (ctrl->post_blp != NULL) blp_free(ctrl->post_blp); if (ctrl->final_blp != NULL) blp_free(ctrl->final_blp); if (ctrl->mem2l_blp != NULL) blp_free(ctrl->mem2l_blp); if (ctrl->l2mem_blp != NULL) blp_free(ctrl->l2mem_blp); if (ctrl->final_rout != NULL) pmr_free(ctrl->final_rout); FREE(ctrl); } /*****************************************************************************/ struct FFTCTL * fft_init(mode,numdim,dimx,trszx,dimy,trszy,dimz,trszz) int mode,numdim,dimx,trszx,dimy,trszy,dimz,trszz; { struct FFTCTL *ctrl; int nb,tbits,dy,tzy,dz,tzz; unsigned mb[3], agrlst[6]; /* in case too few paramerters are supplied, fill last args with 1 */ if (numdim < 3) { tzz = 1; dz = 1; } else { tzz = trszz; dz = dimz; } if (numdim < 2) { tzy = 1; dy = 1; } else { tzy = trszy; dy = dimy; } /* test input data */ if (numdim > 3) ERROR("fft_init: numdim > 3!\nUse advanced initialization routine fft_ainit for higher dimesional transforms.\n"); if ((dimx&(dimx-1)) || (dy&(dy-1)) || (dz&(dz-1))) ERROR("fft_init: All dimensions must be powers of 2\n"); if (dimx%trszx) ERROR("fft_init: Illegal value of trszx!\n"); if (dy%tzy) ERROR("fft_init: Illegal value of trszy!\n"); if (dz%tzz) ERROR("fft_init: Illegal value of trszz!\n"); if ((mode > 1) || (mode < 0)) ERROR("fft_init: Illegal mode!\n"); if (dimx1)&&(dimynyproc) { numbits(nb,dimx-1); } else { numbits(nb,trszx-1); } mb[1] = MAX(0,nb-lnxproc); } numbits(tbits,dimx*dy*dz-1); mb[2] = MAX(0,tbits-lnproc-mb[1]-mb[0]); /* set up active groups */ agrlst[0] = 0; numbits(nb,trszx-1); agrlst[1] = nb; agrlst[2] = lnxproc + mb[0] + mb[1]; numbits(nb,trszy-1); agrlst[3] = nb; numbits(nb,trszz-1); agrlst[5] = nb; agrlst[4] = tbits - nb; ctrl = fft_ainit(mode,mb,numdim,agrlst,NULL); return(ctrl); } /* end subroutine fft_init */ /****************************************************************************/ struct FFTCTL * fft_ainit(mode,mb,nactgr,agrlst,postprmut) int mode,*mb,nactgr,*agrlst; BL_PERMUT postprmut; { struct FFTCTL *ctrl; int mbits,tbits,igr,intmat[32*32]; unsigned *agrbits; /* printf("mode: %d\n",mode); printf("mb: %d %d %d\n",mb[0],mb[1],mb[2]); printf("nactgr: %d\n",nactgr); for (igr=0;igr 1)) ERROR("fft_ainit: Illegal mode!\n"); if (mb[2]<1) { if (mode == 0) { ERROR("fft_ainit: Data array is too small or badly organized. You must always have at least twice as many data points as processors, and m.s.b. must be a memory bit.\n"); } if (mode == 1) { ERROR("fft_ainit: Data array is too small or badly organized. Real transforms need 4x as many points as processors, and the 2 msb. must must be memory bits.\n"); } } for (igr=0;igr tbits) { if (mode == 0) { ERROR("fft_ainit: Active groups list incorrect. Transform larger than specified memory address space!\n"); } if (mode == 1) { ERROR("fft_ainit: Active groups list incorrect. Transform length cannot be longer than half the arraysize in final direction, i.e. msb. must be inactive!\n"); } } } for (igr=0;igr agrlst[2*igr+2]) ERROR("fft_ainit : Active groups list incorrect. Transforms overlap, or doesn't appear in ascending order.\n"); if (agrlst[2*igr+1] == 0 ) ERROR("fft_ainit : Active groups list incorrect. Active group of length 0.\n"); } /* allocate space for necessary arrays */ MALLOC(ctrl,1,struct FFTCTL); /* set all pointers to NULL to indicate that they are not alloc'ed */ ctrl->twdfac = NULL; ctrl->twdpt = NULL; ctrl->actgr = NULL; ctrl->agrbits = NULL; ctrl->fc = NULL; ctrl->ifc = NULL; ctrl->unscr_blp = NULL; ctrl->scr_blp = NULL; ctrl->post_blp = NULL; ctrl->final_blp = NULL; ctrl->mem2l_blp = NULL; ctrl->l2mem_blp = NULL; ctrl->final_rout = NULL; ctrl->sPackRout1 = NULL; ctrl->sPackRout2 = NULL; /* set up memory structure info */ /* Address space: (mb[2],ybits,mb[1],xbits,mb[0]), msb is to the left. */ ctrl->mb[0] = mb[0]; ctrl->mb[1] = mb[1]; ctrl->mb[2] = mb[2]; ctrl->mbits = mbits; /* set up masks for membits, xbits, ybits */ ctrl->xbits = (unsigned) (nxproc-1)<< ctrl->mb[0]; ctrl->ybits = (unsigned) (nyproc-1)<< (ctrl->mb[0]+lnxproc+ctrl->mb[1]); ctrl->membits = (unsigned) ((1<xbits|ctrl->ybits); /* info about symmetries */ ctrl->symmetry = mode; /* compute active groups masks */ /* compute pattern of, and count no. of bits that are active somewhere */ /* compute scaling factor for inverse transform */ { unsigned msk1,msk2,xbits,ybits,actbits; int actcnt,grst,grlen; MALLOC(ctrl->actgr,2*nactgr,short); for (igr=0;igr<2*nactgr;igr++) { ctrl->actgr[igr] = agrlst[igr]; } MALLOC(agrbits,nactgr,unsigned); ctrl->agrbits = agrbits; ctrl->nactgr = nactgr; actbits = 0; actcnt = 0; for (igr=0;igractbits = actbits; ctrl->actcnt = actcnt; ctrl->scale = 1/((FLTYP) (1<scale = (FLTYP) sqrt((double) ctrl->scale); #endif } /* Compute permutation information */ /* This routine sets ctrl-> unscr_blp, scr_blp, mem2l_blp, l2mem_blp, post_blp, final_blp */ fft_permut(ctrl,postprmut); return(ctrl); } /* end subroutine fft_ainit */ /*****************************************************************************/ void symmPack_rout(int mode, struct FFTCTL *ctrl) { plural unsigned short *sPackRout; int madr,asiz,igr,ngr,nbits; BL_PERMUT permut, ipermut; unsigned msk; register plural unsigned tadr, dstad, memdest, procdest; asiz = 1<<(ctrl->mbits); ngr = ctrl->nactgr; nbits = lnproc+ctrl->mbits; P_MALLOC(sPackRout,2*asiz,plural unsigned short); /* find permutation to arrive in unscr. membit order */ if (mode==1) { if ((ctrl->post_blp) == NULL) { permut = blp_identNew(nbits); } else { permut = blp_copyNew(ctrl->post_blp); blp_inv(permut); } } else { permut = blp_copyNew(ctrl->unscr_blp); } blp_lMult(permut,ctrl->l2mem_blp); ipermut = blp_copyNew(permut); blp_inv(ipermut); /* Loop over all addresses, compute dest. address */ for (madr=0; madragrbits[igr]; if (dstad & msk) { dstad ^= msk; dstad += 1<<(ctrl->actgr[2*igr]); } } blp_DestAd(tadr,ipermut,dstad); memdest = tadr >> lnproc; procdest = tadr & (nproc-1); /* send memory destination to the processor where it will be used */ router[procdest].memdest = memdest; sPackRout[2*madr] = procdest; sPackRout[2*madr+1] = memdest+asiz; } /* insert routing into ctrl */ if (mode == 1) { ctrl->sPackRout1 = sPackRout; } else { ctrl->sPackRout2 = sPackRout; } blp_free(permut); blp_free(ipermut); return; } /*****************************************************************************/ void symm_pack(PLCPLX *arr, int mode, struct FFTCTL *ctrl) { register plural long long lltmp, *llarr; plural unsigned short *sPackRout; int madr, asiz; register plural unsigned short pdest, mdest; if ((mode > 2) | (mode < -2) | (mode == 0)) { ERROR("symm_pack: Illegal mode!\n"); } asiz = 1<<(ctrl->mbits); switch (mode > 0) { case 1: { /* real-> complex, unpacking after transform */ if (mode == 1) { /* unscrambling is performed before unpacking */ if ((ctrl->sPackRout1) == NULL) { symmPack_rout(1,ctrl); } sPackRout = ctrl->sPackRout1; } else { /* unscrambling is not performed */ if ((ctrl->sPackRout2) == NULL) { symmPack_rout(2,ctrl); } sPackRout = ctrl->sPackRout2; } llarr = (plural long long *) arr; for (madr=0; madr real, packing before transform */ for (madr=0;madrmbits; membits = ctrl->membits; ybits = ctrl->ybits; xbits = ctrl->xbits; tbits = mbits + lnproc; asiz = 1<nactgr; agrbits = ctrl->agrbits; actbits = ctrl->actbits; actcnt = ctrl->actcnt; /* estimate max sizes of, and allocate arrays */ /* These lists will later be used as dynamic arrays, so if we underestimate the size, its not important */ siztwl = 2*(asiz+lnproc); P_MALLOC(itwls,siztwl,plural unsigned); P_MALLOC(twdpt,asiz*actcnt,plural unsigned char); igroup = nactgr; actgrp = 0; itwp = 0; twlsln = 0; mskcb = 1<<(tbits-1); /* mask for current bit (long address) */ /* loop over all bits from msb to lsb */ while (mskcb>0) { if (mskcb&membits) { /* Memory part of transform (butterfly, butterfly ...) */ /* is current bit active? */ if (mskcb&actbits) { /* should we get a new active group? */ if(!actgrp) { igroup--; if (igroup < 0) ERROR("FATAL!\n"); actgrp = agrbits[igroup]; } actgrp ^= mskcb; /* compute current memory bit */ memcb = mskcb; if (mskcb>xbits) memcb>>=lnxproc; if (mskcb>ybits) memcb>>=lnyproc; ominc = memcb<<1; /* increment in looping over outer mem adr. */ /* loop over outer memory addresses */ for (omadr=0;omadrmb); smb_bfl /* this code segment is defined above */ } /* end looping over inner addresses */ } /* end looping over outer addresses */ } /* end if active */ mskcb>>=1; } /* end of a memory bit iteration */ else if ((mskcb&ybits) || (mskcb&xbits)) { /* x- and y-direction part of transform */ /* (bitswap,butterfly,bitswap,butterfly ...) */ /* loop over memory addresses */ swbit = asiz>>1; /* use msb as swap bit */ mskcb2 = mskcb; /* mask for current bit (long address) */ igroup2 = igroup; actgrp2 = actgrp; for(madr=0;madr>1;btmsk>0;btmsk>>=1,mskcb>>=1) { /* do the reduction if this bit is active */ if (actbits&mskcb) { /* should we get a new active group? */ if(!actgrp) { igroup--; if (igroup < 0) ERROR("FATAL!\n"); actgrp = agrbits[igroup]; } actgrp ^= mskcb; /* symbolic butterfly */ long_adr(curadr,madr,ctrl->mb); smb_bfl /* this code segment is defined above */ } } } /* end looping over memory addresses */ } /* end x- and y-direction */ } /* end looping over all bits */ /* find actual size of space required by twiddle factors */ atwsiz = reduceMax32(twlsln); /* allocate space for, and compute twiddle factors */ if (atwsiz > 256) ERROR("fft_ainit: twiddle factor list too long. Code can not handle this geometry. This can be fixed by changing all occurences of 'unsigned char' with 'unsigned short' in spfft.m and spfft.h. This will, however, increase memory usage considerably.\n"); P_MALLOC(twdfac,atwsiz,PLCPLX); { plural double arg; for (i=0;itwdpt = twdpt; ctrl->twdfac = twdfac; ctrl->lsttwp = itwp-1; /* last twiddle pointer */ return; } /* end of subroutine smb_fft */ /**************************************************************************/ void sp_fft(arr,mode,ctrl) PLCPLX arr[]; struct FFTCTL *ctrl; int mode; { unsigned mbits,tbits,actcnt,siztwl,btmsk,atwsiz,asiz,actbits,mskcb; unsigned membits,xbits,ybits,mskcb2,swbit; register int i,icb,madr,ominc,omadr; register unsigned tbad, bbad, memcb; plural unsigned itwp; plural unsigned char *twdpt; register plural FLTYP tmp,tmp0,tmp1,q0r,q0i,q1r,q1i; register plural unsigned char twdad; PLCPLX *twdfac; if ((mode>2) || (mode < -2)) ERROR("sp_fft: Illegal mode!\n"); if (ctrl->twdpt == NULL) { /* symbolic fft */ /* this subroutine sets ctrl-> twdfac, twdpt, lsttwp */ smb_fft(ctrl); } if (mode == 0) return; /* mode 0 used to only perform precomput. */ twdfac = ctrl->twdfac; twdpt = ctrl->twdpt; mbits = ctrl->mbits; tbits = mbits+lnproc; membits = ctrl->membits; xbits = ctrl->xbits; ybits = ctrl->ybits; asiz = 1<<(ctrl->mbits); actbits = ctrl->actbits; /* Address space: { mb[2], lnyproc, mb[1], lnxproc, mb[0] } */ /* where mb[i] is the number of memory bits in each segment i */ switch (mode>0) { case 1: /* if (mode == 1) do forwards transform */ /* Pack real data? */ if (ctrl->symmetry == 1) { for (madr=0;madrscale; for (madr=0;madr0) { if (mskcb&membits) { /* Memory part of transform (butterfly, butterfly ...) */ /* is current bit active? */ if (mskcb&actbits) { /* compute current memory bit */ memcb = mskcb; if (mskcb>xbits) memcb>>=lnxproc; if (mskcb>ybits) memcb>>=lnyproc; ominc = memcb<<1; /* increment in looping over outer mem adr. */ /* loop over outer memory addresses */ for (omadr=0;omadr>=1; } /* end of a memory bit iteration */ else if (mskcb&ybits) { /* y-direction part of transform */ /* (bitswap,butterfly,bitswap,butterfly ...) */ /* loop over memory addresses */ bitswDef; /* Code defined in "spftOptim.h" */ swbit = asiz>>1; /* use msb as swap bit */ for(madr=0;madr>1;btmsk>0;btmsk>>=1,mskcb2>>=1) { /* do the reduction if this bit is active */ if (actbits&mskcb2) { /* bitswap */ bitswapy(btmsk) /* code defined in "spfftOptim.h" */ /* butterfly and twiddle */ butterfly_code /* code defined in "spfftOptim.h" */ } } /* store back results */ arr[madr][0] = q0r; arr[madr][1] = q0i; arr[madr+swbit][0] = q1r; arr[madr+swbit][1] = q1i; } /* end looping over memory addresses */ mskcb = mskcb2; } /* end y-direction */ else if (mskcb&xbits) { /* x-direction part of transform */ /* (bitswap,butterfly,bitswap,butterfly ...) */ /* loop over memory addresses */ bitswDef; /* Code defined in "spftOptim.h" */ swbit = asiz>>1; /* use msb as swap bit */ for(madr=0;madr>1;btmsk>0;btmsk>>=1,mskcb2>>=1) { /* do the reduction if this bit is active */ if (actbits&mskcb2) { /* bitswap */ bitswapx(btmsk) /* code defined in "spfftOptim.h" */ /* butterfly and twiddle */ butterfly_code /* code defined in "spfftOptim.h" */ } } /* store back results */ arr[madr][0] = q0r; arr[madr][1] = q0i; arr[madr+swbit][0] = q1r; arr[madr+swbit][1] = q1i; } /* end looping over memory addresses */ mskcb = mskcb2; } /* end x-direction */ } /* end looping over all bits */ /* Unscramble? */ if (mode == 1) fft_srt(arr,1,ctrl); /* Pack out real-> complex? */ if (ctrl->symmetry) symm_pack(arr,mode,ctrl); return; break; /* break out of case (mode == 1) */ case 0: /* if (mode == -1 or -2) do backwards transform */ /* Pack before complex -> real */ if (ctrl->symmetry) symm_pack(arr,mode,ctrl); /* Scramble? */ if (mode == -1) fft_srt(arr,-1,ctrl); itwp = ctrl->lsttwp; /* pointer to twiddle list */ mskcb = 1; /* mask for current bit (long address) */ /* loop over all bits from lsb to msb */ while (mskcb< (1<xbits) memcb>>=lnxproc; if (mskcb>ybits) memcb>>=lnyproc; ominc = memcb<<1; /* increment in looping over outer mem adr. */ /* loop over outer memory addresses */ for (omadr=asiz-ominc;omadr>=0;omadr-=ominc) { /* loop over inner memory addresses */ for (madr=memcb-1;madr>=0;madr--) { /* compute addresses, and fetch top and bottom butterfly part */ tbad = madr | omadr; q0r = arr[tbad][0]; q0i = arr[tbad][1]; bbad = tbad | memcb; q1r = arr[bbad][0]; q1i = arr[bbad][1]; /* butterfly and twiddle */ inv_bufly_code /* code defined in "spfftOptim.h" */ /* store back results */ arr[tbad][0] = q0r; arr[tbad][1] = q0i; arr[bbad][0] = q1r; arr[bbad][1] = q1i; } /* end looping over inner addresses */ } /* end looping over outer addresses */ } /* end if active */ mskcb<<=1; } /* end of a memory bit iteration */ else if (mskcb&ybits) { /* y-direction part of transform */ /* (bitswap,butterfly,bitswap,butterfly ...) */ /* loop over memory addresses */ bitswDef; /* Code defined in "spftOptim.h" */ swbit = asiz>>1; /* use msb as swap bit */ for(madr=swbit-1;madr>=0;madr--) { /* pick data from memory */ q0r = arr[madr][0]; q0i = arr[madr][1]; q1r = arr[madr+swbit][0]; q1i = arr[madr+swbit][1]; mskcb2 = mskcb; /* mask for current bit (long address) */ for(btmsk=1;btmsk>1; /* use msb as swap bit */ for(madr=swbit-1;madr>=0;madr--) { /* pick data from memory */ q0r = arr[madr][0]; q0i = arr[madr][1]; q1r = arr[madr+swbit][0]; q1i = arr[madr+swbit][1]; mskcb2 = mskcb; /* mask for current bit (long address) */ for(btmsk=1;btmskscale; for (madr=0;madrsymmetry == 1) { for (madr=0;madractgr; swapmsk = ctrl->actbits & ((ctrl->ybits) | (ctrl->xbits)); nbits = ctrl->mbits + lnproc; mb = ctrl->mb; for (i=0;inactgr;igr++) { grbeg = actgr[2*igr]; grlen = actgr[2*igr+1]; for (i=0;i=0; i--,btmsk>>=1) { if (btmsk&swapmsk) { chmat[i*nbits + i] = 0; chmat[i*nbits + swapbit] = 1; swapbit = i; } } chmat[(nbits-1)*nbits + (nbits-1)] = 0; chmat[(nbits-1)*nbits + swapbit] = 1; perfshuf = blp_cMatNew(chmat,rhs,nbits); /* make permut which reorganise memory mapping */ /* mem_bits interlaced -> mem_bits_left */ for (i=0;il2mem_blp = imemmap; ctrl->mem2l_blp = memmap; ctrl->unscr_blp = ftperm; ctrl->scr_blp = blp_copyNew(ftperm); blp_inv(ctrl->scr_blp); /* Install post-permutation */ if (postprmut != NULL) { ctrl->post_blp = blp_copyNew(postprmut); ctrl->final_blp = blp_copyNew(postprmut); blp_rMult(ctrl->final_blp,ctrl->unscr_blp); } else { ctrl->post_blp = NULL; ctrl->final_blp = blp_copyNew(ctrl->unscr_blp); } return; }