/* p3pack - PERMUTATION LIBRARY FOR MASPAR --------------------------------------- Version 1.0 ----------- Author: ------ Hans Munthe-Kaas, Department of computer science, University of Bergen, N-5020 Bergen. Email: hans@ii.uib.no Conditions for use: ------------------ *************************************************************************** * For non-commercial use and research, the codes are available free of * * charge. The author would appreciate hearing from users of the code. * * It is not allowed to use the code in commercial software without * * written permission from the author. * * The authors name should never be removed from the source code. * *************************************************************************** Reference: --------- Munthe-Kaas H.: "Practical Parallel Permutation Procedures", to appear. Preprint available from the author. */ #include #include "p3pack.h" /***************************************************************************/ BL_PERMUT blp_identNew(int nbits) /* Create the identity bit linear permutation */ { BL_PERMUT C; unsigned *pm; int i; MALLOC(pm,nbits+1,unsigned); pm[nbits] = 0; for (i=0;inbits = nbits; C->pmat = pm; return(C); } /***************************************************************************/ BL_PERMUT blp_btprmNew(int *btprm, int nbits) /* Create bit linear permutation from bit-permutation vector */ { BL_PERMUT C; unsigned *pm; int i; MALLOC(pm,nbits+1,unsigned); pm[nbits] = 0; for (i=0;inbits = nbits; C->pmat = pm; return(C); } /***************************************************************************/ BL_PERMUT blp_iMatNew(int *A, int *b, int nbits) /* Create bit linear permutation from int matrix A and rhs b */ { BL_PERMUT C; unsigned *pm; int i,j,piv; MALLOC(pm,nbits+1,unsigned); for (i=0;inbits = nbits; C->pmat = pm; return(C); } /***************************************************************************/ BL_PERMUT blp_cMatNew(char *A, char *b, int nbits) /* Create bit linear permutation from char matrix A and rhs b */ { BL_PERMUT C; unsigned *pm; int i,j,piv; MALLOC(pm,nbits+1,unsigned); for (i=0;inbits = nbits; C->pmat = pm; return(C); } /*****************************************************************************/ BL_PERMUT blp_copyNew(BL_PERMUT A) /* Create and copy a bit-linear permutation */ { BL_PERMUT C; int i; MALLOC(C,1,BL_PERMUT_STRUCT); MALLOC(C->pmat,A->nbits+1,unsigned); C->nbits = A->nbits; for(i=0; i <= A->nbits; i++) C->pmat[i] = A->pmat[i]; return(C); } /*****************************************************************************/ void blp_copy(BL_PERMUT A, BL_PERMUT B) /* Copy a bit-linear permutation to existing permut. of same size */ { int i; if (A->nbits != B->nbits) ERROR("blp_copy: Matrices are of different size!\n"); for(i=0; i <= B->nbits; i++) A->pmat[i] = B->pmat[i]; return; } /*****************************************************************************/ void blp_free(BL_PERMUT A) /* Remove data allocated for bit-linear permutation A */ { FREE(A->pmat); FREE(A); return; } /*****************************************************************************/ void blp_print(BL_PERMUT A) /* Print bit-linear permutation A */ { int i,j; unsigned ipiv; printf(" Matrix Rhs. \n"); printf("------------------------------------------------------\n"); for (i=0,ipiv=1;inbits;i++,ipiv<<=1) { printf("( "); for (j=0;jnbits;j++) { if (A->pmat[j] & ipiv) printf("1 "); else printf("0 "); } if (A->pmat[A->nbits] &ipiv) printf(") ( 1 )\n"); else printf(") ( 0 )\n"); } printf("------------------------------------------------------\n"); } /***************************************************************************/ void blp_lMult(BL_PERMUT A, BL_PERMUT B) /* Computes the product of two permutations: A = B*A . Answer is returned by updating A. */ { register unsigned tmp; unsigned *amat,*bmat; unsigned ipiv; int i,j,nbits; if (A->nbits != B->nbits) ERROR("blp_lMult : Matrices are of different size!\n"); nbits = A->nbits; amat = A->pmat; bmat = B->pmat; for (i=0;i<=nbits;i++) { tmp = 0; for (j=0,ipiv=1; jnbits != B->nbits) ERROR("blp_rmult : Matrices are of different size!"); nbits = A->nbits; amat = A->pmat; bmat = B->pmat; MALLOC(tmp,nbits+1,unsigned); for (i=0;i<=nbits;i++) { tmp[i] = 0; for (j=0,ipiv=1; jpmat; nbits = A->nbits; BMat[nbits] ^= 1<=0;ib--) { piv = 1<nbits; if (nbits < lnproc) { ERROR("PxBlp_rout: Permutation is smaller than MasPar.\n"); } nadr = 1<<(nbits-lnproc); /* Allocate work arrays */ MALLOC(A,nbits+1,unsigned); MALLOC(Mi,nbits+1,unsigned); P_MALLOC(tab0,nadr,plural unsigned); P_MALLOC(tab1,nadr,plural unsigned); /* Allocate return structure */ MALLOC(prm_data,1,PERMUT_ROUT_STRUCT); P_MALLOC(memfetch,nadr,plural unsigned short); P_MALLOC(procsto,nadr,plural unsigned short); P_MALLOC(endswap,nadr,plural unsigned short); /* computation of P and inv(N) */ { register int ib,jb, pivpos; register unsigned pivpt, pivcol, newpiv; for (ib=0; ib<=nbits; ib++) A[ib] = B->pmat[ib]; for (ib=nbits-1; ib>=lnproc; ib--) { pivcol = A[ib]; if (pivcol == 0) ERROR("PxBlp_rout : Singular matrix!"); pivpt = 1<>1; pivpos = ib-1; while (!(newpiv&pivcol)) { newpiv >>=1; pivpos--; } Pvot[ib] = pivpos; for (jb=nbits; jb>=0; jb--) if (A[jb]&newpiv) A[jb] ^= pivpt; } else { Pvot[ib] = -1; /* indicates no pivoting */ } /* elimination */ A[ib] ^= pivpt; pivcol = A[ib]; for (jb=nbits; jb>=0; jb--) if (A[jb] & pivpt) A[jb] ^= pivcol; A[ib] ^= pivpt; } /* compute inv(M) */ Mi[nbits] = 0; for (ib=0,pivpt=1; ib= 0) Mi[pivpos] ^= Mi[ib]; } } /* compute memory fetch and processor store addresses */ { register int i; register unsigned mad, ipiv, procmsk; register plural unsigned tadr, tadr2, vec1; procmsk = (1<>lnproc; tadr2 = (mad<>lnproc; } } /* Compute the swaps necessary to finally unscramble the data */ { register unsigned mad; register plural unsigned t0, t1; /* let tab1 be inverse permutation of tab0 */ for(mad=0;madmfch = memfetch; prm_data->psto = procsto; prm_data->eswp = endswap; prm_data->nmem = nadr; return(prm_data); } /*****************************************************************************/ PERMUT_ROUT blp_rout(BL_PERMUT B) /* Compute routing for bit linear permutation */ { /* Algorithm: Compute binary gaussian factorization of permutation matrix B = M*P*N where M and N only act on memory part of address and P only on processor part. Let totadr denote the total address, i.e. totadr = (procadr,memadr)'. The permutation will procede in 2 stages: 1) inv(N)*P*N*totadr (by indirect fetching, routersend and store back). 2) M*N*totadr (by a sequence of swaps in local memory). */ register unsigned *A, *MNi; register int Pvot[8*sizeof(unsigned)],nadr,nbits; register plural unsigned *tab0, *tab1; register plural unsigned short *memfetch, *procsto, *endswap; register PERMUT_ROUT prm_data; char anyprsto, anyendsw; anyprsto = 0; anyendsw = 0; nbits = B->nbits; if (nbits < lnproc) { ERROR("blp_rout: Permutation is smaller than MasPar.\n"); } nadr = 1<<(nbits-lnproc); /* Allocate work arrays */ MALLOC(A,nbits+1,unsigned); MALLOC(MNi,nbits+1,unsigned); P_MALLOC(tab0,nadr,plural unsigned); P_MALLOC(tab1,nadr,plural unsigned); /* Allocate return structure */ MALLOC(prm_data,1,PERMUT_ROUT_STRUCT); P_MALLOC(memfetch,nadr,plural unsigned short); P_MALLOC(procsto,nadr,plural unsigned short); P_MALLOC(endswap,nadr,plural unsigned short); /* computation of P and inv(N) */ { register int ib,jb, pivpos; register unsigned pivpt, pivcol, newpiv; for (ib=0; ib<=nbits; ib++) A[ib] = B->pmat[ib]; for (ib=nbits-1; ib>=lnproc; ib--) { pivcol = A[ib]; if (pivcol == 0) ERROR("blp_rout : Singular matrix!"); pivpt = 1<>1; pivpos = ib-1; while (!(newpiv&pivcol)) { newpiv >>=1; pivpos--; } Pvot[ib] = pivpos; for (jb=nbits; jb>=0; jb--) if (A[jb]&newpiv) A[jb] ^= pivpt; } else { Pvot[ib] = -1; /* indicates no pivoting */ } /* elimination */ A[ib] ^= pivpt; pivcol = A[ib]; for (jb=nbits; jb>=0; jb--) if (A[jb] & pivpt) A[jb] ^= pivcol; A[ib] ^= pivpt; } /* compute inv(MN) */ for (ib=nbits; ib>=0; ib--) MNi[ib] = (A[ib]) >> lnproc; /* MNi = inv(N) */ for (ib=lnproc; ib= 0) MNi[pivpos] ^= MNi[ib]; } } /* compute memory fetch and processor store addresses */ { register int i; register unsigned mad, ipiv, procmsk; register plural unsigned tadr, vec1, vec2; procmsk = (1<> lnproc; procsto[mad] = vec1 & procmsk; if (procsto[mad] != iproc) anyprsto = 1; tab0[mad] = vec2; } } /* Compute the swaps necessary to finally unscramble the data */ /* let tab1 be inverse permutation of tab0 */ { register unsigned mad; register plural unsigned t0, t1; for(mad=0;madmfch = memfetch; prm_data->psto = procsto; prm_data->eswp = endswap; prm_data->nmem = nadr; return(prm_data); } /*****************************************************************************/ plural unsigned* blp_DestAdMat(BL_PERMUT A) /* Returns a matrix of destination addresses for the linear permutation defined by bit-linear permutation A */ { plural unsigned *DstAd; register plural unsigned tadr, dest; register unsigned ipiv; unsigned *pmat; int nbits, mad, nadr, i; pmat = A->pmat; nbits = A->nbits; if (nbits < lnproc) { ERROR("blp_DestAdMat: Permutation is smaller than MasPar.\n"); } nadr = 1<<(nbits-lnproc); P_MALLOC(DstAd,nadr,plural unsigned); for(mad=0;madpmat; nbits = A->nbits; dest = 0; for (i=0,ipiv=1; ipmat; nbits = A->nbits; dest = 0; for (i=0,ipiv=1; imfch); P_FREE(A->psto); P_FREE(A->eswp); FREE(A); return; } /*****************************************************************************/ #define _permut(type) { \ register plural unsigned type *tmparr, *arrpt; \ register plural unsigned type * plural plarrpt; \ register plural unsigned type tmp, tmp1; \ tmparr = (plural unsigned type *) arr; \ if (direction == 1) { \ if (pstore) \ for (mad=0;mad=0;mad--) { \ tmp2 = eswap[mad]; \ if (tmp2 != mad) { \ arrpt = &(tmparr[nblk*mad]); \ plarrpt = &(tmparr[nblk*tmp2]); \ for (i=0;i=0;mad--) { \ tmp3 = pstore[mad]; \ if (tmp3 != ipr) { \ tmp2 = mfetch[mad]; \ plarrpt = &(tmparr[nblk*tmp2]); \ for (i=0;inmem; mfetch = A->mfch; pstore = A->psto; eswap = A->eswp; if (blksiz==8) { _permut(long long); } else if (blksiz==4) { _permut(long); } else if (blksiz==2) { _permut(short); } else if (blksiz==1) { _permut(char); } else { ERROR("permut: Illegal value for blksiz! Legal values are: 1 2 4 and 8.\n"); } } /*****************************************************************************/ void permut32(void *arr, int direction, PERMUT_ROUT A) { int nmem,mad; register plural unsigned long tmp, tmp1; plural unsigned short *mfetch,*pstore,*eswap; register plural unsigned short tmp2,tmp3,ipr; plural unsigned long *array; array = (plural unsigned long *) arr; ipr = iproc; nmem = A->nmem; mfetch = A->mfch; pstore = A->psto; eswap = A->eswp; if (direction == 1) { if (pstore) for (mad=0;mad=0;mad--) { tmp2 = eswap[mad]; if (tmp2 != mad) { tmp1 = array[tmp2]; tmp = array[mad]; array[mad] = tmp1; array[tmp2] = tmp; } } if (pstore) for (mad=nmem-1;mad>=0;mad--) { tmp3 = pstore[mad]; if (tmp3 != ipr) { tmp2 = mfetch[mad]; tmp = array[tmp2]; tmp = router[tmp3].tmp; array[tmp2] = tmp; } } } } /*****************************************************************************/ void permut64(void *arr, int direction, PERMUT_ROUT A) { int nmem,mad; register plural unsigned long long tmp, tmp1; plural unsigned short *mfetch,*pstore,*eswap; register plural unsigned short tmp2,tmp3,ipr; plural unsigned long long *array; array = (plural unsigned long long *) arr; ipr = iproc; nmem = A->nmem; mfetch = A->mfch; pstore = A->psto; eswap = A->eswp; if (direction == 1) { if (pstore) for (mad=0;mad=0;mad--) { tmp2 = eswap[mad]; if (tmp2 != mad) { tmp1 = array[tmp2]; tmp = array[mad]; array[mad] = tmp1; array[tmp2] = tmp; } } if (pstore) for (mad=nmem-1;mad>=0;mad--) { tmp3 = pstore[mad]; if (tmp3 != ipr) { tmp2 = mfetch[mad]; tmp = array[tmp2]; tmp = router[tmp3].tmp; array[tmp2] = tmp; } } } } /*****************************************************************************/ int pmr_PxBlpCheck(BL_PERMUT A, plural unsigned p(plural unsigned tadr, int mode, void *vp), PERMUT_ROUT R) { plural unsigned *dstaddr, *totaddr; int nmem,mad,ierr; ierr = 0; nmem = R->nmem; P_MALLOC(totaddr,nmem,plural unsigned); dstaddr = blp_DestAdMat(A); for (mad=0;madnmem; P_MALLOC(totaddr,nmem,plural unsigned); Ainv = blp_copyNew(A); blp_inv(Ainv); srcaddr = blp_DestAdMat(Ainv); for (mad=0;mad