#include #define PRECISION float /*************************************************************************** * * * DATA PARALLEL BLAS based on MPL * * * * Internal routine, this routine is not supposed to be * * called by user programs. * * * * Version 1.0 1/4-92 , * * For MasPar MP-1 computers * * * * para//ab, University of Bergen, NORWAY * * * * The calling sequence may be changed in a future version. * * Please report any BUGs, ideas for improvement or other * * comments to * * adm@parallab.uib.no * * * * Future versions may then reflect your suggestions. * * The most current version of this software is available * * from netlib@nac.no , send the message `send index from maspar' * * * * REVISIONS: * * * ***************************************************************************/ /* A triangularsolver for square blocs that fits on a square MasPar. The matrix is suppose to be lower triangular, but the physical lay-out on the processor array, transposes the matrix. The algorithm is the straightforward column update algorithm, (but remember: Matrix columns = Physical rows of processors, Thus we update date on rows of processors) The elements of b, b(i), rest on (0,i) and travels downwards as the update take place. When they arrive at the diagonal the division takes place and they take the value of the unknown (x), which is return in the final column. Initialy we scale the matrix. Thus we actually compute LD'y = b; followed by Dx = y. L is lower and LD' is unit lower. We then don't have to do the division of the pivot in n step, but in a single one at the beginning. Since division is slow on MasPar this trick gave a great improvement in the performence. */ #ifdef __STDMPL__ void mpl_slssolve( register int diag, register int m, register plural PRECISION a0, plural PRECISION *b0 ) #else void mpl_slssolve(diag,m,a0,b0) register int diag; register int m; register plural PRECISION a0; plural PRECISION *b0; #endif { register int i,ii,j,nx = nxproc,ny = nyproc,iter; register plural int ix = ixproc, iy = iyproc,iy2; register plural PRECISION btemp,temp; if (iy==0) btemp = *b0; else btemp = 0.0; iter = ny; if (m iy && ix < m) a0 = a0/temp; /* forward subsitution loop */ for (i=0; i< iter; i++) { if (iy == i) { if(ix ==i) xnetcE[m-i].temp = btemp; if (ix > i) { btemp -= a0*temp; xnetS[1].btemp = btemp; } } } /* if scaled, scale the results, spread it out and put into the pointer */ temp = btemp; if (diag != 'U' && diag != 'u') if (ix == iy && ix < m) btemp = btemp/a0; if (ix == iy) xnetcE[nx-1].btemp = btemp; *b0 = btemp; }