#include #define PRECISION float /*************************************************************************** * * * DATA PARALLEL BLAS based on MPL * * * * Internal routine, this routine is not supposed to be * * called by user programs. * * * * Version 1.0 1/4-92 , * * For MasPar MP-1 computers * * * * para//ab, University of Bergen, NORWAY * * * * The calling sequence may be changed in a future version. * * Please report any BUGs, ideas for improvement or other * * comments to * * adm@parallab.uib.no * * * * Future versions may then reflect your suggestions. * * The most current version of this software is available * * from netlib@nac.no , send the message `send index from maspar' * * * * REVISIONS: * * * ***************************************************************************/ /* An upper triangularsolver for square Matrix on a Square MasPar MP-1. The physical lay-out on the processor array, transposes the matrix. The algorithm is the straightforward column update algorithm, (but remember: Matrix columns = Physical rows of processors, Thus we update data on rows of processors) The two parts are stack upon each other. Thus l(i,j) is a resident of processor (j,i(mod 64)). The elements of b, b(i), rest on (0,j) and travels downwards as the update take place. When they arrive at the diagonal the division takes place and they take the value of the unknown (x), which is return in the final column. version2: Here we actually compute LDx = b; where L is lower unit triangular. We then don't have to do the division of the pivot in n step, but in one single in the beginning. Since division is slow on MasPar we expect this to give great improvement in the performence */ #ifdef __STDMPL__ void mpl_sussolve( register int diag, register int m, register plural PRECISION a0, plural PRECISION *b0 ) #else void mpl_sussolve(diag,m,a0,b0) register int diag; register int m; register plural PRECISION a0; plural PRECISION *b0; #endif { register int i,j,nx= nxproc,ny= nyproc,iter,mmod; register plural int ix = ixproc, iy = iyproc; register plural PRECISION btemp,temp; /* Putting the righthand side into correct starting position */ mmod = m%ny-1; if (mmod>=0) {if (iy== mmod ) btemp = xnetpN[mmod].*b0; else btemp = 0.0;} else {if (iy== ny-1 ) btemp = xnetS[1].*b0; else btemp = 0.0;} /* scaling by the diagonal */ if (ix == iy) xnetcW[m].temp = a0; if (ix < iy && iy < m) a0 = a0/temp; for (i=m-1; i>=0; i--) { if (iy == i) { if(ix ==i) xnetcW[i].temp = btemp; if (ix < i) { btemp -= a0*temp; xnetN[1].btemp = btemp; } } } /* if scaled, scale the results, spread it out and put into the pointer */ temp = btemp; if (diag != 'U' && diag != 'u') if ((ix == iy)&&(iy