#include #include "mpl_blas.h" #define PRECISION float /*************************************************************************** * * * DATA PARALLEL BLAS based on MPL * * * * Internal routine, this routine is not supposed to be * * called by user programs. * * * * Version 1.0 1/4-92 , * * For MasPar MP-1 computers * * * * para//ab, University of Bergen, NORWAY * * * * The calling sequence may be changed in a future version. * * Please report any BUGs, ideas for improvement or other * * comments to * * adm@parallab.uib.no * * * * Future versions may then reflect your suggestions. * * The most current version of this software is available * * from netlib@nac.no , send the message `send index from maspar' * * * * REVISIONS: * * * ***************************************************************************/ /* The following routine performs upper triangular solve with multiple * right hand sides on m*m A-matrices and m*n B matrices with * (1<= m <= nxproc) and (1<= n <= nxproc). * Here the A matrix is the triangle, while the B matrix represents the * collection of rhs's arranged columnwise. * After execution the B matrix is overwritten by the answers. * This implementation is based on the standard column oriented back * substitution algorithm. The systems are solved concurrently. This is * done by spreading the appropriate column of the A matrix to all * columns on the PE grid, so that it can be used in processing each * B vector. * This routine applies to square machines only, and the data storage * is as follows: * The m uppermost and m leftmost PE's contains one element each of the * A matrix in the plural variable a, while m uppermost and n leftmost * PE's contains the B matrix in the plural variable b. Both variables * are parameters to the routine, and the matrices are expected to come * transposed. * * Virtually it then looks like this (A matrix): * * * * | * 0 0 0 0 0 0 . . .| * | * * 0 0 0 0 0 | * a | * * * 0 0 0 0 | * | * * * * 0 0 0 | nyproc * | * * * * * 0 0 ? | * | * * * * * * 0 | * | * * * * * * * | * | . . | * | . ? . | * | . .| * * <--------- nxproc --------> * * Last bug fix: July 6. -92, TS * */ #ifdef __STDMPL__ void mpl_stri_sun(register int diag, register int m, register int n, register plural PRECISION a, plural PRECISION *b) #else void mpl_stri_sun(diag,m,n,a,b) register int diag; register int m,n; register plural PRECISION a; plural PRECISION *b; #endif { register int i,nx=nxproc,ny=nyproc; register plural int ix=ixproc,iy=iyproc; plural PRECISION b_wrk,temp,a_wrk; b_wrk = *b; /* * This branch is executed if the A matrix is exactly * nxproc * nxproc large */ if (m == nx) { if ((diag != 'u') && (diag != 'U')) { /* Scaling A-matrix by its diagonal */ if (ix == iy) xnetcW[nx].temp = a; if (ix=0; i--) { /* Spreading (i+nyproc)'th A-column to all other columns */ if (iy == i) xnetcN[ny].a_wrk = a; /* Updating entries above row i */ if (ix == i) xnetcW[nx].temp = b_wrk; if (ix < i) b_wrk -= a_wrk * temp; } if ((diag != 'u') && (diag != 'U')) /* scaling result by diagonal */ { if (ix == iy) xnetcS[ny].temp = a; b_wrk /= temp; } } else /* if (m != nx) *****************************************/ /* * This branch is executed for problems where the A matrix * is less than nxproc * nxproc */ { if ((diag != 'u') && (diag != 'U')) { /* Scaling A matrix by its diagonal */ if (ix == iy) xnetcW[nx].temp = a; if ((ix=0;i--) { /* Spreading i'th A-column to all other columns */ if (iy == i) xnetcN[ny].a_wrk = a; /* Updating entries below row i */ if (ix == i) xnetcW[nx].temp = b_wrk; if (ix < i) b_wrk -= a_wrk * temp; } if ((diag != 'u') && (diag != 'U')) { /* scaling result by diagonal */ if (ix == iy) xnetcS[ny].temp = a; if (ix < m) b_wrk /= temp; } } if ((iy < n) && (ix < m)) *b =b_wrk; }