#include #include "mpl_blas.h" #define PRECISION float /*************************************************************************** * * * DATA PARALLEL BLAS based on MPL * * * * Internal routine, this routine is not supposed to be * * called by user programs. * * * * Version 1.0 1/4-92 , * * For MasPar MP-1 computers * * * * para//ab, University of Bergen, NORWAY * * * * The calling sequence may be changed in a future version. * * Please report any BUGs, ideas for improvement or other * * comments to * * adm@parallab.uib.no * * * * Future versions may then reflect your suggestions. * * The most current version of this software is available * * from netlib@nac.no , send the message `send index from maspar' * * * * REVISIONS: * * * ***************************************************************************/ /* The following routine performs upper triangular solve with multiple * right hand sides using blocking. * Each block is square and has sides == nxproc. On rectangular machines * it therefore uses two grids per block. * The routine performs triangular solves on the diagonal blocks in the * A matrix and the B-blocks level to them, and updates B-blocks below * using matrix multiplication. * * Last bug fix: July 8. -92, TS * */ #ifdef __STDMPL__ void mpl_strsm_upn( int int_diag, int m, int n, plural PRECISION *a, int lda, plural PRECISION *b ) #else void mpl_strsm_upn(int_diag,m,n,a,lda,b) int int_diag, m, n; plural PRECISION *a; int lda; plural PRECISION *b; #endif { char diag; plural PRECISION b0_ps,b1_ps,a0_ps,a1_ps; register int nx=nxproc,ny=nyproc,am,bn,i,j,k,nbxA,nbyA,nbyB,nbxB; register plural int ix=ixproc,iy=iyproc; if (int_diag) diag = 'u'; else diag = 'n'; /* NB ! Below we mean by "number of blocks", the number of grids used * to store the arrays */ nbxA = NBX(m);/* Number of blocks in row direction on A matrix */ nbyB = NBY(n);/* Number of blocks in column direction on B matrix */ nbyA = NBY(m);/* Number of blocks in column direction on A matrix */ nbxB = NBX(m);/* Number of blocks in row direction on B matrix */ /* Variables b0_ps and b1_ps are temporaries used to hold preskewed blocks * of the B matrix to be used in matrix multiplications. * Similarly for the variables a0_ps and a1_ps. */ /* In case of rectangular MP-1, routines called from this routines always operate on pairs of blocks A and B. That means that in cases of odd number of blocks in column direction it operates on data not initialized. This may cause problems!!! */ /* initializing boundary blocks to zero on processors off bounds */ bn = n&(ny-1); am = m&(nx-1); if ((ix >= am)&&(am>0)){ for (i=1;i<=nbyB; i++) b[i*nbxB-1] = 0.0; } if ((iy >= bn)&&(bn>0)) { for (i=0;i= 0; i--){ if ((i== (nbxA-1)) && (m&(nx-1))) am = m&(nx-1); else am = nx; /* solve for all blocks in bloc-row i of B */ for(k=0;k=0;j--){ mpl_sq_spsN(a[j+nbxA*i],&a0_ps,am,nx); a0_ps *= -1; mpl_sq_smul('n',a0_ps,b0_ps,&b[j+nbxB*k],nx,bn,am); } } } } else /**************** if machine is rectangular ****************/ { for (i=nbxA-1; i>= 0; i--){ if ((i== (nbxA-1)) && (m&(nx-1))) am = m&(nx-1); else am = nx; /* solve for all blocks in bloc-row i of B */ for(k=0;k<(nbyB+1)/2;k++){ if ((k == (nbyB+1)/2-1) && (n&(nx-1))) bn = n&(nx-1); /* n%nxproc */ else bn = nx; mpl_stri_run(diag,am,bn,a[i*nbxA*2+i],a[(2*i+1)*nbxA+i], &b[i+nbxB*2*k],&b[(2*k+1)*nbxB+i]); /* preskewing B blocks */ mpl_rec_spsW(b[i+nbxB*2*k],b[(2*k+1)*nbxB+i],&b0_ps,&b1_ps, bn,am); /* Updating upper part of B using matrixmultiplication */ for(j=i-1;j>=0;j--){ mpl_rec_spsN(a[j+nbxA*2*i],a[(2*i+1)*nbxA+j],&a0_ps,&a1_ps,am,nx); a0_ps *= -1; a1_ps *= -1; mpl_rec_smul('n',a0_ps,a1_ps,b0_ps,b1_ps, &b[j+nbxB*2*k],&b[(2*k+1)*nbxB+j],nx,bn,am); } } } } }