#include #include "mpl_blas.h" #define PRECISION float visible void /*************************************************************************** * * * DATA PARALLEL BLAS based on MPL * * * * Internal routine, this routine is not supposed to be * * called by user programs. * * * * Version 1.0 1/4-92 , * * For MasPar MP-1 computers * * * * para//ab, University of Bergen, NORWAY * * * * The calling sequence may be changed in a future version. * * Please report any BUGs, ideas for improvement or other * * comments to * * adm@parallab.uib.no * * * * Future versions may then reflect your suggestions. * * The most current version of this software is available * * from netlib@nac.no , send the message `send index from maspar' * * * * REVISIONS: * * * ***************************************************************************/ /* Preload dscal Loads and stores from/to PMEM (the "M-machine") are * overlapped with subsequent execution instructions (the "E-machine"). * By priming the loop with an extra initial load, we can then overlap * subsequent loads with calculations required in the main body of the * loop. Any operations not involving the registers that are being loaded * into will operate concurrently with the memory fetch. If enough operations * are carried before the values being loaded are needed, the load is * essentially free of cost. */ #ifdef __STDMPL__ mpl_sscal( int n, register PRECISION a, register plural PRECISION *y) #else mpl_sscal(n, x, y) int n; register PRECISION a; register plural PRECISION *y; #endif { register plural PRECISION y1, y2; register int i, no_block, rest_vec; register plural PRECISION *d; no_block = NFB(n); /* number of full blocks */ rest_vec = n % nproc; if (no_block == 0) if (ENDVEC(n)) *y *= a; else if ((no_block == 1) && (rest_vec == 0)) *y *= a; if (no_block > 0){ /* Sett pointer for where to store = pointer * for where to fetch */ if (rest_vec > 0) no_block++; d = y; /* Preload first element from y arrays. */ y1 = *y; y++; /* Main loop: unroll to level two to avoid having to move y2 * into y1. Loop is terminated one iteration early to avoid * loading one element beyond the end of the y arrays when n * is even. */ for (i=0; i<(no_block-1)>>1; i++) { /* Preload even elements. */ y2 = *y; y++; /* *d++ = a * y1; */ y1 *= a; *d = y1; d++; /* Preload odd elements. */ y1 = *y; y++; /* *d++ = a * y2; */ y2 *= a; *d = y2; d++; } /* Load last element from y arrays when n is even; all elements * have already been loaded for n odd. Make the two last operations * for n even. */ if (!(no_block & 1)) { y2 = *y; y1 *= a; *d = y1; d++; if (ENDVEC(n)){ y2 *= a; *d = y2; } } /* Last operation for n odd. */ else{ if (ENDVEC(n)){ y1 *= a; *d = y1; } } } }