/* ---------------------------------------------------------------------
 *
 *  -- PBLAS routine (version 2.0) --
 *     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
 *     and University of California, Berkeley.
 *     April 1, 1998
 *
 * ---------------------------------------------------------------------
 */
/*
 * This file includes the standard C libraries, as well as system
 * dependent include files.  All PBLAS routines include this file.
 *
 * ---------------------------------------------------------------------
 * Machine Specific PBLAS macros
 * ---------------------------------------------------------------------
 *
 * _MACH_ is set to _T3D_ or _T3E_ when the build defines T3D or T3E,
 * and falls back to _HAL_ otherwise.
 */
#define _HAL_           0
#define _T3D_           1
#define _T3E_           2

#ifdef T3D
#define _MACH_          _T3D_
#endif
#ifdef T3E
#define _MACH_          _T3E_
#endif
#ifndef _MACH_
#define _MACH_          _HAL_
#endif
/*
 * CBRATIO is the ratio of the transfer cost per element for the combine
 * sum to one process and the broadcast operation. This value is used
 * within the Level 3 PBLAS routines to decide on which algorithm to
 * select.
 */
#define CBRATIO         1.3
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include <stdio.h>
#include <stdlib.h>

#ifdef __STDC__
#include <stdarg.h>
#else
#include <varargs.h>
#endif

#if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) )
#include <fortran.h>
#endif
/*
 * ---------------------------------------------------------------------
 * FORTRAN <-> C interface
 * ---------------------------------------------------------------------
 *
 * These macros identify how the PBLAS will be called, as follows:
 *
 * _F2C_ADD_: the FORTRAN compiler expects the name of C functions to be
 * in all lower case and to have an underscore appended to it (Suns,
 * Intel compilers expect this).
 *
 * _F2C_NOCHANGE: the FORTRAN compiler expects the name of C functions
 * to be in all lower case (IBM RS6K compilers do this).
 *
 * _F2C_UPCASE: the FORTRAN compiler expects the name of C functions
 * to be in all upcase. (Cray compilers expect this).
 *
 * _F2C_F77ISF2C: the FORTRAN compiler in use is f2c, a FORTRAN to C
 * converter.
 *
 * The build selects one of them by defining UpCase, NoChange, Add_ or
 * f77IsF2C; when none is defined, _F2C_ADD_ is used.
 */
#define _F2C_ADD_       0
#define _F2C_NOCHANGE   1
#define _F2C_UPCASE     2
#define _F2C_F77ISF2C   3

#ifdef UpCase
#define _F2C_CALL_      _F2C_UPCASE
#endif

#ifdef NoChange
#define _F2C_CALL_      _F2C_NOCHANGE
#endif

#ifdef Add_
#define _F2C_CALL_      _F2C_ADD_
#endif

#ifdef f77IsF2C
#define _F2C_CALL_      _F2C_F77ISF2C
#endif

#ifndef _F2C_CALL_
#define _F2C_CALL_      _F2C_ADD_
#endif
/*
 * ---------------------------------------------------------------------
 * TYPE DEFINITIONS AND CONVERSION UTILITIES
 * ---------------------------------------------------------------------
 */
#if( ( _MACH_ == _T3D_ ) || ( _MACH_ == _T3E_ ) )

/* From here on, every use of float in the including file reads double */
#define float           double

/* Type of character argument in a FORTRAN call */
#define F_CHAR_T        _fcd

/* Character conversion utilities */
#define F2C_CHAR(a)     ( _fcdtocp( (a) ) )
#define C2F_CHAR(a)     ( _cptofcd( (a), 1 ) )

/* Type of FORTRAN functions */
#define F_VOID_FCT      void   fortran     /* Subroutine */
#define F_INTG_FCT      int    fortran     /* INTEGER function */

#else

/* Type of character argument in a FORTRAN call */
typedef char *          F_CHAR_T;

/* Character conversion utilities (identity on these machines) */
#define F2C_CHAR(a)     (a)
#define C2F_CHAR(a)     (a)

/* Type of FORTRAN functions */
#define F_VOID_FCT      void               /* Subroutine */
#define F_INTG_FCT      int                /* INTEGER function */

#endif
/*
 * ---------------------------------------------------------------------
 * typedef definitions
 * ---------------------------------------------------------------------
 */

/* Complex values are stored as two-element arrays, indexed by
 * REAL_PART and IMAG_PART. */
typedef float           cmplx  [2];
typedef double          cmplx16[2];

#define REAL_PART       0
#define IMAG_PART       1

/*
 * Function pointer types stored in PBTYP_T.  With an ISO C compiler the
 * full prototypes are given; otherwise old-style declarations with
 * unspecified arguments are used.
 */
#ifdef __STDC__

typedef void       (*GESD2D_T)   ( int,        int,        int,
                                   char *,     int,        int,
                                   int );
typedef void       (*GERV2D_T)   ( int,        int,        int,
                                   char *,     int,        int,
                                   int );
typedef void       (*GEBS2D_T)   ( int,        char *,     char *,
                                   int,        int,        char *,
                                   int );
typedef void       (*GEBR2D_T)   ( int,        char *,     char *,
                                   int,        int,        char *,
                                   int,        int,        int );
typedef void       (*GSUM2D_T)   ( int,        char *,     char *,
                                   int,        int,        char *,
                                   int,        int,        int );

typedef F_VOID_FCT (*MMADD_T)    ( int *,      int *,      char *,
                                   char *,     int *,      char *,
                                   char *,     int * );
typedef F_VOID_FCT (*MMSHFT_T)   ( int *,      int *,      int *,
                                   char *,     int * );
typedef F_VOID_FCT (*VVDOT_T)    ( int *,      char *,     char *,
                                   int *,      char *,     int * );
typedef F_VOID_FCT (*VVSET_T)    ( int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*TZPAD_T)    ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      int *,      char *,
                                   char *,     char *,     int * );
typedef F_VOID_FCT (*TZPADCPY_T) ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      int *,      char *,
                                   int *,      char *,     int * );
typedef F_VOID_FCT (*TZSET_T)    ( F_CHAR_T,   int *,      int *,
                                   int *,      char *,     char *,
                                   char *,     int * );
typedef F_VOID_FCT (*TZSCAL_T)   ( F_CHAR_T,   int *,      int *,
                                   int *,      char *,     char *,
                                   int * );

typedef F_VOID_FCT (*AXPY_T)     ( int *,      char *,     char *,
                                   int *,      char *,     int * );
typedef F_VOID_FCT (*COPY_T)     ( int *,      char *,     int *,
                                   char *,     int * );
typedef F_VOID_FCT (*SWAP_T)     ( int *,      char *,     int *,
                                   char *,     int * );

typedef F_VOID_FCT (*GEMV_T)     ( F_CHAR_T,   int *,      int *,
                                   char *,     char *,     int *,
                                   char *,     int *,      char *,
                                   char *,     int * );
typedef F_VOID_FCT (*AGEMV_T)    ( F_CHAR_T,   int *,      int *,
                                   char *,     char *,     int *,
                                   char *,     int *,      char *,
                                   char *,     int * );
typedef F_VOID_FCT (*SYMV_T)     ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*ASYMV_T)    ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*HEMV_T)     ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*AHEMV_T)    ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*TRMV_T)     ( F_CHAR_T,   F_CHAR_T,   F_CHAR_T,
                                   int *,      char *,     int *,
                                   char *,     int * );
typedef F_VOID_FCT (*ATRMV_T)    ( F_CHAR_T,   F_CHAR_T,   F_CHAR_T,
                                   int *,      char *,     char *,
                                   int *,      char *,     int *,
                                   char *,     char *,     int * );
typedef F_VOID_FCT (*TRSV_T)     ( F_CHAR_T,   F_CHAR_T,   F_CHAR_T,
                                   int *,      char *,     int *,
                                   char *,     int * );
typedef F_VOID_FCT (*GERC_T)     ( int *,      int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     int * );
typedef F_VOID_FCT (*GERU_T)     ( int *,      int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     int * );
typedef F_VOID_FCT (*SYR_T)      ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int * );
typedef F_VOID_FCT (*HER_T)      ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int * );
typedef F_VOID_FCT (*SYR2_T)     ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     int * );
typedef F_VOID_FCT (*HER2_T)     ( F_CHAR_T,   int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     int * );

typedef F_VOID_FCT (*GEMM_T)     ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      int *,      char *,
                                   char *,     int *,      char *,
                                   int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*SYMM_T)     ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      char *,     char *,
                                   int *,      char *,     int *,
                                   char *,     char *,     int * );
typedef F_VOID_FCT (*HEMM_T)     ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      char *,     char *,
                                   int *,      char *,     int *,
                                   char *,     char *,     int * );
typedef F_VOID_FCT (*SYRK_T)     ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      char *,     char *,
                                   int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*HERK_T)     ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      char *,     char *,
                                   int *,      char *,     char *,
                                   int * );
typedef F_VOID_FCT (*SYR2K_T)    ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      char *,     char *,
                                   int *,      char *,     int *,
                                   char *,     char *,     int * );
typedef F_VOID_FCT (*HER2K_T)    ( F_CHAR_T,   F_CHAR_T,   int *,
                                   int *,      char *,     char *,
                                   int *,      char *,     int *,
                                   char *,     char *,     int * );
typedef F_VOID_FCT (*TRMM_T)     ( F_CHAR_T,   F_CHAR_T,   F_CHAR_T,
                                   F_CHAR_T,   int *,      int *,
                                   char *,     char *,     int *,
                                   char *,     int * );
typedef F_VOID_FCT (*TRSM_T)     ( F_CHAR_T,   F_CHAR_T,   F_CHAR_T,
                                   F_CHAR_T,   int *,      int *,
                                   char *,     char *,     int *,
                                   char *,     int * );

#else

typedef void       (*GESD2D_T)   ();
typedef void       (*GERV2D_T)   ();
typedef void       (*GEBS2D_T)   ();
typedef void       (*GEBR2D_T)   ();
typedef void       (*GSUM2D_T)   ();

typedef F_VOID_FCT (*MMADD_T)    ();
typedef F_VOID_FCT (*MMSHFT_T)   ();
typedef F_VOID_FCT (*VVDOT_T)    ();
typedef F_VOID_FCT (*VVSET_T)    ();
typedef F_VOID_FCT (*TZPAD_T)    ();
typedef F_VOID_FCT (*TZPADCPY_T) ();
typedef F_VOID_FCT (*TZSET_T)    ();
typedef F_VOID_FCT (*TZSCAL_T)   ();

typedef F_VOID_FCT (*AXPY_T)     ();
typedef F_VOID_FCT (*COPY_T)     ();
typedef F_VOID_FCT (*SWAP_T)     ();

typedef F_VOID_FCT (*GEMV_T)     ();
typedef F_VOID_FCT (*AGEMV_T)    ();
typedef F_VOID_FCT (*SYMV_T)     ();
typedef F_VOID_FCT (*ASYMV_T)    ();
typedef F_VOID_FCT (*HEMV_T)     ();
typedef F_VOID_FCT (*AHEMV_T)    ();
typedef F_VOID_FCT (*TRMV_T)     ();
typedef F_VOID_FCT (*ATRMV_T)    ();
typedef F_VOID_FCT (*TRSV_T)     ();
typedef F_VOID_FCT (*GERC_T)     ();
typedef F_VOID_FCT (*GERU_T)     ();
typedef F_VOID_FCT (*SYR_T)      ();
typedef F_VOID_FCT (*HER_T)      ();
typedef F_VOID_FCT (*SYR2_T)     ();
typedef F_VOID_FCT (*HER2_T)     ();

typedef F_VOID_FCT (*GEMM_T)     ();
typedef F_VOID_FCT (*SYMM_T)     ();
typedef F_VOID_FCT (*HEMM_T)     ();
typedef F_VOID_FCT (*SYRK_T)     ();
typedef F_VOID_FCT (*HERK_T)     ();
typedef F_VOID_FCT (*SYR2K_T)    ();
typedef F_VOID_FCT (*HER2K_T)    ();
typedef F_VOID_FCT (*TRMM_T)     ();
typedef F_VOID_FCT (*TRSM_T)     ();

#endif

/*
 * PBTYP_T gathers, for one data type, its encoding and sizes, pointers
 * to constants of that type, and the table of BLACS / BLAS-like
 * function pointers operating on it.
 */
typedef struct
{
   char           type;       /* Encoding of the data type */
   int            usiz;       /* length in bytes of elementary data type */
   int            size;       /* length in bytes of data type */
   char           * zero,
                  * one,
                  * negone;   /* pointers to constants of correct type */

   GESD2D_T       Cgesd2d;    /* BLACS functions */
   GERV2D_T       Cgerv2d;
   GEBS2D_T       Cgebs2d;
   GEBR2D_T       Cgebr2d;
   GSUM2D_T       Cgsum2d;

   MMADD_T        Fmmadd;     /* Addition functions */
   MMADD_T        Fmmcadd;
   MMADD_T        Fmmtadd;
   MMADD_T        Fmmtcadd;
   MMADD_T        Fmmdda;
   MMADD_T        Fmmddac;
   MMADD_T        Fmmddat;
   MMADD_T        Fmmddact;

   MMSHFT_T       Fcshft;     /* Shift functions */
   MMSHFT_T       Frshft;

   VVDOT_T        Fvvdotu;    /* Dot functions */
   VVDOT_T        Fvvdotc;

   TZPAD_T        Ftzpad;     /* Array pad function */
   TZPADCPY_T     Ftzpadcpy;
   VVSET_T        Fset;

   TZSCAL_T       Ftzscal;    /* Scaling functions */
   TZSCAL_T       Fhescal;
   TZSCAL_T       Ftzcnjg;

   AXPY_T         Faxpy;      /* Level 1 BLAS */
   COPY_T         Fcopy;
   SWAP_T         Fswap;

   GEMV_T         Fgemv;      /* Level 2 BLAS */
   SYMV_T         Fsymv;
   HEMV_T         Fhemv;
   TRMV_T         Ftrmv;
   TRSV_T         Ftrsv;
   AGEMV_T        Fagemv;
   ASYMV_T        Fasymv;
   AHEMV_T        Fahemv;
   ATRMV_T        Fatrmv;

   GERC_T         Fgerc;
   GERU_T         Fgeru;
   SYR_T          Fsyr;
   HER_T          Fher;
   SYR2_T         Fsyr2;
   HER2_T         Fher2;

   GEMM_T         Fgemm;      /* Level 3 BLAS */
   SYMM_T         Fsymm;
   HEMM_T         Fhemm;
   SYRK_T         Fsyrk;
   HERK_T         Fherk;
   SYR2K_T        Fsyr2k;
   HER2K_T        Fher2k;
   TRMM_T         Ftrmm;
   TRSM_T         Ftrsm;

} PBTYP_T;

/* Function pointer types whose first argument is a PBTYP_T descriptor */
#ifdef __STDC__

typedef void       (*TZSYR_T)    ( PBTYP_T *,  char *,     int,
                                   int,        int,        int,
                                   char *,     char *,     int,
                                   char *,     int,        char *,
                                   int );
typedef void       (*TZSYR2_T)   ( PBTYP_T *,  char *,     int,
                                   int,        int,        int,
                                   char *,     char *,     int,
                                   char *,     int,        char *,
                                   int,        char *,     int,
                                   char *,     int );
typedef void       (*TZTRM_T)    ( PBTYP_T *,  char *,     char *,
                                   char *,     char *,     int,
                                   int,        int,        int,
                                   char *,     char *,     int,
                                   char *,     int,        char *,
                                   int );
typedef void       (*TZSYM_T)    ( PBTYP_T *,  char *,     char *,
                                   int,        int,        int,
                                   int,        char *,     char *,
                                   int,        char *,     int,
                                   char *,     int,        char *,
                                   int,        char *,     int );

#else

typedef void       (*TZSYR_T)    ();
typedef void       (*TZSYR2_T)   ();
typedef void       (*TZTRM_T)    ();
typedef void       (*TZSYM_T)    ();

#endif

typedef struct
{
   int            offd;       /* Global diagonal offset */
   int            lcmt00;     /* LCM value of first block */

   int            mp;         /* Local number of rows */
   int            imb1;       /* Size of first row block (global) */
   int            imbloc;     /* Size of first local row block */
   int            mb;         /* Row block size */
   int            lmbloc;     /* Size of last local row block */
   int            mblks;      /* Number of local row blocks */
   int            iupp;       /* LCM row bound for first diagonal block */
   int            upp;        /* LCM row bound for diagonal block */
   int            prow;       /* Relative row process coordinate */
   int            nprow;      /* Number of process rows */

   int            nq;         /* Local number of columns */
   int            inb1;       /* Size of first column block (global) */
   int            inbloc;     /* Size of first local column block */
   int            nb;         /* Column block size */
   int            lnbloc;     /* Size of last local column block */
   int            nblks;      /* Number of local column blocks */
   int            ilow;       /* LCM column bound for first diagonal block */
   int            low;        /* LCM column bound for diagonal block */
   int            pcol;       /* Relative column process coordinate */
   int            npcol;      /* Number of process columns */

   int            lcmb;       /* Least common multiple of nprow * mb and npcol * nb */

} PB_VM_T;
/*
 * ---------------------------------------------------------------------
 * #define macro constants
 * ---------------------------------------------------------------------
 */
#define INT             'I'   /* type identifiers */
#define SREAL           'S'
#define DREAL           'D'
#define SCPLX           'C'
#define DCPLX           'Z'

/* NOTE(review): this looks like the first entry of a FORTRAN symbol
 * name-mapping list that continues past this point -- confirm whether
 * it is meant to be guarded by a check on _F2C_CALL_. */
#define crot_ CROT