    /*        Fast GEMM routine for Alpha 21164/21264      */
    /*         on  Linux, Digital UNIX                     */
    /*        by Kazushige Goto <goto@statabo.rim.or.jp>   */

#ifndef COMMON_H
#define COMMON_H

/* Floating-point constants for the additive and multiplicative identities. */
#define ZERO 0.0
#define ONE  1.0

/* Precision selection.
   The generic names below (GEMM*, ZGEMM*, FLOAT, SIZE, LD, ST, ...) are
   mapped to one concrete set of identifiers depending on whether DGEMM is
   defined when this header is included:
     - DGEMM defined:     d-prefixed and z-prefixed names, FLOAT is double, SIZE is 8
     - DGEMM not defined: s-prefixed and c-prefixed names, FLOAT is float,  SIZE is 4
   NOTE(review): the z and c prefixes presumably denote the double and single
   precision complex routines, following BLAS naming - confirm. */
#ifdef DGEMM

/* Real GEMM entry points and kernels (double variant). */
#define GEMM	   dgemm
#define GEMM_	   dgemm_
#define GEMMC	   dgemmc
#define GEMMC_	   dgemmc_
#define GEMM_NN	   dgemm_nn
#define GEMM_NT	   dgemm_nt
#define GEMM_TN	   dgemm_tn
#define GEMM_TT	   dgemm_tt
#define GEMM_BETA  dgemm_beta

/* ZGEMM* names (z-prefixed variant); one kernel name per two-letter suffix
   built from N, T, C and R. */
#define ZGEMM      zgemm
#define ZGEMM_     zgemm_
#define ZGEMMC_    zgemmc_
#define ZGEMM_NN   zgemm_nn
#define ZGEMM_CN   zgemm_cn
#define ZGEMM_TN   zgemm_tn
#define ZGEMM_NC   zgemm_nc
#define ZGEMM_NT   zgemm_nt
#define ZGEMM_CC   zgemm_cc
#define ZGEMM_CT   zgemm_ct
#define ZGEMM_TC   zgemm_tc
#define ZGEMM_TT   zgemm_tt
#define ZGEMM_NR   zgemm_nr
#define ZGEMM_TR   zgemm_tr
#define ZGEMM_CR   zgemm_cr
#define ZGEMM_RN   zgemm_rn
#define ZGEMM_RT   zgemm_rt
#define ZGEMM_RC   zgemm_rc
#define ZGEMM_RR   zgemm_rr
#define ZGEMM_BETA zgemm_beta

/* Element type used in the C prototypes of this header. */
#define FLOAT	double

/* SIZE is 8 here and 4 in the float branch; presumably the element size in
   bytes - confirm.  LD/ST/SXADDQ/MUL/ADD/SUB select the instruction
   mnemonics used by the assembler sources.
   NOTE(review): ldt/stt/mult/addt/subt look like the Alpha double precision
   forms and s8addq like the scale-by-8 add - confirm against the ISA manual. */
#define SIZE	8
#define LD	ldt
#define ST	stt
#define SXADDQ	s8addq
#define MUL	mult
#define ADD	addt
#define SUB	subt

/* Names for parameter types and threaded entry points; their declarations
   are not part of this header section. */
#define GEMM_PARAM_T  dgemm_param_t
#define ZGEMM_PARAM_T zgemm_param_t
#define GEMM_THREAD   dgemm_thread
#define ZGEMM_THREAD  zgemm_thread

#else

/* Real GEMM entry points and kernels (float variant). */
#define GEMM	   sgemm
#define GEMM_	   sgemm_
#define GEMMC	   sgemmc
#define GEMMC_	   sgemmc_
#define GEMM_NN	   sgemm_nn
#define GEMM_NT	   sgemm_nt
#define GEMM_TN	   sgemm_tn
#define GEMM_TT	   sgemm_tt
#define GEMM_BETA  sgemm_beta

/* ZGEMM* names (c-prefixed variant): the generic ZGEMM* spelling is kept
   and only the expansion changes. */
#define ZGEMM      cgemm
#define ZGEMM_     cgemm_
#define ZGEMMC_    cgemmc_
#define ZGEMM_NN   cgemm_nn
#define ZGEMM_CN   cgemm_cn
#define ZGEMM_TN   cgemm_tn
#define ZGEMM_NC   cgemm_nc
#define ZGEMM_NT   cgemm_nt
#define ZGEMM_CC   cgemm_cc
#define ZGEMM_CT   cgemm_ct
#define ZGEMM_TC   cgemm_tc
#define ZGEMM_TT   cgemm_tt
#define ZGEMM_NR   cgemm_nr
#define ZGEMM_TR   cgemm_tr
#define ZGEMM_CR   cgemm_cr
#define ZGEMM_RN   cgemm_rn
#define ZGEMM_RT   cgemm_rt
#define ZGEMM_RC   cgemm_rc
#define ZGEMM_RR   cgemm_rr
#define ZGEMM_BETA cgemm_beta

/* Element type used in the C prototypes of this header. */
#define FLOAT	float

/* Float counterparts of the size and instruction selections made in the
   DGEMM branch (4 instead of 8; lds/sts/muls/adds/subs; s4addq). */
#define SIZE	4
#define LD	lds
#define ST	sts
#define SXADDQ	s4addq
#define MUL	muls
#define ADD	adds
#define SUB	subs

/* Names for parameter types and threaded entry points; their declarations
   are not part of this header section. */
#define GEMM_PARAM_T  sgemm_param_t
#define ZGEMM_PARAM_T cgemm_param_t
#define GEMM_THREAD   sgemm_thread
#define ZGEMM_THREAD  cgemm_thread

#endif

/* WH64(a): assembler-level macro acting on the address held in register $a
   (the argument is pasted after a '$' to form the register name).

   The first branch is compiled out by "#if 0".  It would emit a wh64
   instruction: as a hand-assembled .long opcode word when "linux" is
   defined, and through the wh64 mnemonic otherwise.

   The definition actually in effect is the last one, which issues an lds
   from 0($a) into $f31.
   NOTE(review): presumably $f31 is the always-zero FP register, so this
   load only serves as a prefetch replacing the write hint - confirm
   against the Alpha architecture manual. */
#if 0
#ifdef linux
#define WH64(a)		.long (0x18<<26 | 0x1f << 21 | (a)<<16 | 0xf800)
#else
#define WH64(a)		wh64	($##a)
#endif
#else
#define WH64(a)		lds	$f31, 0($##a)
#endif

/* For x >= 0, split the address x into x_h and x_l
   such that
        x = x_h * 65536 + x_l
   where x_l lies in [-32768, 32767], that is [-(1<<15), (1<<15)-1].
   The formula actually used is
        x + (1<<15) = ( x_h<<16 ) + ( x_l + (1<<15) )
*/
/* Address_H(x): upper part of x, i.e. x biased by 0x8000 and shifted
   right by 16 bits. */
#define Address_H(x) (((x) + 0x8000) >> 16)
/* Address_L(x): what remains of x after removing Address_H(x) << 16;
   for x >= 0 it lies in [-32768, 32767]. */
#define Address_L(x) ((x) - (Address_H(x) << 16))

#ifndef ASSEMBLER

/* MIN(a,b) / MAX(a,b): smaller / larger of the two arguments.
   Every use of an argument is parenthesized so that arguments containing
   low-precedence operators (e.g. `x & 3` or `c ? p : q`) are compared as
   whole expressions.  When the operands compare equal, `a` is returned.
   Each argument may be evaluated twice, so do not pass expressions with
   side effects such as i++. */
#define MIN(a,b)   ((a) > (b) ? (b) : (a))
#define MAX(a,b)   ((a) < (b) ? (b) : (a))

/* Externally defined error-reporting routine; it takes a character buffer,
   a pointer to an info code and a long, and returns nothing.
   NOTE(review): presumably the Fortran BLAS XERBLA, where the buffer is the
   routine name and the trailing long its hidden string length - confirm. */
void xerbla_(char *name, int *info, long name_len);

/* GEMM kernels for element type FLOAT, one per two-letter suffix (NN, TN,
   NT, TT).  All four share the same signature.  The parameter names mirror
   the ZGEMM_* prototypes in this header, which have the same shape.
   NOTE(review): confirm the names against the kernel definitions. */
int GEMM_NN(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda, FLOAT *b, int ldb,
            FLOAT *c, int ldc, FLOAT *buffer);
int GEMM_TN(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda, FLOAT *b, int ldb,
            FLOAT *c, int ldc, FLOAT *buffer);
int GEMM_NT(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda, FLOAT *b, int ldb,
            FLOAT *c, int ldc, FLOAT *buffer);
int GEMM_TT(int m, int n, int k, FLOAT alpha,
            FLOAT *a, int lda, FLOAT *b, int ldb,
            FLOAT *c, int ldc, FLOAT *buffer);

/* Companion routine taking only the m-by-n matrix c (with ldc) and the
   scalar beta.  NOTE(review): presumably scales c by beta - confirm. */
int GEMM_BETA(int m, int n, FLOAT *c, int ldc, FLOAT beta);

int ZGEMM_NN(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_NT(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_NC(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_NR(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_TN(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_TT(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_TC(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_TR(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_CN(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_CT(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_CC(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_CR(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_RN(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_RT(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_RC(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);
int ZGEMM_RR(int m, int n, int k, FLOAT *alpha, FLOAT *a,
	      int lda, FLOAT *b, int ldb, FLOAT *c, int ldc, FLOAT *buffer);

int ZGEMM_BETA(int m, int n, FLOAT *c, int ldc, FLOAT beta_r, FLOAT beta_i);

/* Info number that marks an abnormal exit as coming from this library's
   (Goto's) own code, so it can be told apart from other error sources.
   NOTE(review): presumably passed as *info to xerbla_ - confirm at the
   call sites. */
#define INFO_NUM   99
#endif

#endif
