    /*        Fast GEMM routine for Alpha                  */
    /*           Linux, Digital UNIX and NT/Alpha          */
    /*        by Kazushige Goto <goto@statabo.rim.or.jp>   */


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/times.h>
#include <sys/time.h>
#include <time.h>
#include "common.h"
#include "bmcommon.h"

/*
 * Benchmark driver for the complex matrix-matrix multiply.
 *
 * Optional positional arguments, in order:
 *   size  ld  check  alpha.r  alpha.i  beta.r  beta.i  only_nn
 *
 *   size    : order of the square matrices (default 400, must be > 0)
 *   ld      : leading dimension, clamped to at least `size`
 *   check   : non-zero => also run ZGEMMC_ (printed as "Original") and
 *             compare its output with ZGEMM_ (printed as "Optimized")
 *   alpha,
 *   beta    : complex scalars handed to the GEMM calls
 *   only_nn : non-zero => stop after the first transa/transb combination
 *
 * Exits with status 1 on an invalid size, on allocation failure, or once
 * more than three mismatching elements have been reported in check mode.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *b, *c = NULL, *d;
  int i, j, errcount;
  int transa, transb;
  complex_t alpha = {3.0, 4.0};
  complex_t beta  = {1.0, 0.0};
  int size    = 400;
  int ld;
  int check   = 0;
  int only_nn = 0;
  size_t col_stride;   /* FLOATs per column: 2 * ld (real + imaginary) */
  size_t nbytes;       /* size in bytes of each matrix buffer          */
  size_t idx;

#ifndef SIMPLE
  fprintf(stderr, "\n\tMatrix-Matrix Multiply"
#ifdef DGEMM
	  "(Double Precision, Complex) "
#else
	  "(Single Precision, Complex) "
#endif
	  "Benchmark\n"
	          "\t\t\t by Kazushige Goto <goto@statabo.rim.or.jp>\n\n");
#endif

  /* Skip the program name, then consume the positional arguments. */
  argc--; argv++;

  if (argc > 0) { size    = atol(*argv);		argc--; argv++;}
  ld = size;
  if (argc > 0) { ld      = MAX(size, atol(*argv));	argc--; argv++;}
  if (argc > 0) { check   = atoi(*argv);		argc--; argv++;}
  if (argc > 0) { alpha.r = atof(*argv);		argc--; argv++;}
  if (argc > 0) { alpha.i = atof(*argv);		argc--; argv++;}
  if (argc > 0) { beta.r  = atof(*argv);		argc--; argv++;}
  if (argc > 0) { beta.i  = atof(*argv);		argc--; argv++;}
  if (argc > 0) { only_nn = atoi(*argv);		argc--; argv++;}

  /* A non-positive size would wrap around in the size_t arithmetic below. */
  if (size <= 0) {
    fprintf(stderr, "Size must be a positive integer\n");
    exit(1);
  }

#ifndef SIMPLE
  fprintf(stderr, " Size = %4d Leading Size = %4d\n", size, ld);
  fprintf(stderr, "\tAlpha = (%e,%e) Beta = (%e,%e)\n",
	  alpha.r, alpha.i, beta.r, beta.i);

  if (!check) fprintf(stderr, "\n");
#endif

  /* Each matrix holds `size` columns of `ld` complex entries, i.e.
     2 * ld FLOATs per column. */
  col_stride = (size_t)ld * 2;
  nbytes     = sizeof(FLOAT) * (size_t)size * col_stride;

  if ((a = malloc(nbytes)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if ((b = malloc(nbytes)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  /* The reference result buffer is only needed in check mode. */
  if (check){
    if ((c = malloc(nbytes)) == NULL){
      fprintf(stderr,"Out of Memory!!\n");exit(1);
    }
  }

  if ((d = malloc(nbytes)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  /* The values below are drawn with rand(), so seed it with srand();
     srandom() is the seeding function for random(). */
  srand((unsigned int)getpid());

  for (transa = 0; transa < 2; transa++){
    for (transb = 0; transb < 2; transb++){

      /* Refill every buffer with values in [0, 10) for each combination.
	 Column i starts at i * col_stride so that the whole allocation is
	 initialised and columns do not overlap. */
      for (i = 0; i < size; i++){
	for (j = 0; j < ld * 2; j++){
	  idx = (size_t)i * col_stride + (size_t)j;
	  d[idx] = rand() / ((double) RAND_MAX + 1.0) * 10.0;
	  /* c starts as an exact copy of d so both GEMM calls see the
	     same input C matrix. */
	  if (check){
	    c[idx] = d[idx];
	  }
	}
      }

      for (i = 0; i < size; i++){
	for (j = 0; j < ld * 2; j++){
	  idx = (size_t)i * col_stride + (size_t)j;
	  b[idx] = rand() / ((double) RAND_MAX + 1.0) * 10.0;
	}
      }

      for (i = 0; i < size; i++){
	for (j = 0; j < ld * 2; j++){
	  idx = (size_t)i * col_stride + (size_t)j;
	  a[idx] = rand() / ((double) RAND_MAX + 1.0) * 10.0;
	}
      }

      if (check){
	fprintf(stderr, "\n%s x %s Original  :", trans[transa], trans[transb]);
	start();
	ZGEMMC_(trans[transa], trans[transb], &size, &size, &size,
	       &alpha.r, a, &ld, b, &ld, &beta.r, c, &ld);
	stop();
	fprintf(stderr, "%9.3f MFLOPS(%6.3f sec)\n",
		mflops(size)*4.0, secs());
      }

#ifndef SIMPLE
      fprintf(stderr, "%s x %s Optimized :", trans[transa], trans[transb]);
#endif

      start();
      ZGEMM_(trans[transa], trans[transb], &size, &size, &size,
	    &alpha.r, a, &ld, b, &ld, &beta.r, d, &ld);
      stop();
#ifndef SIMPLE
      fprintf(stderr, "%9.3f MFLOPS(%6.3f sec)\n",
	      mflops(size)*4.0, secs());
#else
      fprintf(stdout, "%8.3f", mflops(size)*4.0);
#endif

      /* Compare the two results element by element over the whole buffer;
	 give up after more than three reported mismatches. */
      if (check){
	errcount = 0;
	for (i = 0; i < size; i++){
	  for (j = 0; j < ld * 2; j++){
	    idx = (size_t)i * col_stride + (size_t)j;
	    if (diffs(c[idx], d[idx])){
	      fprintf(stderr, "%3d %3d :%6.1f %6.1f\n",
		      i, j, c[idx], d[idx]);
	      errcount ++;
	      if (errcount > 3) exit(1);
	    }
	  }
	}
      }
      if (only_nn) goto finish;
    }
  }

 finish:
  free(a);
  free(b);
  free(c);   /* c stays NULL when check is off; free(NULL) is a no-op */
  free(d);
#ifndef SIMPLE
  fprintf(stderr, "\n");
#endif
  return 0;
}
