#line 1 "-"
#line 1 "gstrf_gp.m4"

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <ctype.h>
#if __STD_C
#include <stddef.h>   /* for size_t */
#else
#include <sys/types.h>
#endif
/*extern void* malloc(unsigned int);*/

#define TRUE  1
#define FALSE 0

#line 1 "gstrf_gp.m4:cinclude.m4"

#line 9

























#define character_t  m4character_t
#define integer_t  m4integer_t
#define real_t  m4real_t
#define double_t  double
#define complex_t  m4complex_t
#define dcomplex_t  m4dcomplex_t


#define scalar_t double_t




#line 48

 /* This includes a file of m4 macros */
#line 17 "gstrf_gp.m4"

#define DESC_TYPE_CSC 10001

#define CSC_DESC_TYPE 0
#define CSC_DESC_M    1
#define CSC_DESC_N    2
#define CSC_DESC_NNZ  3
#define CSC_DESC_BASE 4
#define CSC_DESC_COLPTR 5
#define CSC_DESC_ROWIND(desc) (5+((desc)[2])+1)

typedef struct {
  int  lu_size;
  scalar_t*  lu_nz;
  int* lu_rowind;
  int* l_colptr;
  int* u_colptr;

  int* row_perm;
  int* col_perm;

} lu_t;

typedef void (*statistics_reporter_t)(void* context, 
                                      char* name, 
                                      double_t* val);

dgstrf_gp_c ( 
                      void       *gp,
		      int  mrows,
		      int  ncols,
		      scalar_t*  a_nz,
		      int* desc_a,
		      lu_t       **lu,
		      int  *info
		     )
{
  int* a_rowind;
  int* a_colptr;

  /* work arrays */
  
  scalar_t*    rwork  = NULL;
  scalar_t*    twork  = NULL;

  int*   found  = NULL;
  int*   parent = NULL;
  int*   child  = NULL;

  int*   pattern  = NULL;

  int*   cmatch   = NULL;
  int*   rmatch   = NULL;

  /* copies of object parameters */

  int pivot_policy; 
  double_t  pivot_threshold;
  double_t  drop_threshold;
  double_t  col_fill_ratio;
  double_t  fill_ratio;
  double_t  expand_ratio;

  /* local variables */

  int nrow = mrows;
  int ncol = ncols;

  int a_desc_type, a_m, a_n, a_nnz, a_base;

  int jcol, i; 
  int lasta; 
  int lastlu;
  int zpivot; 

  int local_pivot_policy;
  int nz_count_limit;

  int* user_col_perm;
  int  user_col_perm_length;
  int  user_col_perm_base;

  statistics_reporter_t reporter_func;
  void*                 reporter_ctxt;

  double_t   flops = 0.0;

  double_t   ujj, minujj;

  int       out_of_mem = FALSE;
  int       eline = -1;

  int pivt_row, orig_row, this_col, othr_col;

  /* constants */

  int izero = 0;
  scalar_t  zero  = 0.0; /* this is not good for complex !!! replace with macro */

  /* extract data from gp object */

  if ((gp) == NULL) {
    (*info) = -1;
    goto free_and_exit;
  }
  gp_get_pivot_policy_c        ((gp),&pivot_policy);
  gp_get_pivot_threshold_c     ((gp),&pivot_threshold);
  gp_get_drop_threshold_c      ((gp),&drop_threshold);
  gp_get_col_fill_ratio_c      ((gp),&col_fill_ratio);
  gp_get_fill_ratio_c          ((gp),&fill_ratio);
  gp_get_expand_ratio_c        ((gp),&expand_ratio);
  gp_get_statistics_reporter_c ((gp),&reporter_func,
                                                    &reporter_ctxt);
  gp_get_col_perm_c            ((gp),&user_col_perm,
                                                    &user_col_perm_length,
                                                    &user_col_perm_base);

  /*
  printf("piv pol=%d piv_thr=%lf drop_thr=%lf col_fill_rt=%lf\n",
	pivot_policy,pivot_threshold,drop_threshold,col_fill_ratio);
	*/

  /*pivot_threshold = 0.001;	*/
  /*if (pivot_threshold == 0.0) pivot_policy = 0;*/ /* no pivoting */
  /*pivot_policy = 0;*/ /* no pivoting */

  /* 
     if a column permutation is specified, 
     it must be a length ncol permutation.
  */

  if (user_col_perm != NULL && user_col_perm_length != ncol) {
    (*info) = -1;
    goto free_and_exit;
  }

  /* extract data from a's array descriptor */

  a_desc_type = desc_a[CSC_DESC_TYPE];
  if (a_desc_type != DESC_TYPE_CSC) { 
    (*info) = -5;
    goto free_and_exit;
  }
  a_m      = desc_a[CSC_DESC_M];
  a_n      = desc_a[CSC_DESC_N];
  a_nnz    = desc_a[CSC_DESC_NNZ];
  a_base   = desc_a[CSC_DESC_BASE];
  a_colptr = &( desc_a[CSC_DESC_COLPTR] );
  a_rowind = &( desc_a[CSC_DESC_ROWIND(desc_a)] );

  /* convert the descriptor to 1-base if necessary */

  if (a_base == 0) {
    for (jcol=0; jcol<(a_n+1); jcol++) (a_colptr[jcol])++;
    for (jcol=0; jcol<(a_nnz); jcol++) (a_rowind[jcol])++;
    desc_a[CSC_DESC_BASE] = 1;
    a_base                = 1;
  }

  /* Allocate work arrays. */

  if ((rwork  = (scalar_t*) malloc( nrow * sizeof(scalar_t)) ) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }

  if ((twork  = (scalar_t*) malloc( nrow * sizeof(scalar_t)) ) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }

  if ((found  = (int*) malloc( nrow * sizeof(int) )) == NULL) 
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }
  if ((child  = (int*) malloc( nrow * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }
  if ((parent = (int*) malloc( nrow * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }

  if ((pattern  = (int*) malloc( nrow * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }

  /* Create lu structure */

  if (((*lu)  = (lu_t*) malloc( sizeof(lu_t) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }

  (*lu)->lu_nz     = NULL;
  (*lu)->lu_rowind = NULL;
  (*lu)->l_colptr  = NULL;
  (*lu)->u_colptr  = NULL;
  (*lu)->row_perm  = NULL;
  (*lu)->col_perm  = NULL;
  (*lu)->lu_size   = a_nnz * fill_ratio;

  if (((*lu)->lu_nz = 
        (scalar_t*) malloc( ((*lu)->lu_size) * sizeof(scalar_t) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	

  if (((*lu)->lu_rowind = 
       (int*) malloc( ((*lu)->lu_size) * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	

  if (((*lu)->u_colptr = 
        (int*) malloc( (ncol+1) * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	

  if (((*lu)->l_colptr = 
        (int*) malloc( (ncol) * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	

  if (((*lu)->row_perm = 
        (int*) malloc( (nrow) * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	

  if (((*lu)->col_perm = 
        (int*) malloc( (ncol) * sizeof(int) )) == NULL)
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	


  /* Compute max matching. We use elements of the lu structure */
  /* for all the temporary arrays needed.                      */

  if ((cmatch = (int*) malloc( ncol * sizeof(int) )) == NULL) 
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }
  if ((rmatch = (int*) malloc( nrow * sizeof(int) )) == NULL) 
    { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }

  for (jcol = 0; jcol < ncol; jcol++) {
    ((*lu)->l_colptr)[jcol]  =
    ((*lu)->u_colptr)[jcol]  =
    ((*lu)->col_perm)[jcol]  =
    ((*lu)->lu_rowind)[jcol] =
    rmatch[jcol]                      = 
    cmatch[jcol]                      = 0;
  }
  for ( i = 0; i < nrow; i++) {
    ((*lu)->row_perm)[i] = rmatch[i] = 0;
  }
/*
  for ( i = 0; i < a_nnz; i++) {
    a_nz[i] = ((double) random()) / ((double) 0x7fffffff);
  }
  for ( i = 0; i < a_nnz; i++) {
    if (a_nz[i] == 0.0) printf("Warning: numerically zero value in A\n");
  }
*/
  maxmatch_ ( 
                            &nrow ,                    /* in. */
       			    &ncol ,                    /* in. */
                            a_colptr,                  /* in. */
                            a_rowind,                  /* in. */
                            (*lu)->l_colptr,  /* work. prevcl(cols) */
                            (*lu)->u_colptr,  /* work. prevrw(cols) */
                            (*lu)->row_perm,  /* work. marker(rows) */
                            (*lu)->col_perm,  /* work. tryrow(cols) */
                            (*lu)->lu_rowind, /* work. nxtchp(cols) */
                            rmatch,                    /* out.  rowset(rows) */
                            cmatch                     /* out.  colset(cols) */ 
                           );
  
  for (jcol = 0; jcol < ncol; jcol++) 
    if (cmatch[jcol] == 0) {
      printf("Warning: Perfect matching not found\n");
      break;
    }

/*
  for (jcol = 0; jcol < ncol; jcol++) 
    cmatch[jcol] = rmatch[jcol] = jcol+1;
*/
  /* Initialize useful values and zero out the dense vectors.  
     If we are threshold pivoting, get row counts. */

  lastlu = 0;

  local_pivot_policy = pivot_policy;
  (*info) = 0;
  lasta = a_colptr[ncol] - 1;
  ((*lu)->u_colptr)[0] = 1;
  
  ifill_ (pattern, &nrow, &izero);
  ifill_ (found, &nrow, &izero);
  rfill_ (rwork, &nrow, &zero);
  ifill_ ((*lu)->row_perm, &nrow, &izero);

  if (user_col_perm == NULL) {
    for (jcol=0; jcol<ncol; jcol++) 
      ((*lu)->col_perm)[jcol] = jcol + 1;
  } else {
    printf("user_col_perm_base = %d\n",user_col_perm_base);
    for (jcol=0; jcol<ncol; jcol++) 
      ((*lu)->col_perm)[jcol] = user_col_perm[jcol] + (1 - user_col_perm_base);
  }

  /* compute one column at a time */

  for ( jcol = 1; jcol <= ncol; jcol++) {

    /* mark pointer to new column, ensure it is large enough */

    if (lastlu + nrow >= (*lu)->lu_size) {
      int new_size = (int) ( (*lu)->lu_size * expand_ratio );

      /* fprintf(stderr,"expanding to %d nonzeros...\n",new_size);*/

      if (((*lu)->lu_nz = 
            (scalar_t*) realloc( (*lu)->lu_nz,
                                 (new_size * sizeof(scalar_t)) )) == NULL)
        { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	

      if (((*lu)->lu_rowind = 
           (int*) realloc( (*lu)->lu_rowind,
                                 (new_size * sizeof(int)) )) == NULL)
        { out_of_mem = TRUE; eline = __LINE__; goto free_and_exit; }	

      (*lu)->lu_size = new_size;
      }

    /* Set up nonzero pattern */

    {
      int jjj;

      jjj = ((*lu)->col_perm)[jcol-1];
      for (i = a_colptr[jjj-1];
           i < a_colptr[jjj];
           i++) {
        pattern[ a_rowind[i-1] - 1 ] = 1;
      }

      this_col = ((*lu)->col_perm)[jcol-1];
      orig_row = cmatch[ this_col - 1 ];

      pattern[ orig_row - 1 ] = 2;

      if (((*lu)->row_perm)[ orig_row - 1 ] != 0) {
	printf("ERROR: PIVOT ROW FROM MAX-MATCHING ALREADY USED.\n");
	exit(1);
      }
/*
      pattern[ this_col - 1 ] = 2;
*/
    }


    /*
      Depth-first search from each above-diagonal nonzero of column
      jcol of A, allocating storage for column jcol of U in
      topological order and also for the non-fill part of column
      jcol of L.
    */

    ludfs_ (
                           &jcol, 
                           a_nz, a_rowind, a_colptr, 
                           &lastlu,
                           (*lu)->lu_rowind, 
                           (*lu)->l_colptr, (*lu)->u_colptr, 
                           (*lu)->row_perm,
                           (*lu)->col_perm,
                           rwork,
                           found, parent, child,
                           info
                          );

    if ((*info) != 0) { (*info) = -100; goto free_and_exit; }

    /*
      Compute the values of column jcol of L and U in the dense
      vector, allocating storage for fill in L as necessary.
    */
    
    lucomp_ (
                            &jcol, 
                            &lastlu, 
                            (*lu)->lu_nz, (*lu)->lu_rowind, 
                            (*lu)->l_colptr, (*lu)->u_colptr, 
                            (*lu)->row_perm,
                            (*lu)->col_perm,
                            rwork, 
                            found,
	                    pattern, 
                            &flops
                           );

/*
    if (rwork[ orig_row - 1 ] == 0.0) {
	printf("WARNING: MATCHING TO A ZERO\n");

        for (i=a_colptr[jcol-1];i<a_colptr[jcol]; i++)
          printf("(%d,%lg) ",a_rowind[i-1],a_nz[i-1]);
     	printf(". orig_row=%d\n",orig_row);

    }
*/

    /*
      Copy the dense vector into the sparse data structure, find the
      diagonal element (pivoting if specified), and divide the
      column of L by it.
    */

    nz_count_limit = (int) (col_fill_ratio * 
                            ((double)(a_colptr[this_col]-a_colptr[this_col-1] + 1)));

    lucopy_ (
                            &local_pivot_policy, 
                            &pivot_threshold, 
                            &drop_threshold, 
	                    &nz_count_limit,
                            &jcol, 
                            &ncol, 
                            &lastlu, 
                            (*lu)->lu_nz, (*lu)->lu_rowind, 
                            (*lu)->l_colptr, (*lu)->u_colptr, 
                            (*lu)->row_perm,
                            (*lu)->col_perm,
                            rwork,
	                    pattern, 
	                    twork,
                            &flops,
                            &zpivot
                           );


    if (zpivot == -1) {
      (*info) = jcol;
      goto free_and_exit;
    }

    {
      int jjj;

      jjj = ((*lu)->col_perm)[jcol-1];
      for (i = a_colptr[jjj-1];
           i < a_colptr[jjj];
           i++) {
        pattern[ a_rowind[i-1] - 1 ] = 0;
      }

      pattern[ orig_row - 1 ] = 0;

      pivt_row = zpivot;
      othr_col = rmatch[ pivt_row - 1 ];

      cmatch[ this_col - 1 ] = pivt_row;
      cmatch[ othr_col - 1 ] = orig_row;
      rmatch[ orig_row - 1 ] = othr_col;
      rmatch[ pivt_row - 1 ] = this_col;

/*
      pattern[ this_col - 1 ] = 0;
*/
    }

    /*
      If there are no diagonal elements after this column, change
      the pivot mode.
    */
    
    if (jcol == nrow) local_pivot_policy = -1;
 
  } /* end of jcol loop */

  /*
    Fill in the zero entries of the permutation vector, and renumber the
    rows so the data structure represents L and U, not PtL and PtU.
  */

  jcol = ncol + 1;
  for (i = 0; i < nrow; i++) {
    if (((*lu)->row_perm)[i] == 0) {
      ((*lu)->row_perm)[i] = jcol;
      jcol = jcol + 1;
    }
  }

  for (i = 0; i < lastlu; i++)
    ((*lu)->lu_rowind)[i] = ((*lu)->row_perm)[((*lu)->lu_rowind)[i]-1];

  /* Return */

free_and_exit:

/*
  printf("rperm:\n[");
  for (i=0; i<ncol; i++) printf("%d ",((*lu)->row_perm)[i]);
  printf("];\n");

  printf("cperm:\n[");
  for (i=0; i<ncol; i++) printf("%d ",((*lu)->col_perm)[i]);
  printf("];\n");
*/
  if (out_of_mem) {
    fprintf(stderr,
            "Out of space in gstrf_gp. Limit of maxlu=%d exceeded at column %d line %d\n",
            (*lu)->lu_size,jcol,eline);
    (*info) = -999;
  }
 


  if (rmatch) free(rmatch);
  if (cmatch) free(cmatch);

  if (pattern) free(pattern);

  if (parent) free(parent);
  if (child)  free(child);
  if (found)  free(found);
  if (twork)  free(rwork);
  if (rwork)  free(rwork);

  if ((*info) != 0) { 
    if (*lu) {

      if ((*lu)->row_perm)  free((*lu)->col_perm);
      if ((*lu)->row_perm)  free((*lu)->row_perm);
      if ((*lu)->u_colptr)  free((*lu)->u_colptr);
      if ((*lu)->l_colptr)  free((*lu)->l_colptr);
      if ((*lu)->lu_rowind) free((*lu)->lu_rowind);
      if ((*lu)->lu_nz)     free((*lu)->lu_nz);

      free (*lu);
      *lu = NULL;
    }
  } else {
    minujj = 1.0 / 0.0;

    for (jcol=1; jcol<=ncol; jcol++) {
      ujj = fabs(((*lu)->lu_nz)[((*lu)->l_colptr)[jcol-1]-2]);
      if (ujj < minujj) minujj = ujj;
    }

    /*printf(">>> last = %lg, min = %lg\n",ujj,minujj);*/
  }




  if (reporter_func) {
    (*reporter_func)(reporter_ctxt,"FLOPS",&flops);
    flops = (double) lastlu;
    (*reporter_func)(reporter_ctxt,"NONZEROS",&flops);
  }

  return;
}

dgstrs_gp_c ( 
                      void         *gp,
                      char* trans,
		      int    n,
		      int    nrhs,
                      lu_t         *lu,
                      int    ia,
                      int    ja,
                      scalar_t*    b,
                      int    ib,
                      int    jb,
                      int*   desc_b,
		      int    *info
		     )
{
  scalar_t*    rwork  = NULL;

  int b_desc_type, b_ld;

  statistics_reporter_t reporter_func;
  void*                 reporter_ctxt;

  double_t   flops = 0.0;

  (*info) = 0;

  /* extract data from gp object */

  if ((gp) == NULL) {
    (*info) = -1;
    goto free_and_exit;
  }

  gp_get_statistics_reporter_c ((gp),&reporter_func,
                                                    &reporter_ctxt);

  if (ia != 1) { (*info) = -5; goto free_and_exit; }  
  if (ja != 1) { (*info) = -6; goto free_and_exit; }  
  if (ib != 1) { (*info) = -8; goto free_and_exit; }  
  if (jb != 1) { (*info) = -9; goto free_and_exit; }  

  if (nrhs != 1) { (*info) = -3; goto free_and_exit; }  

  /* we do not need this now since we assume a single dense rhs */

  /*
  b_desc_type = desc_b[0];
  if (b_desc_type != DESC_TYPE_DENSE) { (*info) = -10; goto free_and_exit; }
  b_ld = desc_b[1];
  */

  if ((rwork  = 
         (scalar_t*) malloc( n * sizeof(scalar_t) )) == NULL)
    { (*info) = -999; goto free_and_exit; }

  if (toupper(trans[0]) == 'N') {

    lsolve_ (&(n), 
                            (lu)->lu_nz, (lu)->lu_rowind, 
                            (lu)->l_colptr, (lu)->u_colptr, 
                            (lu)->row_perm, 
                            (lu)->col_perm, 
                            b, 
                            rwork,
	                    info);

    usolve_ (&(n), 
                            (lu)->lu_nz, (lu)->lu_rowind, 
                            (lu)->l_colptr, (lu)->u_colptr, 
                            (lu)->row_perm, 
                            (lu)->col_perm, 
                            rwork, 
                            b,
                            info);

  } else if (toupper(trans[0]) == 'T') {

    utsolve_ (&(n), 
                            (lu)->lu_nz, (lu)->lu_rowind, 
                            (lu)->l_colptr, (lu)->u_colptr, 
                            (lu)->row_perm, 
                            (lu)->col_perm, 
                            b, 
                            rwork,
	                    info);

    ltsolve_ (&(n), 
                            (lu)->lu_nz, (lu)->lu_rowind, 
                            (lu)->l_colptr, (lu)->u_colptr, 
                            (lu)->row_perm, 
                            (lu)->col_perm, 
                            rwork, 
                            b,
                            info);

  } else { (*info) = -1; goto free_and_exit; }

  flops = (double) ( 2 * ((lu)->u_colptr[n] - 1) );

free_and_exit:
  if (rwork)  free(rwork);

  if (reporter_func) 
    (*reporter_func)(reporter_ctxt,"FLOPS",&flops);

  return;
}

gstff_gp_c (lu_t *lu)
{
  if ( (lu) ) {

    if ((lu)->row_perm)  free((lu)->col_perm);
    if ((lu)->row_perm)  free((lu)->row_perm);
    if ((lu)->u_colptr)  free((lu)->u_colptr);
    if ((lu)->l_colptr)  free((lu)->l_colptr);
    if ((lu)->lu_rowind) free((lu)->lu_rowind);
    if ((lu)->lu_nz)     free((lu)->lu_nz);

    free( (lu) );
  }
}

