/*
   Defines a Domain Decomposition preconditioner
*/
#include "parpre_pc.h"
#include "petscsles.h"
#include "src/sles/slesimpl.h"
#include "src/sles/pc/pcimpl.h"
#include "petscvec.h"
#include "src/vec/vecimpl.h"
#include "parpre_subdomains.h"
#include "parpre_mat.h"
#include "parpre_mat.h"
#include "src/mat/matimpl.h"
#include "src/mat/parpre_matimpl.h"
#include "petscoptions.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"

/* Logging event handles. The two slots are filled in by
   ParPreDomainDecompInit() and passed to PLogEventBegin/PLogEventEnd
   in PCApply_DomainDecomp(). */
static int events[2];
/* slot used around the subdomain (interior) solves */
#define DOMAIN_EVENT 0
/* slot used around the interface solve */
#define GAMMA_EVENT 1

/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define CHUNCKSIZE   100

/*
  Private data of the domain decomposition preconditioner (stored in
  pc->data).  "intl" refers to the interior unknowns of a subdomain,
  "edge" to the interface unknowns.
*/
typedef struct {
  /* Subdomain solver bookkeeping; the storage is obtained through
     PCParallelSubdomainsCreate(pc,sizeof(PC_DDecomp_struct)).
     NOTE(review): presumably this has to stay the first member - confirm
     against parpre_subdomains.h. */
  PCParallelSubdomainStruct subdomains;
  /* interface_method: solver applied to interface_system;
     interface_set: interface index set computed in PCSetup_DomainDecomp */
  SLES interface_method; IS interface_set;
  /* Blocks extracted in DDBlocks (rows x columns):
     C11 = intl x intl, C12 = intl x edge, C21 = edge x intl,
     C22 = edge x edge (destroyed again at the end of the setup). */
  Mat C11,C12,C21,/* C12_big,C21_big,*/ C22;
  /* operator handed to interface_method; assembled in PCSetup_DomainDecomp */
  Mat interface_system; 
  /* Created in DDVectors with the local size of the matrix;
     global_vector serves as the template in the VecScatterCreate calls. */
  Vec global_vector,global_vector2; /* unused ? */
  /* The local_* vectors are sequential vectors created on top of the
     array of the mpi_* vector with the same name (see DDVectors). */
  Vec local_edge_vec,local_edge_vec2, mpi_edge_vec,mpi_edge_vec2;
  Vec local_intl_vec,local_intl_vec2, mpi_intl_vec,mpi_intl_vec2;
  /* interface_vector is passed to PCSetVector for the interface PC;
     interface_vector2 is only created in the visible code. */
  Vec interface_vector,interface_vector2; /* unnecessary? */
  /* get_*: from a global vector into mpi_*_vec; put_*: the reverse */
  VecScatter get_edge,put_edge,get_intl,put_intl;
  /* interface width: the setup performs width-1 overlap increases */
  int width;
} PC_DDecomp_struct;

/****************************************************************
 * User Interface
 ****************************************************************/
#undef __FUNC__
#define __FUNC__ "PCDomainDecompSetWidth"
/*@
  PCDomainDecompSetWidth - Set the width of the interface between
  the subdomains of a Schur complement preconditioner.

  Parameters:
+ pc - the preconditioner
- w - a positive integer

  Notes:
  Returns 1 without changing anything when pc does not carry the PC
  cookie; raises an error when w is not positive.
@*/
int PCDomainDecompSetWidth(PC pc,int w)
{
  PC_DDecomp_struct *dd = (PC_DDecomp_struct *) pc->data;

  PetscFunctionBegin;
  /* anything that is not a PC object is refused */
  if (((PetscObject)pc)->cookie != PC_COOKIE) {
    PetscFunctionReturn(1);
  }

  if (w<=0) {
    SETERRQ(1,0,"Nonsensical value for interface width");
  }
  dd->width = w;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "PCDomainDecompGetInterfaceSLES"
/*@
  PCDomainDecompGetInterfaceSLES - Get the SLES object that is applied
  as the interface system solver.

  Parameters:
+ pc - the preconditioner
- intsles - on return, the interface SLES stored in the preconditioner

  Notes:
  Returns 1 and leaves intsles untouched when pc does not carry the
  PC cookie.
@*/
int PCDomainDecompGetInterfaceSLES(PC pc,SLES *intsles)
{
  PC_DDecomp_struct *dd = (PC_DDecomp_struct *) pc->data;

  PetscFunctionBegin;
  /* anything that is not a PC object is refused */
  if (((PetscObject)pc)->cookie != PC_COOKIE) {
    PetscFunctionReturn(1);
  }

  *intsles = dd->interface_method;

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "PCDomainDecompGetInterfacePC"
/*@
  PCDomainDecompGetInterfacePC - Get the PC object that is applied
  as the interface system solver.

  Parameters:
+ pc - the preconditioner
- intpc - on return, the PC of the interface SLES

  Notes:
  Returns 1 and leaves intpc untouched when pc does not carry the
  PC cookie.
@*/
int PCDomainDecompGetInterfacePC(PC pc,PC *intpc)
{
  PC_DDecomp_struct *pc_data;
  int ierr;

  PetscFunctionBegin;
  if (((PetscObject)pc)->cookie != PC_COOKIE) {
    PetscFunctionReturn(1);
  }

  /* The private data is only looked into after the cookie test, so an
     object of the wrong kind is rejected before pc->data is followed. */
  pc_data = (PC_DDecomp_struct *) pc->data;
  ierr = SLESGetPC(pc_data->interface_method,intpc); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "PCDomainDecompGetInterfaceMat"
/*@
  PCDomainDecompGetInterfaceMat - Get the Mat object
  on which the interface system solver is based.

  Parameters:
+ pc - the preconditioner
- intmat - on return, the first operator reported by PCGetOperators
           for the PC of the interface SLES

  Notes:
  Returns 1 and leaves intmat untouched when pc does not carry the
  PC cookie.
@*/
int PCDomainDecompGetInterfaceMat(PC pc,Mat *intmat)
{
  PC_DDecomp_struct *pc_data;
  PC intpc; Mat pcmat; MatStructure flag;
  int ierr;

  PetscFunctionBegin;
  if (((PetscObject)pc)->cookie != PC_COOKIE) {
    PetscFunctionReturn(1);
  }

  /* The private data is only looked into after the cookie test, so an
     object of the wrong kind is rejected before pc->data is followed. */
  pc_data = (PC_DDecomp_struct *) pc->data;
  ierr = SLESGetPC(pc_data->interface_method,&intpc); CHKERRQ(ierr);
  /* pcmat and flag are only placeholders for outputs that are not needed */
  ierr = PCGetOperators(intpc,intmat,&pcmat,&flag); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "PCDomainDecompGetInterfaceIS"
/*@
  PCDomainDecompGetInterfaceIS - Get the IS object
  on which the interface system solver is based.

  Parameters:
+ pc - the preconditioner
- intis - on return, the interface IS stored in the preconditioner

  Notes:
  Returns 1 and leaves intis untouched when pc does not carry the
  PC cookie.
@*/
int PCDomainDecompGetInterfaceIS(PC pc,IS *intis)
{
  PC_DDecomp_struct *dd = (PC_DDecomp_struct *) pc->data;

  PetscFunctionBegin;
  /* anything that is not a PC object is refused */
  if (((PetscObject)pc)->cookie != PC_COOKIE) {
    PetscFunctionReturn(1);
  }

  *intis = dd->interface_set;

  PetscFunctionReturn(0);
}

/****************************************************************
 * Implementation
 ****************************************************************/

#undef __FUNC__
#define __FUNC__ "InitialInterface"
/*
  InitialInterface - Create the starting interface index set.

  The indices recorded in the "todata" part of the matrix-vector
  scatter context of an MPIAIJ matrix are shifted by the first locally
  owned row and stored in a new sequential IS.  The caller describes
  the result as "the points connecting to other procs".

  Parameters:
+ base_mat - the matrix; its data is accessed as Mat_MPIAIJ
- interface - on return, the newly created index set (owned by the caller)
*/
static int InitialInterface(Mat base_mat,IS *interface)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) base_mat->data;
  VecScatter ctx = Aij->Mvctx;
  VecScatter_MPI_General *to = (VecScatter_MPI_General *) ctx->todata;
  int i,istart,iend,ierr,*idx, n=0;

  PetscFunctionBegin;
  /* without a starts array there is nothing listed: n stays 0 */
  if (to->starts) n = to->starts[to->n];
  ierr = MatGetOwnershipRange(base_mat,&istart,&iend); CHKERRQ(ierr);
  /* n+1 so that the request is never for zero bytes */
  idx = (int *) PetscMalloc((n+1)*sizeof(int)); CHKPTRQ(idx);
  for (i=0; i<n; i++) idx[i] = to->indices[i]+istart;
  ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,interface);
  /* release the work array first, so it is not lost when the creation
     failed, and only then act on the return code */
  PetscFree(idx);
  CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "RestrictInterface"
/*
  RestrictInterface - Thin out an interface index set in place.

  An index survives when it is a locally owned row whose off-diagonal
  block row is either empty or contains a column whose global number
  is at or beyond the end of the local ownership range.  The surviving
  indices replace *interface; the new set is sorted and, if it holds
  repeated values, recreated without them.

  Parameters:
+ base_mat - the matrix; its data is accessed as Mat_MPIAIJ
- interface - on entry the set to restrict (it is destroyed),
              on return the restricted set
*/
static int RestrictInterface(Mat base_mat,IS *interface)
{
  Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *) base_mat->data;
  Mat offdiag = mpiaij->B;
  int first,last,k,n_kept,n_cur,ierr,*kept,*cur;

  PetscFunctionBegin;
  ierr = ISGetSize(*interface,&n_cur); CHKERRQ(ierr);
  kept = (int *) PetscMalloc((n_cur+1)*sizeof(int)); CHKPTRQ(kept);

  ierr = ISGetIndices(*interface,&cur); CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(base_mat,&first,&last); CHKERRQ(ierr);

  /* pass 1: decide for every index whether it stays */
  n_kept = 0;
  for (k=0; k<n_cur; k++) {
    int row = cur[k], keep = 0;

    if ( (row>=first) && (row<last) ) {
      int local_row = row-first, j, ncols, *cols;

      ierr = MatGetRow(offdiag,local_row,&ncols,&cols,PETSC_NULL);
      CHKERRQ(ierr);
      if (ncols==0) {
        keep = 1;
      } else {
        /* garray translates the compressed column numbers of the
           off-diagonal block into global ones */
        j = 0;
        while ( !keep && (j<ncols) ) {
          if (mpiaij->garray[cols[j]]>=last) keep = 1;
          j++;
        }
      }
      ierr = MatRestoreRow(offdiag,local_row,&ncols,&cols,PETSC_NULL);
      CHKERRQ(ierr);
    }
    if (keep) kept[n_kept++] = row;
  }
  ierr = ISRestoreIndices(*interface,&cur); CHKERRQ(ierr);
  ierr = ISDestroy(*interface); CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,n_kept,kept,interface);
  CHKERRQ(ierr);

  /* pass 2: sort, then drop repeated entries if there are any */
  ierr = ISSort(*interface); CHKERRQ(ierr);
  ierr = ISGetSize(*interface,&n_cur); CHKERRQ(ierr);
  ierr = ISGetIndices(*interface,&cur); CHKERRQ(ierr);
  {
    int n_dup = 0;

    for (k=1; k<n_cur; k++) {
      if (cur[k]==cur[k-1]) n_dup++;
    }
    if (!n_dup) {
      ierr = ISRestoreIndices(*interface,&cur); CHKERRQ(ierr);
    } else {
      int prev = -1, n_unique = 0;

      /* the list is sorted, so a strictly larger value is a new one */
      for (k=0; k<n_kept; k++) {
        if (cur[k]>prev) {
          prev = cur[k];
          kept[n_unique++] = prev;
        }
      }
      ierr = ISRestoreIndices(*interface,&cur); CHKERRQ(ierr);
      ierr = ISDestroy(*interface); CHKERRQ(ierr);
      ierr = ISCreateGeneral(MPI_COMM_SELF,n_unique,kept,interface);
      CHKERRQ(ierr);
    }
  }
  PetscFree(kept);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "LocalSplitSets"
/*
  LocalSplitSets - Split the locally owned rows into edge and interior.

  Every row in the ownership range of base_mat that occurs in the
  interface set goes into edge_set, every other owned row into
  intl_set.  The interface entries are matched by walking through them
  in step with the owned rows, so only entries that appear in ascending
  order are recognized; anything else ends in the mismatch error below.

  Parameters:
+ base_mat - matrix supplying the ownership range
. interface - the interface index set
. edge_set - on return, new IS with the owned interface rows
- intl_set - on return, new IS with the remaining owned rows
*/
static int LocalSplitSets
(Mat base_mat,IS interface,IS *edge_set,IS *intl_set)
{
  int istart,iend,local_size, ierr;
  int *idx,nidx, i,i_idx,n_e,n_i,n_e_s,n_i_s, *ee,*ii;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(base_mat,&istart,&iend); CHKERRQ(ierr);
  local_size = iend-istart;
  ierr = ISGetSize(interface,&nidx); CHKERRQ(ierr);
  ierr = ISGetIndices(interface,&idx); CHKERRQ(ierr);

  /* expected sizes, used afterwards as a consistency check */
  n_e = 0;
  for (i_idx=0; i_idx<nidx; i_idx++)
    if ( (idx[i_idx]>=istart) && (idx[i_idx]<iend) )
      n_e++;
  n_e_s = n_e; n_i_s = local_size-n_e;

  ee = (int *) PetscMalloc( (n_e+1)*sizeof(int) );
  CHKPTRQ(ee);
  /* Room for every owned row: with repeated or unordered interface
     entries more rows than n_i_s land here, and that has to be
     reported by the check below rather than overrun the buffer. */
  ii = (int *) PetscMalloc( (local_size+1)*sizeof(int) );
  CHKPTRQ(ii);

  n_e = n_i = 0;
  i_idx = 0;
  /* skip interface entries below the local range */
  while ( (i_idx<nidx) && (idx[i_idx]<istart) ) i_idx++;
  for ( i=istart; i<iend; i++ ) {
    /* i_idx<nidx guards the read of idx once the list is used up;
       it also covers the case of an empty interface */
    if ( (i_idx<nidx) && (i==idx[i_idx]) ) {
      ee[n_e++] = i; i_idx++;
    } else {
      ii[n_i++] = i;
    }
  }
  if ( (n_e!=n_e_s) || (n_i!=n_i_s) ) {
    /* do not lose the work arrays on the error path */
    PetscFree(ee); PetscFree(ii);
    if (n_e!=n_e_s) SETERRQ(1,0,"n_e mismatch");
    SETERRQ(1,0,"n_i mismatch");
  }

  ierr = ISRestoreIndices(interface,&idx); CHKERRQ(ierr);

  ierr = ISCreateGeneral(MPI_COMM_SELF,n_e,ee,edge_set); CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,n_i,ii,intl_set); CHKERRQ(ierr);
  PetscFree(ee); PetscFree(ii);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "DDVectors"
/*
  DDVectors - Create all work vectors of the preconditioner.

  For the interior and for the edge unknowns two parallel vectors are
  made, and for each of them a sequential vector that is created on
  the very same array.  In addition two vectors of local size nloc and
  two more of local size n_edge are created.

  Parameters:
+ pc_data - private data receiving the vectors
. comm - communicator for the parallel vectors
. nloc - local size of global_vector and global_vector2
. n_edge - local number of edge unknowns
- n_intl - local number of interior unknowns
*/
static int DDVectors
(PC_DDecomp_struct *pc_data,MPI_Comm comm, int nloc,int n_edge,int n_intl)
{
  Scalar *values;
  int ierr;

  PetscFunctionBegin;

  /* --- interior: parallel pair --- */
  ierr = VecCreateMPI(comm,n_intl,PETSC_DECIDE,&pc_data->mpi_intl_vec);
  CHKERRQ(ierr);
  ierr = VecDuplicate(pc_data->mpi_intl_vec,&pc_data->mpi_intl_vec2);
  CHKERRQ(ierr);

  /* --- interior: sequential vectors on the same storage --- */
  ierr = VecGetArray(pc_data->mpi_intl_vec,&values); CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(MPI_COMM_SELF,n_intl,values,
                               &pc_data->local_intl_vec); CHKERRQ(ierr);
  ierr = VecRestoreArray(pc_data->mpi_intl_vec,&values); CHKERRQ(ierr);

  ierr = VecGetArray(pc_data->mpi_intl_vec2,&values); CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(MPI_COMM_SELF,n_intl,values,
                               &pc_data->local_intl_vec2); CHKERRQ(ierr);
  ierr = VecRestoreArray(pc_data->mpi_intl_vec2,&values); CHKERRQ(ierr);

  /* --- edge: parallel pair --- */
  ierr = VecCreateMPI(comm,n_edge,PETSC_DECIDE,&pc_data->mpi_edge_vec);
  CHKERRQ(ierr);
  ierr = VecDuplicate(pc_data->mpi_edge_vec,&pc_data->mpi_edge_vec2);
  CHKERRQ(ierr);

  /* --- edge: sequential vectors on the same storage --- */
  ierr = VecGetArray(pc_data->mpi_edge_vec,&values); CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(MPI_COMM_SELF,n_edge,values,
                               &pc_data->local_edge_vec); CHKERRQ(ierr);
  ierr = VecRestoreArray(pc_data->mpi_edge_vec,&values); CHKERRQ(ierr);

  ierr = VecGetArray(pc_data->mpi_edge_vec2,&values); CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(MPI_COMM_SELF,n_edge,values,
                               &pc_data->local_edge_vec2); CHKERRQ(ierr);
  ierr = VecRestoreArray(pc_data->mpi_edge_vec2,&values); CHKERRQ(ierr);

  /* --- vectors of the full local size --- */
  ierr = VecCreateMPI(comm,nloc,PETSC_DECIDE,&pc_data->global_vector);
  CHKERRQ(ierr);
  ierr = VecDuplicate(pc_data->global_vector,&pc_data->global_vector2);
  CHKERRQ(ierr);

  /* --- separate pair with the edge layout --- */
  ierr = VecCreateMPI(comm,n_edge,PETSC_DECIDE,&pc_data->interface_vector);
  CHKERRQ(ierr);
  ierr = VecDuplicate(pc_data->interface_vector,&pc_data->interface_vector2);
  CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "DDScatters"
/*
  DDScatters - Create the work vectors and the four scatters between a
  vector of the full local size and the interior / edge vectors.

  The target positions in the small vectors are consecutive: each
  process starts at the sum of the counts of all lower ranked
  processes, obtained from an all-gather of the local counts.

  Parameters:
+ pc_data - private data receiving vectors and scatters
. comm - communicator of the preconditioner
. n_interior - passed on to DDVectors as the local size of the global
               vectors (the caller supplies the local size of the matrix)
. intl_set - interior rows, used as indices into the global vector
- edge_set - edge rows, used as indices into the global vector
*/
static int DDScatters
(PC_DDecomp_struct *pc_data,MPI_Comm comm,
 int n_interior,IS intl_set,IS edge_set)
{
  IS edge_src=edge_set,edge_tar,intl_src=intl_set,intl_tar;
  int i_start,e_start,n_edge_vars,n_intl_vars;
  int ierr;

  PetscFunctionBegin;
  /* create internal and interface vectors */
  ierr = ISGetSize(edge_set,&n_edge_vars); CHKERRQ(ierr);
  ierr = ISGetSize(intl_set,&n_intl_vars); CHKERRQ(ierr);
  ierr = DDVectors
    (pc_data,comm,n_interior,n_edge_vars,n_intl_vars);
  CHKERRQ(ierr);

  {
    int np,ip,*ps,p,s;

    /* consecutive global index sets for internal & edge vars */
    MPI_Comm_size(comm,&np); MPI_Comm_rank(comm,&ip);
    ps = (int *) PetscMalloc(np*sizeof(int)); CHKPTRQ(ps);

    /* turn the gathered counts into starting offsets (exclusive sum) */
    MPI_Allgather((void*)&n_edge_vars,1,MPI_INT,(void*)ps,1,MPI_INT,comm);
    s = 0;
    for (p=0; p<np; p++) {
      int t = ps[p];
      ps[p] = s;
      s += t;
    }
    e_start = ps[ip];

    MPI_Allgather((void*)&n_intl_vars,1,MPI_INT,(void*)ps,1,MPI_INT,comm);
    s = 0;
    for (p=0; p<np; p++) {
      int t = ps[p];
      ps[p] = s;
      s += t;
    }
    i_start = ps[ip];

    /* the offsets have been copied out; release the work array */
    PetscFree(ps);
  }

  ierr = ISCreateStride(MPI_COMM_SELF,n_edge_vars,e_start,1,&edge_tar);
  CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,n_intl_vars,i_start,1,&intl_tar);
  CHKERRQ(ierr);

  /* global -> interior and back */
  ierr = VecScatterCreate
    (pc_data->global_vector,intl_src,pc_data->mpi_intl_vec,intl_tar,
     &pc_data->get_intl);
  CHKERRQ(ierr);

  ierr = VecScatterCreate
    (pc_data->mpi_intl_vec,intl_tar,pc_data->global_vector,intl_src,
     &pc_data->put_intl);
  CHKERRQ(ierr);

  /* global -> edge and back */
  ierr = VecScatterCreate
    (pc_data->global_vector,edge_src,pc_data->mpi_edge_vec,edge_tar,
     &pc_data->get_edge);
  CHKERRQ(ierr);
  ierr = VecScatterCreate
    (pc_data->mpi_edge_vec,edge_tar,pc_data->global_vector,edge_src,
     &pc_data->put_edge);
  CHKERRQ(ierr);

  ierr = ISDestroy(edge_tar); CHKERRQ(ierr);
  ierr = ISDestroy(intl_tar); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "DDBlocks"
/*
  DDBlocks - Extract all submatrices needed by the preconditioner in a
  single MatGetSubMatrices call and hand the interior block to the
  local solver.

  Parameters:
+ base_mat - matrix to take the blocks from
. pc - the preconditioner; C11, C12, C21, C22 are stored in its data
. intl_set - local interior rows
. edge_set - local edge rows
. g_intl_set - not used by this routine
. g_edge_set - edge rows gathered over all processes
. from_interface - on return, block with rows g_edge_set, columns intl_set
. to_interface - on return, block with rows intl_set, columns g_edge_set
- interface_mat - on return, block with rows edge_set, columns g_edge_set
*/
static int DDBlocks
(Mat base_mat,PC pc,
 IS intl_set,IS edge_set,IS g_intl_set,IS g_edge_set,
 Mat *from_interface,Mat *to_interface,Mat *interface_mat)
{
  PC_DDecomp_struct *dd = (PC_DDecomp_struct *) pc->data;
  IS row_sets[7],col_sets[7];
  Mat *blocks;
  SLES subdomain_sles;
  int ierr;

  PetscFunctionBegin;

  /* row index sets of the seven requests ... */
  row_sets[0] = intl_set;
  row_sets[1] = edge_set;
  row_sets[2] = edge_set;
  row_sets[3] = intl_set;
  row_sets[4] = intl_set;
  row_sets[5] = edge_set;
  row_sets[6] = g_edge_set;

  /* ... and the matching column index sets */
  col_sets[0] = intl_set;
  col_sets[1] = edge_set;
  col_sets[2] = intl_set;
  col_sets[3] = edge_set;
  col_sets[4] = g_edge_set;
  col_sets[5] = g_edge_set;
  col_sets[6] = intl_set;

  ierr = MatGetSubMatrices(base_mat,7,row_sets,col_sets,
                           MAT_INITIAL_MATRIX,&blocks); CHKERRQ(ierr);

  /* purely local blocks are kept in the private data */
  dd->C11 = blocks[0];
  dd->C22 = blocks[1];
  dd->C21 = blocks[2];
  dd->C12 = blocks[3];
  /* blocks involving the gathered edge set go back to the caller */
  *to_interface   = blocks[4];
  *interface_mat  = blocks[5];
  *from_interface = blocks[6];
  /* only the array of handles is released, not the matrices */
  PetscFree(blocks);

  /* Initialise the local solution method with the interior block */
  ierr = PCParallelGetLocalSLES(pc,&subdomain_sles); CHKERRQ(ierr);
  ierr = SLESSetOperators(subdomain_sles,dd->C11,dd->C11,(MatStructure)0);
  CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
  
#include "src/sles/pc/utils/auxs.c"

#undef __FUNC__
#define __FUNC__ "PCSetup_DomainDecomp"
/*
  PCSetup_DomainDecomp - Set up the domain decomposition preconditioner.

  Steps, in the order performed:
  1. determine the interface set (InitialInterface, RestrictInterface,
     and width-1 rounds of MatIncreaseOverlap + RestrictInterface);
  2. split the owned rows into edge and interior rows;
  3. create work vectors and scatters, extract the matrix blocks;
  4. set up the local (interior) solver;
  5. form filler = from_interface * (local solve applied to to_interface),
     build the parallel interface matrix from interface_mat and
     subtract filler from it;
  6. set up the interface solver on that matrix.

  Only matrices of type MATMPIAIJ are accepted.

  NOTE(review): interface_mat is not destroyed here; whether
  MatrixAij2MpiAbut keeps it cannot be seen from this file.
*/
static int PCSetup_DomainDecomp(PC pc)
{
  Mat base_mat = pc->pmat, from_interface,to_interface,interface_mat;
  MPI_Comm    comm = base_mat->comm;
  PC_DDecomp_struct *pc_data = (PC_DDecomp_struct *) pc->data;
  IS g_edge_set,g_intl_set;
  int         interface_size,local_jsize,ierr,idum;

  PetscFunctionBegin;
  /* make sure you're only called for parallel execution */
  if (!(base_mat->type==MATMPIAIJ)) {
    SETERRQ(1,0,"Domain Decomp preconditioner only implemented for AIJMPI\n");
  }

  ierr = MatGetLocalSize(base_mat,&local_jsize,&idum); CHKERRQ(ierr);

  /* set the pc and interface comm */
  pc->comm = comm;
  pc_data->interface_method->comm = comm;

  {
    IS edge_set,intl_set;

    {
      IS interface_set; int i;

      /* set the interface to the points connecting to other procs */
      ierr = InitialInterface(base_mat,&interface_set); CHKERRQ(ierr);

      /* restrict interface to connections to higher procs */
      ierr = RestrictInterface(base_mat,&interface_set); CHKERRQ(ierr);

      /* widen the interface: one overlap level per extra unit of width */
      for (i=1; i<pc_data->width; i++) {
        ierr = MatIncreaseOverlap(base_mat,1,&interface_set,1); CHKERRQ(ierr);
        ierr = RestrictInterface(base_mat,&interface_set); CHKERRQ(ierr);
      }
      ierr = LocalSplitSets
        (base_mat,interface_set,&edge_set,&intl_set); CHKERRQ(ierr);
      ierr = ISGetSize(edge_set,&interface_size); CHKERRQ(ierr);
      ierr = ISGetGlobalContent(comm,edge_set,&g_edge_set); CHKERRQ(ierr);
      ierr = ISGetGlobalContent(comm,intl_set,&g_intl_set); CHKERRQ(ierr);
      /* kept; released in PCDestroy_DDecomp */
      pc_data->interface_set = interface_set;
    }

    /* setup vectors and vector scatters to/from the interface */
    ierr = DDScatters
      (pc_data,comm,local_jsize,intl_set,edge_set); CHKERRQ(ierr);
    /* extract blocks and set local solution method*/
    ierr = DDBlocks
      (base_mat,pc,intl_set,edge_set,g_intl_set,g_edge_set,
       &from_interface,&to_interface,&interface_mat); CHKERRQ(ierr);

    /* dispose of temporary data */
    ierr = ISDestroy(intl_set); CHKERRQ(ierr);
    ierr = ISDestroy(edge_set); CHKERRQ(ierr);
    ierr = MatDestroy(pc_data->C22); CHKERRQ(ierr);
  }

  /* set up the interior solver */
  {
    PC local_pc;
    ierr = PCParallelGetLocalPC(pc,&local_pc); CHKERRQ(ierr);
    ierr = PCSetVector(local_pc,pc_data->local_intl_vec); CHKERRQ(ierr);
    ierr = PCSetUp(local_pc); CHKERRQ(ierr);
  }

  /* build the interface system and set up its solver */
  {
    Mat A11C12,filler;
    SLES local_sles;
    PC interface_pc;

    ierr = PCParallelGetLocalSLES(pc,&local_sles); CHKERRQ(ierr);
    ierr = MatSolveMat_AIJ(local_sles,to_interface,&A11C12); CHKERRQ(ierr);
    ierr = MatDestroy(to_interface); CHKERRQ(ierr);

    ierr = MatMatMult_AIJ(from_interface,A11C12,&filler); CHKERRQ(ierr);
    ierr = MatDestroy(from_interface); CHKERRQ(ierr);
    ierr = MatDestroy(A11C12); CHKERRQ(ierr);

    ierr = MatrixAij2MpiAbut
      (comm,interface_size,interface_mat,1,
       &pc_data->interface_system); CHKERRQ(ierr);

    /* interface_system -= filler, row by row */
    {
      int m,n, row,nrows;

      ierr = ISGetSize(g_edge_set,&nrows); CHKERRQ(ierr);
      ierr = MatGetLocalSize(filler,&m,&n); CHKERRQ(ierr);
      if ( (m!=nrows) || (n!=nrows) ) SETERRQ(1,0,"Filler / edge mismatch");
      for (row=0; row<nrows; row++) {
        int ncols,*cols,icol; Scalar *vals;
        ierr = MatGetRow(filler,row,&ncols,&cols,&vals); CHKERRQ(ierr);
        /* the row values are negated in place for the addition and
           flipped back before the row is handed back */
        for (icol=0; icol<ncols; icol++) vals[icol] = -vals[icol];
        ierr = MatSetValues
          (pc_data->interface_system,1,&row,ncols,cols,vals,ADD_VALUES);
        CHKERRQ(ierr);
        for (icol=0; icol<ncols; icol++) vals[icol] = -vals[icol];
        ierr = MatRestoreRow(filler,row,&ncols,&cols,&vals); CHKERRQ(ierr);
      }
    }
    ierr = MatDestroy(filler); CHKERRQ(ierr);
    ierr = MatAssemblyBegin(pc_data->interface_system,MAT_FINAL_ASSEMBLY);
    CHKERRQ(ierr);
    ierr = MatAssemblyEnd(pc_data->interface_system,MAT_FINAL_ASSEMBLY);
    CHKERRQ(ierr);

    ierr = SLESSetOperators
      (pc_data->interface_method,pc_data->interface_system,
       pc_data->interface_system,(MatStructure)0);
    CHKERRQ(ierr);
    ierr = SLESGetPC(pc_data->interface_method,&interface_pc); CHKERRQ(ierr);
    ierr = PCSetVector(interface_pc,pc_data->interface_vector); CHKERRQ(ierr);
    ierr = PCSetUp(interface_pc); CHKERRQ(ierr);
  }

  ierr = ISDestroy(g_intl_set); CHKERRQ(ierr);
  ierr = ISDestroy(g_edge_set); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "PCApply_DomainDecomp"
/*
  PCApply_DomainDecomp - Apply the preconditioner: y = M^{-1} x.

  On more than one process the sequence is
    1. scatter the interior part of x, solve with the local solver;
    2. scatter the edge part of x, subtract C21 times the result of 1;
    3. solve the interface system, write the result to the edge part of y;
    4. subtract C12 times the interface solution from the interior
       right hand side, solve locally again, write to the interior of y.

  tmp_li/tmp_li2/tmp_le2 are the sequential vectors created on the
  arrays of tmp_i/tmp_i2/tmp_e2 (see DDVectors), so an operation on one
  of them acts on the data of its parallel counterpart.

  Parameters:
+ pc - the preconditioner
. x - input vector
- y - output vector
*/
static int PCApply_DomainDecomp(PC pc,Vec x,Vec y)
{
  PC_DDecomp_struct *pc_data = (PC_DDecomp_struct *) pc->data;
  MPI_Comm comm;
  SLES local_sles;
  int ierr,ntids,its;
  /*internal*/
  Vec tmp_i = pc_data->mpi_intl_vec, tmp_li = pc_data->local_intl_vec;
  Vec tmp_i2 = pc_data->mpi_intl_vec2, tmp_li2 = pc_data->local_intl_vec2;
  /* edge */
  Vec tmp_e = pc_data->mpi_edge_vec;
  Vec tmp_e2 = pc_data->mpi_edge_vec2,tmp_le2 = pc_data->local_edge_vec2;
  Scalar zero = 0.0, mone = -1.0;

  PetscFunctionBegin;

  ierr = PetscObjectGetComm((PetscObject)pc,&comm); CHKERRQ(ierr);
  MPI_Comm_size(comm,&ntids);

  /* fetched ahead of the branch because both branches solve with it */
  ierr = PCParallelGetLocalSLES(pc,&local_sles); CHKERRQ(ierr);

  if (ntids==1) {
    /* NOTE(review): the original author marked this branch as
       "this is wrong."; it applies the local solver to x directly. */
    ierr = SLESSolve(local_sles,x,y,&its); CHKERRQ(ierr);
  } else {
    ierr = VecSet(&zero,y); CHKERRQ(ierr);

    /* pack interior of x into tmp_i */
    ierr = VecScatterBegin
      (x,tmp_i,INSERT_VALUES,SCATTER_FORWARD,pc_data->get_intl);
    CHKERRQ(ierr);
    ierr = VecScatterEnd
      (x,tmp_i,INSERT_VALUES,SCATTER_FORWARD,pc_data->get_intl);
    CHKERRQ(ierr);

    /* interior solve: right hand side tmp_li, solution in tmp_li2 */
    PLogEventBegin(events[DOMAIN_EVENT],0,0,0,0);
    ierr = SLESSolve(local_sles,tmp_li,tmp_li2,&its); CHKERRQ(ierr);
    PLogEventEnd(events[DOMAIN_EVENT],0,0,0,0);

    /* pack edge of x into tmp_e */
    ierr = VecScatterBegin
      (x,tmp_e,INSERT_VALUES,SCATTER_FORWARD,pc_data->get_edge);
    CHKERRQ(ierr);
    ierr = VecScatterEnd
      (x,tmp_e,INSERT_VALUES,SCATTER_FORWARD,pc_data->get_edge);
    CHKERRQ(ierr);

    /* multiply the interior solution onto the edge: result in tmp_le2 */
    ierr = MatMult(pc_data->C21,tmp_li2,tmp_le2); CHKERRQ(ierr);

    /* tmp_e = tmp_e - tmp_e2 */
    ierr = VecAXPY(&mone,tmp_e2,tmp_e); CHKERRQ(ierr);

    /* interface solve: right hand side tmp_e, solution in tmp_e2 */
    PLogEventBegin(events[GAMMA_EVENT],0,0,0,0);
    ierr = SLESSolve
      (pc_data->interface_method,tmp_e,tmp_e2,&its); CHKERRQ(ierr);
    PLogEventEnd(events[GAMMA_EVENT],0,0,0,0);

    /* write edge solution into global output */
    ierr = VecScatterBegin
      (tmp_e2,y,INSERT_VALUES,SCATTER_FORWARD,pc_data->put_edge);
    CHKERRQ(ierr);
    ierr = VecScatterEnd
      (tmp_e2,y,INSERT_VALUES,SCATTER_FORWARD,pc_data->put_edge);
    CHKERRQ(ierr);

    /* multiply the edge solution into the interior: result in tmp_li2 */
    ierr = MatMult(pc_data->C12,tmp_le2,tmp_li2); CHKERRQ(ierr);

    /* tmp_i = tmp_i - tmp_i2 */
    ierr = VecAXPY(&mone,tmp_i2,tmp_i); CHKERRQ(ierr);

    /* second interior solve: right hand side tmp_li, solution in tmp_li2 */
    PLogEventBegin(events[DOMAIN_EVENT],0,0,0,0);
    ierr = SLESSolve(local_sles,tmp_li,tmp_li2,&its); CHKERRQ(ierr);
    PLogEventEnd(events[DOMAIN_EVENT],0,0,0,0);

    /* write interior solution into global output */
    ierr = VecScatterBegin
      (tmp_i2,y,INSERT_VALUES,SCATTER_FORWARD,pc_data->put_intl);
    CHKERRQ(ierr);
    ierr = VecScatterEnd
      (tmp_i2,y,INSERT_VALUES,SCATTER_FORWARD,pc_data->put_intl);
    CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);

}

#undef __FUNC__
#define __FUNC__ "PCDomainDecomp_View"
/*
  PCDomainDecomp_View - Show the contents of the preconditioner.

  On more than one process the get_intl scatter and the interface
  solver are displayed, each preceded by a heading; the subdomain
  information is displayed in every case.

  NOTE(review): the first heading speaks of the interface system while
  the scatter that follows is get_intl - confirm which one is meant.

  Parameters:
+ pc - the preconditioner
- viewer - where to display
*/
static int PCDomainDecomp_View(PC pc,Viewer viewer)
{
  PC_DDecomp_struct *dd = (PC_DDecomp_struct *) pc->data;
  MPI_Comm pc_comm;
  int nprocs,ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&pc_comm); CHKERRQ(ierr);
  MPI_Comm_size(pc_comm,&nprocs);

  /* the parallel parts exist only when there is more than one process */
  if (nprocs>1) {
    PetscPrintf(pc_comm,">> Scatter to extract interface system:\n");
    ierr = VecScatterView(dd->get_intl,viewer);
    CHKERRQ(ierr);

    PetscPrintf(pc_comm,">> Interface method\n");
    ierr = SLESView(dd->interface_method,viewer);
    CHKERRQ(ierr);
  }

  ierr = PCSubdomainsView(pc,viewer);
  CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "PCSetFromOptions_DomainDecomp"
/*
  PCSetFromOptions_DomainDecomp - Configure both solvers from the
  options database.

  The interface solver gets the options prefix of the preconditioner
  followed by "interface_"; the subdomain solver is configured through
  PCParallelLocalSolveSetFromOptions.

  Parameters:
. pc - the preconditioner
*/
static int PCSetFromOptions_DomainDecomp(PC pc)
{
  PC_DDecomp_struct *dd = (PC_DDecomp_struct *) pc->data;
  SLES interface_sles = dd->interface_method;
  char *pc_prefix;
  int ierr;

  PetscFunctionBegin;

  /* interface solver: <pc prefix> + "interface_" */
  ierr = PCGetOptionsPrefix(pc,&pc_prefix);
  CHKERRQ(ierr);
  ierr = SLESSetOptionsPrefix(interface_sles,pc_prefix);
  CHKERRQ(ierr);
  ierr = SLESAppendOptionsPrefix(interface_sles,"interface_");
  CHKERRQ(ierr);
  ierr = SLESSetFromOptions(interface_sles);
  CHKERRQ(ierr);

  /* subdomain solver */
  ierr = PCParallelLocalSolveSetFromOptions(pc);
  CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "PCDestroy_DDecomp"
/*
  PCDestroy_DDecomp - Release everything the preconditioner holds:
  both solvers, the interface index set, the matrix blocks, the
  interface matrix, all work vectors and the four scatters.

  C22 is not touched here because the setup routine already destroys it.

  NOTE(review): every object is destroyed unconditionally, so this
  presumably relies on the setup having run - confirm with the callers.

  Parameters:
. pc - the preconditioner
*/
static int PCDestroy_DDecomp(PC pc)
{
  SLES local_sles;
  PC_DDecomp_struct *pc_data = (PC_DDecomp_struct *) pc->data;
  int ierr;

  PetscFunctionBegin;
  ierr = PCParallelGetLocalSLES(pc,&local_sles); CHKERRQ(ierr);
  ierr = SLESDestroy(local_sles); CHKERRQ(ierr);
  ierr = SLESDestroy(pc_data->interface_method); CHKERRQ(ierr);
  ierr = ISDestroy(pc_data->interface_set); CHKERRQ(ierr);

  ierr = MatDestroy(pc_data->C11); CHKERRQ(ierr);
  ierr = MatDestroy(pc_data->C12); CHKERRQ(ierr);
  ierr = MatDestroy(pc_data->C21); CHKERRQ(ierr);
  ierr = MatDestroy(pc_data->interface_system); CHKERRQ(ierr);

  ierr = VecDestroy(pc_data->mpi_edge_vec); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->local_edge_vec); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->mpi_edge_vec2); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->local_edge_vec2); CHKERRQ(ierr);

  ierr = VecDestroy(pc_data->mpi_intl_vec); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->local_intl_vec); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->mpi_intl_vec2); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->local_intl_vec2); CHKERRQ(ierr);

  /* DDVectors creates these together with the vectors above, so they
     have to be released together with them as well */
  ierr = VecDestroy(pc_data->global_vector); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->global_vector2); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->interface_vector); CHKERRQ(ierr);
  ierr = VecDestroy(pc_data->interface_vector2); CHKERRQ(ierr);

  ierr = VecScatterDestroy(pc_data->get_edge); CHKERRQ(ierr);
  ierr = VecScatterDestroy(pc_data->put_edge); CHKERRQ(ierr);
  ierr = VecScatterDestroy(pc_data->get_intl); CHKERRQ(ierr);
  ierr = VecScatterDestroy(pc_data->put_intl); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "PCCreate_DomainDecomp"
/*
  PCCreate_DomainDecomp - Creation routine of the domain decomposition
  preconditioner.

  Installs the operations table, lets PCParallelSubdomainsCreate
  provide the private data, clears every object handle in it, sets the
  interface width to 1 and creates the interface solver with
  KSPPREONLY and PCNONE as its initial types.

  Parameters:
. pc - the preconditioner being created
*/
int PCCreate_DomainDecomp(PC pc)
{
  int ierr;

  PetscFunctionBegin;
  pc->ops->apply     = PCApply_DomainDecomp;
  pc->ops->applyrichardson = 0;
  pc->ops->destroy   = PCDestroy_DDecomp;
  pc->ops->setfromoptions   = PCSetFromOptions_DomainDecomp;
  pc->ops->setup     = PCSetup_DomainDecomp;
  pc->ops->view      = PCDomainDecomp_View;

  /* create subsolvers for the interface system and interior */
  ierr = PCParallelSubdomainsCreate(pc,sizeof(PC_DDecomp_struct));
  CHKERRQ(ierr);
  {
    PC_DDecomp_struct *bij = (PC_DDecomp_struct *) pc->data;
    SLES sles; KSP subksp; PC subpc;

    /* Start from a defined state: nothing below exists until the
       setup has run.  The subdomains member is left alone; it belongs
       to PCParallelSubdomainsCreate. */
    bij->interface_method = 0;
    bij->interface_set = 0;
    bij->C11 = bij->C12 = bij->C21 = bij->C22 = 0;
    bij->interface_system = 0;
    bij->global_vector = bij->global_vector2 = 0;
    bij->local_edge_vec = bij->local_edge_vec2 = 0;
    bij->mpi_edge_vec = bij->mpi_edge_vec2 = 0;
    bij->local_intl_vec = bij->local_intl_vec2 = 0;
    bij->mpi_intl_vec = bij->mpi_intl_vec2 = 0;
    bij->interface_vector = bij->interface_vector2 = 0;
    bij->get_edge = bij->put_edge = bij->get_intl = bij->put_intl = 0;

    bij->width = 1;

    /* interface solver: apply the preconditioner once, no preconditioning
       by default; both can be changed from the options database */
    ierr = SLESCreate(pc->comm,&sles); CHKERRQ(ierr);
    ierr = SLESGetKSP(sles,&subksp); CHKERRQ(ierr);
    ierr = KSPSetType(subksp,KSPPREONLY); CHKERRQ(ierr);
    ierr = SLESGetPC(sles,&subpc); CHKERRQ(ierr);
    ierr = PCSetType(subpc,PCNONE); CHKERRQ(ierr);
    bij->interface_method = sles;
  }

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "ParPreDomainDecompInit"
/*
  ParPreDomainDecompInit - Register the two logging events used by
  this preconditioner and store their handles in the events array.
*/
int ParPreDomainDecompInit()
{
  int ierr;

  PetscFunctionBegin;

  /* event wrapped around the subdomain solves */
  ierr = PLogEventRegister(&events[DOMAIN_EVENT],"Subdomain solve ",PETSC_NULL);
  CHKERRQ(ierr);

  /* event wrapped around the interface solve */
  ierr = PLogEventRegister(&events[GAMMA_EVENT],"Separator solve ",PETSC_NULL);
  CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
