/*---------------------------------------------------------------*/
/*     	CAPSS: A Cartesian Parallel Sparse Solver                */
/*     	Beta Release                                             */
/*      Author: Padma Raghavan                                   */
/*---------------------------------------------------------------*/
#include "d_n_fact.h"
/*
 * d_factor_spd -- driver for the distributed part of the factorization.
 *
 *   local_phase_node  node whose tree_parent[] link is the first node
 *                     processed here.
 *   factor_routine    routine called once per node as
 *                         (*factor_routine)(current, nprocs, start_proc,
 *                                           map, tree_chains, chain_index,
 *                                           g_cols, ncols)
 *                     dist_factor_spd and dist_factor_invert_spd in this
 *                     file take exactly this argument list.
 *
 * Allocates the work arrays, initialises the maps for local_phase_node and
 * then follows tree_parent[] upward until it reads -1.  The time spent in
 * factor_routine is accumulated in stats[n_d_t].
 */
d_factor_spd(	local_phase_node, factor_routine)

int			local_phase_node, (*factor_routine) () ;
{
	int	level,		current,
		nprocs,		start_proc,
		ncols;
	double	last_clock;
	extern	double	clock0(), stats[];

	d_factor_allocate(
		max_b_size,	max_mat_size_in_d_phase,
		max_stack_columns);

	dist_initialize_spd(local_phase_node);

	for (level = 0, current = tree_parent[local_phase_node];
			current != -1; level++) {

		sync0();

		/* the region that was "now" becomes the child's region */
		exch(&now_ptr, &child_ptr);

		nprocs     = tree_count_procs[level];
		start_proc = tree_start_procs[level];

		ncols = dist_set_up_spd(
				me,
				current,
				tree_child[current],
				nprocs, start_proc);

		/* only the factor routine itself is timed */
		last_clock = clock0();
		(*factor_routine)
				( current,
				nprocs,		start_proc,
				(stack_map + now_ptr),
				tree_chains,
				chain_index,
				tmp_global + now_ptr,
				ncols);
		stats[n_d_t] += clock0() - last_clock;

		dist_move_into_factor_spd (
				current,
				(tree_chains + chain_index[current]),
				(chain_index[current+1] - chain_index[current]),
				(stack_map + now_ptr),
				index_list,
				&done_factor_columns);

		current = tree_parent[current];
	}	/* end for - one pass per node on the path */

}/*end d_factor_spd */
			

dist_set_up_spd(
		me, 
		current,
		child,
		nprocs, start_proc)

int		me,
		current,
		child,
		nprocs,
		start_proc;
{
		


		int ncols;



		set_map_spd((stack_map+ now_ptr), 
				factor_struc_sizes[current],
				nprocs, start_proc, 
				&ncols);	


		dist_set_up_matrix_spd(current, 
					index_list, 
					stack_map);

		dist_add_and_free_matrix_spd (	current, 
					child,
					index_list);

		dist_move_over_matrix_spd(current,
				child,
				index_list, ncols);



		dist_add_nonz_of_A_spd(	
				current,
				index_list,
				(chain_index + current),
				(tree_chains + chain_index[current]),
				(chain_index[current+1] - 
					chain_index[current]),
				(tree_local_column + chain_index[current]));
		return(ncols);


}/*end set_matrix*/
set_map_spd(vec, size, nprocs, start_proc,  ncols )
int	*vec, size, nprocs, start_proc, *ncols;
{
		int i,next;
		
		for (i=next=0; i< size; i++) {
			vec[i] = start_proc + (i% nprocs);
			if (vec[i] == me) {
				next++;
			}
		}
		*ncols = next;
}/*end set_map*/
/*
 * dist_add_and_free_matrix_spd -- merge the child's columns into `current`.
 *
 * Walks factor_struc[child] and factor_struc[current] together (both are
 * compared with <, ==, > as in a sorted merge).  For every row index found
 * in both:
 *   - ptr_to_parent_factorstruc[child][next] records the position in
 *     current's structure;
 *   - if this processor owns the child column, the column is passed to
 *     copy_expand: with ADD into current's own column when this processor
 *     owns that too, otherwise with NOT_ADD into a freshly malloc'ed
 *     contrib_nonz[] buffer whose index is stored in have_contrib[];
 *   - the child column's stack_nonz_sizes entry is then set to 0.
 * Finally the buffer at the child's first stack slot is freed.
 *
 * have_contrib[] is reset to -1 and have_contrib_next to 0 on entry.
 *
 * Changes from the previous version:
 *   - the walk stops when current's structure is exhausted; before, only
 *     `next` was bounded and this_struc[this] could be read past the end
 *     (this_size was computed but never used);
 *   - the final free is skipped when index_list[child] equals
 *     index_list[current].  dist_set_up_spd calls dist_set_up_matrix_spd
 *     first, which stores current's new buffer at
 *     stack_nonz[index_list[current]]; if the two indices coincide the
 *     old code released that buffer.
 *     NOTE(review): this assumes equal indices mean the child holds no
 *     slots of its own -- confirm against the local phase.
 *   - the freed slot is set to NULL so it cannot be freed again.
 */
dist_add_and_free_matrix_spd (	current, 
			child,
			index_list)
int		current, child, *index_list;
{
	int	this,	this_nonz,	this_size,	*this_struc,
		next,	child_nonz,	child_size,	*child_struc;
	int	*ptr_to_parent;
	int	this_slot,	child_slot;

	set_to(have_contrib, factor_struc_sizes[current], -1);
	have_contrib_next = 0;

	this_struc    = factor_struc[current];
	ptr_to_parent = ptr_to_parent_factorstruc[child];
	this_size     = factor_struc_sizes[current];
	child_struc   = factor_struc[child];
	child_size    = factor_struc_sizes[child];
	this_nonz     = index_list[current];
	child_nonz    = index_list[child];
	next = this = 0;

	while (next < child_size && this < this_size) {

		if (child_struc[next] > this_struc[this]) {
			this++;
			continue;
		}
		if (child_struc[next] < this_struc[this]) {
			next++;
			continue;
		}

		/* same row index in both structures */
		ptr_to_parent[next] = this;

		if (stack_map[child_ptr + next] != me) {
			/* child column lives on another processor */
			next++;
			continue;
		}

		child_slot = child_nonz + tmp_local[child_ptr + next];

		if (stack_map[now_ptr + this] == me) {
			/* both columns are local: add in place */
			this_slot = this_nonz + tmp_local[now_ptr + this];
			copy_expand(ADD,
				this_struc[this],

				(this_struc + this),
				stack_nonz[this_slot],
				stack_nonz_sizes[this_slot],

				(child_struc + next),
				stack_nonz[child_slot],
				stack_nonz_sizes[child_slot]);
		}
		else {
			/* target column is remote: keep a contribution */
			have_contrib[this] = have_contrib_next;
			contrib_nonz_sizes[have_contrib_next] =
				factor_struc_sizes[current] - this;

			if ((contrib_nonz[have_contrib_next] =
				(float *) malloc((
				contrib_nonz_sizes[have_contrib_next] *
				float_size))) == NULL)
				exit_err("contrib_nonz", malloc_err);

			copy_expand(NOT_ADD,
				this_struc[this],

				(this_struc + this),
				contrib_nonz[have_contrib_next],
				contrib_nonz_sizes[have_contrib_next],

				(child_struc + next),
				stack_nonz[child_slot],
				stack_nonz_sizes[child_slot]);

			have_contrib_next++;
		}

		stack_nonz_sizes[child_slot] = 0;

		this++;
		next++;
	}

	if (index_list[child] != index_list[current] &&
			stack_nonz[index_list[child]] != NULL) {
		free ((char *) stack_nonz[index_list[child]]);
		stack_nonz[index_list[child]] = NULL;
	}
}/*dist_add_and_free*/

/*
 * dist_initialize_spd -- initial state for the distributed phase.
 *
 * Sets now_ptr to 0 and child_ptr to max_mat_size_in_d_phase, then for
 * every position of local_phase_node's structure stores the identity in
 * tmp_local[] and tmp_global[] and marks the column as owned by `me`
 * in stack_map[].
 */
dist_initialize_spd(local_phase_node)
int	local_phase_node;
{
	int	col = 0;

	now_ptr   = 0;
	child_ptr = max_mat_size_in_d_phase;

	while (col < factor_struc_sizes[local_phase_node]) {
		tmp_global[col] = col;
		tmp_local[col]  = col;
		stack_map[col]  = me;
		col++;
	}
}
/*
 * dist_factor_invert_spd -- factor node `current`, then invert.
 *
 * Calls dist_fanin_chol followed by dist_fanring_invert on the columns
 * starting at stack_nonz + index_list[current], and adds an operation
 * estimate to stats[n_d_o] after each call.
 *
 *   current      node being processed
 *   nprocs       number of processors for this node
 *   start_proc   first processor of that group
 *   map          column -> processor map, forwarded unchanged
 *   tree_chains  not used in the body
 *   chain_index  chain_index[current+1] - chain_index[current] is the
 *                chain length passed to both kernels
 *   g_cols       forwarded unchanged
 *   ncols        forwarded unchanged
 *
 * The estimates are evaluated in double.  With int operands n*n*t and
 * t*t*t overflow once the sizes pass roughly 1290; for smaller sizes the
 * value added is the same as before.
 */
dist_factor_invert_spd(
		current,	
		nprocs, 	start_proc,	
		map, 
		tree_chains,
		chain_index,
		g_cols,
		ncols)

int	
	
		current,
		nprocs, start_proc,
		*map,
		*tree_chains,
		*chain_index,
		*g_cols,
		ncols;
{
		double	n, t;
		double	p_inv;

		dist_fanin_chol
			(me, nprocs, start_proc,
			(chain_index[current+1] - chain_index[current]),
			factor_struc_sizes[current], ncols, map,
			g_cols, stack_nonz + index_list[current],
			1, have_contrib_next,
			have_contrib, contrib_nonz,      contrib_nonz_sizes,
			((* factor_struc[current])));

		n = (double) factor_struc_sizes[current];
		t = (double) (chain_index[current+1] - chain_index[current]);
		p_inv = 1.0 / nprocs;

					/* for factorization */
		stats[n_d_o] +=
			((n*n*t - n*t*t + (t*t*t)/3.0) * p_inv) +
			((n*n + n*nprocs) / 2.0);

		dist_fanring_invert
			(me, nprocs, start_proc,
			(chain_index[current+1] - chain_index[current]),
			factor_struc_sizes[current], ncols, map,
			g_cols, stack_nonz + index_list[current],
			1, ((* factor_struc[current])));

					/* for inversion */
		stats[n_d_o] += (n*t + (t*t*t)/3.0) * p_inv;

}
			

dist_set_up_matrix_spd(	current,
		index_list,
		stack_map)

int		current,
		*index_list,
		*stack_map;

{

		int *factor_struc_ptr,	col,	current_size, next;
		int	*ptr_to_parent;
		float 	*tmp_store;
		int 	tmp_store_size;
		
		current_size = factor_struc_sizes[current]; 
		for (col=0, tmp_store_size=2; col < current_size; col++)
			if ( stack_map[now_ptr+ col] == me)
				tmp_store_size += current_size -col;
		
		if (( tmp_store = (float *) malloc((tmp_store_size* 
					float_size))) == NULL)
				exit_err( "dist_set-up -- tmp_store", 
						malloc_err);
              if (( ptr_to_parent_factorstruc[current] =
                        (int *) malloc (( current_size*int_size)))
                                == NULL)
                                exit_err("ptr_to_parent", malloc_err);

		index_list[current] = stack_ptr;
		for (col=0, next= stack_ptr, factor_struc_ptr = 
					factor_struc[current],
				ptr_to_parent = 
				ptr_to_parent_factorstruc[current];
				col < current_size;
					col++) {
		
			if ( stack_map[now_ptr+ col] == me) {
			tmp_local[now_ptr + col] = next 
					- index_list[current];
			tmp_global[now_ptr + next- 
					index_list[current]] = col;
			stack_nonz_sizes[next] = current_size - col;
			stack_nonz[next] = tmp_store ;
			tmp_store += current_size - col;
			tmp_store_size -= current_size -col;
			zero_out(stack_nonz[next], (current_size-col));
			next++;
			}
			ptr_to_parent[col] = -1;
		}
		index_list[current+1] =stack_ptr= next; 
               	if ( tmp_store_size <= 0) {
                        exit_err("error in tmp_store - dist_set_up",int_err3);
                }




}/*set_up*/
		

dist_add_nonz_of_A_spd(	current,
		index_list,
		chain_index,
		chain,
		chain_size,
		local_column)

int		current,
		*index_list,
		*chain_index,
		*chain,
		chain_size,
		*local_column;
{

		int	col,	*factor_struc_ptr, this_nonz, 
			local_column_of_A, next, op_code;


		for (col=0, factor_struc_ptr = factor_struc[current],
			this_nonz = index_list[current]; col < chain_size; 
					col++) {
			local_column_of_A= local_column[col];
			if (local_column_of_A != EMPTY) {
				if ( (stack_map[now_ptr+col] == me)) {
				copy_expand(ADD,
					    factor_struc_ptr[col],

					    (factor_struc_ptr + col), 
					    stack_nonz[(this_nonz
						+tmp_local[now_ptr +col])],
					    stack_nonz_sizes[(this_nonz
						+tmp_local[now_ptr +col])],
						
					   (a_struc+ a_index
						[local_column_of_A]),	
					   (a_nonz+ a_index[local_column_of_A]),
						a_size[local_column_of_A]);
				}	
			else {
 			if ( have_contrib[col] == EMPTY) {
					have_contrib[col] = have_contrib_next;
					contrib_nonz_sizes[
						have_contrib_next]=
						factor_struc_sizes[current]
							-col;
					if ((
					contrib_nonz[
						have_contrib_next]=
					(float *) malloc((
					contrib_nonz_sizes[have_contrib_next]*
					float_size))) == NULL)
					exit_err("contrib_nonz",malloc_err);
					next= have_contrib_next;
					have_contrib_next++;
					op_code = NOT_ADD;
			}
			else  {
				next =have_contrib[col];
				op_code = ADD; 
				}
			copy_expand(op_code,
			          factor_struc_ptr[col],
				 (factor_struc_ptr + col),
				  contrib_nonz[ next],
			    	  contrib_nonz_sizes[ next],
				 (a_struc + a_index[ local_column_of_A]),	
				   (a_nonz + a_index[ local_column_of_A]),	
				  a_size[local_column_of_A]);
			} /*else*/	

		}

		}

}/*end add_nonz_ofA*/


/*
 * dist_move_over_matrix_spd -- move the ncols stack slots of `current`
 * down so that they start at index_list[child].
 *
 * Each slot's pointer and length are copied to the destination and the
 * source slot is cleared (NULL / 0).  Afterwards index_list[current] is
 * index_list[child], index_list[current+1] is one past the last slot and
 * stack_ptr equals index_list[current+1].
 *
 * Change from the previous version: the two cursors used to advance only
 * inside the `later != next` test.  When source and destination already
 * coincided nothing advanced, so index_list[current+1] and stack_ptr were
 * set back to the first slot and the ncols slots were dropped from the
 * stack.  The cursors now always advance; only the copy is conditional.
 * The unused local factor_struc_ptr was removed.
 */
dist_move_over_matrix_spd(	current,

		child,	
		index_list, ncols)

int		current,
		child,
		*index_list, 	ncols;

{
		int	col,	next,	later;

		next  = index_list[child];	/* destination cursor */
		later = index_list[current];	/* source cursor */

		for (col = 0; col < ncols; col++, next++, later++) {
			if ( later != next) {
				stack_nonz_sizes[next] = stack_nonz_sizes[later];
				stack_nonz[next] = stack_nonz[later] ;
				stack_nonz[later] = NULL;
				stack_nonz_sizes[later] =0;
			}
		}

		index_list[current] = index_list[child];
		index_list[current+1] = next;
		stack_ptr = index_list[current+1];

}/*move over */

/*
 * dist_move_into_factor_spd -- copy the finished chain columns of
 * `current` owned by this processor from the stack into factor_nonz[].
 *
 * A chain position col is taken when factor_struc[current][col] equals
 * chain[col] and map[col] == me.  The taken columns occupy consecutive
 * stack slots starting at index_list[current].  They are copied into one
 * malloc'ed buffer; factor_nonz[] / factor_nonz_sizes[] are filled
 * starting at *done_factor_columns, which is advanced once per column.
 * factor_index_list[current] and factor_index_list[current+1] bracket the
 * entries written.  stats[L_cols] and stats[L_nz] are updated, and each
 * copied column's stack_nonz_sizes entry is set to 0.
 *
 * The buffer is sized two entries larger than needed; exit_err is called
 * if the remaining count is not positive after the copy.
 *
 * Changes from the previous version:
 *   - the sizing pass now adds the length of each taken column; it used
 *     to add the length of the first slot once per taken column because
 *     the slot index was never advanced in that pass;
 *   - when no column is taken the buffer is freed, since nothing refers
 *     to it.
 */
dist_move_into_factor_spd (
	      	current,
		chain,
		chain_size,		
		map,
		index_list,
		done_factor_columns)

int	
	      	current,
		*chain,
		chain_size,		
		*map,
		*index_list,
		*done_factor_columns;
{
		int	col,	this_nonz,	*this_struc;
		int	tmp_store_size,	cols;
		float	*tmp_store;
		extern	double	 stats[];

		/* pass 1: count the columns and their total length */
		this_struc = factor_struc[current];
		this_nonz  = index_list[current];
		for (col=cols=0, tmp_store_size =2; col < chain_size; col++) {
			if (( this_struc[col] == chain[col]) &&
				(map[col] ==me)) {
				tmp_store_size +=
					stack_nonz_sizes[this_nonz + cols];
				cols++;
			}
		}
		stats[L_cols] += cols;

		if (( tmp_store = (float *)
			malloc((tmp_store_size*float_size))) == NULL)
				exit_err( "tmp_store - dist_move_into-l",
							malloc_err);

		/* pass 2: carve the buffer and copy each column */
		factor_index_list[current] = *done_factor_columns;
		this_nonz = index_list[current];
		for (col=0; col < chain_size; col++) {
			if (( this_struc[col] == chain[col]) &&
				(map[col] ==me)) {

				factor_nonz[*done_factor_columns] =
					tmp_store ;
				copy_to(factor_nonz[*done_factor_columns],
					stack_nonz[this_nonz],
					stack_nonz_sizes[this_nonz]);
				tmp_store += stack_nonz_sizes[this_nonz];
				tmp_store_size -=stack_nonz_sizes[this_nonz];

				factor_nonz_sizes[*done_factor_columns] =
						stack_nonz_sizes[this_nonz];
				stats[L_nz] += stack_nonz_sizes[this_nonz];
				stack_nonz_sizes[this_nonz]  =0;
				(*done_factor_columns)++ ;
				this_nonz++ ;
			}
		}
		factor_index_list[current+1] = *done_factor_columns;

		/* nothing was stored: tmp_store is still the buffer start */
		if (cols == 0)
			free ((char *) tmp_store);

		if ( tmp_store_size <= 0) {
			exit_err("error in tmp_store - dist_move_into_l",
					int_err3);
		}

}/*dist_move_into_factor*/

/*
 * set_tmp_globals -- collect the positions of map[0..size-1] that equal
 * the global `me`.
 *
 * The positions are written to list[] in increasing order and their count
 * is stored in *nrows.
 */
set_tmp_globals(list, map,  size, nrows)
int	*list, *map, size,	*nrows;
{
	int	pos;
	int	found = 0;

	for (pos = 0; pos < size; pos++) {
		if (map[pos] != me)
			continue;
		list[found] = pos;
		found++;
	}

	*nrows = found;
}/*end*/

/*
 * exch -- swap the two ints that x and y point to.
 *
 * x and y may point to the same object, in which case it is unchanged.
 * Always returns 0.
 *
 * The return type is written out because the implicit-int form is not
 * valid from C99 on.  It stays int rather than void because the function
 * is called earlier in this file without a declaration in view.
 */
int
exch (x,y)
int	*x,	*y;
{
		int	saved;

		saved = *x;
		*x = *y ;
		*y = saved;
		return (0);
}


/*
 * d_factor_allocate -- allocate the work arrays of the distributed phase.
 *
 *   stack_map, tmp_local, tmp_global       2 * max_mat_size ints each
 *   have_contrib, contrib_nonz_sizes       max_mat_size + 1 ints each
 *   contrib_nonz                           max_mat_size + 1 float pointers
 *
 * The first max_mat_size entries of have_contrib[] are set to -1.
 * exit_err is called with malloc_err if any allocation fails.
 *
 * max_b_size and stack_size are not referenced in the body.
 *
 * Changes from the previous version: four unused locals were removed and
 * the tmp_local error message no longer ends in a stray 'n'.
 */
d_factor_allocate( max_b_size,	max_mat_size, stack_size)
int	
	max_b_size, max_mat_size,	stack_size;
{
		if ((stack_map =
		 (int *) malloc((2*max_mat_size* sizeof(int )))) == NULL)
				exit_err(
				"Error allocating stack_map", malloc_err);
		if ((tmp_local =
		 (int *) malloc((2*max_mat_size* sizeof(int )))) == NULL)
				exit_err(
				"Error allocating tmp_local", malloc_err);
		if ((tmp_global=
		 (int *) malloc((2*max_mat_size* sizeof(int )))) == NULL)
				exit_err(
				"Error allocating tmp_global", malloc_err);

		if ((
		have_contrib = (int *)
			malloc(((max_mat_size+1)* int_size ))) == NULL)
			exit_err( "Error allocating have_contrib",
				malloc_err);
		if ((
		contrib_nonz_sizes = (int *)
			malloc(((max_mat_size+1)* int_size ))) == NULL)
				exit_err(
				"Error allocating contrib_nonz_sizes",
					malloc_err);
		if ((
		contrib_nonz = (float **)
			malloc(((max_mat_size+1)* (sizeof(float *)))))
					 == NULL)
				exit_err(
				"Error allocating contrib_nonz",
					malloc_err);

		/* -1 marks "no contribution buffer for this column" */
		set_to(have_contrib, max_mat_size, -1);

}
	
/*
 * dist_factor_spd -- factor node `current`.
 *
 * Calls dist_fanin_chol on the columns starting at
 * stack_nonz + index_list[current] and adds an operation estimate to
 * stats[n_d_o].
 *
 *   current      node being processed
 *   nprocs       number of processors for this node
 *   start_proc   first processor of that group
 *   map          column -> processor map, forwarded unchanged
 *   tree_chains  not used in the body
 *   chain_index  chain_index[current+1] - chain_index[current] is the
 *                chain length passed to the kernel
 *   g_cols       forwarded unchanged
 *   ncols        forwarded unchanged
 *
 * The estimate is evaluated in double.  With int operands n*n*t and t*t*t
 * overflow once the sizes pass roughly 1290; for smaller sizes the value
 * added is the same as before.
 */
dist_factor_spd(
		current,	
		nprocs, 	start_proc,	
		map, 
		tree_chains,
		chain_index,
		g_cols,
		ncols)

int	
	
		current,
		nprocs, start_proc,
		*map,
		*tree_chains,
		*chain_index,
		*g_cols,
		ncols;
{
		double	n, t;
		double	p_inv;

		dist_fanin_chol
			(me, nprocs, start_proc,
			(chain_index[current+1] - chain_index[current]),
			factor_struc_sizes[current],
			ncols,
			map,
			g_cols,
			stack_nonz + index_list[current],
			1,
			have_contrib_next,
			have_contrib,
			contrib_nonz,      contrib_nonz_sizes,
			((* factor_struc[current])));

		n = (double) factor_struc_sizes[current];
		t = (double) (chain_index[current+1] - chain_index[current]);
		p_inv = 1.0 / nprocs;

		stats[n_d_o] +=
			((n*n*t - n*t*t + (t*t*t)/3.0) * p_inv) +
			((n*n + n*nprocs) / 2.0);

}
