/*---------------------------------------------------------------*/
/*     	CAPSS: A Cartesian Parallel Sparse Solver                */
/*     	Beta Release                                             */
/*      Author: Padma Raghavan                                   */
/*---------------------------------------------------------------*/
#include "d_n_fact.h"
/*
 * d_solve_spd: driver for the distributed-phase forward and backward
 * solves.
 *
 *   local_phase_node  tree node where the walk starts; the first node
 *                     processed is tree_parent[local_phase_node]
 *   f_solve_routine   forward-solve kernel, called once per node on the
 *                     way up the tree
 *   b_solve_routine   backward-solve kernel, called once per node on the
 *                     way back down
 *
 * Forward sweep: follows tree_parent[] until it reaches -1.  For each
 * node the right-hand side is assembled (dist_set_up_rhs_spd), the
 * forward kernel is run and timed, and the finished entries are moved out
 * with dist_move_solution.
 *
 * Backward sweep: starts at node tree_size-1 and follows tree_child[]
 * until local_phase_node is reached, using the same per-level processor
 * ranges (tree_count_procs / tree_start_procs) in reverse order.
 *
 * Kernel time is accumulated in stats[so_d_t] and an operation estimate
 * in stats[so_d_o].  No value is returned.
 */
d_solve_spd(	local_phase_node, f_solve_routine, b_solve_routine)

int			local_phase_node, (*f_solve_routine) (),
			(*b_solve_routine) () ;
{
	int	nprocs,		start_proc,
		next,		ncols;
	int	current,	n,	t;
	double	p_inv,		last_clock;
	extern	double	clock0(), stats[];

	d_solve_allocate(
		max_b_size,	max_mat_size_in_d_phase,
		max_stack_columns);

	dist_initialize_spd(local_phase_node);

	/* ---- forward sweep: climb towards the root ---- */
	next = 0;
	current = tree_parent[local_phase_node];
	while (current != -1) {

		sync0();
		/* the previous level's map region becomes the child region */
		exch(&now_ptr, &child_ptr);

		nprocs = tree_count_procs[next];
		start_proc = tree_start_procs[next];

		ncols = dist_set_up_rhs_spd(
				me,
				current,
				tree_child[current],
				nprocs, start_proc);

		last_clock = clock0();
		(*f_solve_routine)
				( current,
				nprocs,		start_proc,
				(stack_map + now_ptr),
				tree_chains,
				chain_index,
				tmp_global + now_ptr,
				ncols);
		stats[so_d_t] += clock0() - last_clock;

		n = factor_struc_sizes[current];
		t = chain_index[current+1] - chain_index[current];
		p_inv = (double) (1.0/nprocs);

		/*
		 * Operation estimate 2*((2nt - t^2)/p + n).  The products are
		 * formed in double: the previous int arithmetic could overflow
		 * for large n and t.  For values that fit in an int the result
		 * is unchanged.
		 */
		stats[so_d_o] += 2.0 * ((2.0 * n * t - (double) t * t) * p_inv + n);

		dist_move_solution (
				current,
				(tree_chains + chain_index[current]),
				(chain_index[current+1] - chain_index[current]),
				(stack_map + now_ptr),
				index_list,
				&done_solution_elements);

		current = tree_parent[current];
		next++;
	}	/* end forward sweep */

	/* ---- backward sweep: descend again, levels in reverse order ---- */
	last_clock = clock0();
	stack_ptr = 0;
	next--;			/* index of the last level visited above */
	current = tree_size - 1;
	while (current != local_phase_node) {
		nprocs = tree_count_procs[next];
		start_proc = tree_start_procs[next];
		(*b_solve_routine)
				(
				current,
				nprocs,
				start_proc);
		current = tree_child[current];
		next--;
	}	/* end backward sweep */
	stats[so_d_t] += clock0() - last_clock;

}/* end d_solve_spd */
			

dist_set_up_rhs_spd(
		me, 
		current,
		child,
		nprocs, start_proc)

int		me,
		current,
		child,
		nprocs,
		start_proc;
{
		


		int ncols;





		set_map_spd((stack_map+ now_ptr), 
				factor_struc_sizes[current],
				nprocs, start_proc, 
				&ncols);	


		dist_set_up_rhs(current,
				index_list,
				stack_map);





		dist_add_rhs1_spd (	current, 
					child,
					index_list);

		dist_move_rhs_spd(current,
				child,
				index_list, ncols);



		dist_add_rhs2_spd(	
				current,
				index_list,
				(chain_index + current),
				(tree_chains + chain_index[current]),
				(chain_index[current+1] - 
					chain_index[current]),
				(tree_local_column + chain_index[current]));
		return(ncols);


}/*end set_matrix*/
/*
 * dist_add_rhs1_spd: merge the child's right-hand-side entries into the
 * segment of node `current`.
 *
 *   current, child   tree nodes (parent and its child)
 *   index_list       start offsets of each node's segment in stack_b
 *
 * have_contrib[] is reset to -1 for the node's entries and
 * have_contrib_next to 0.  The structure lists factor_struc[current] and
 * factor_struc[child] are then walked together (the walk assumes both are
 * sorted ascending).  For every index present in both lists whose child
 * entry is held by this processor (stack_map[child_ptr+..] == me):
 *   - if this processor also holds the parent entry, the child value is
 *     added into stack_b directly;
 *   - otherwise the value is stored in contrib_b[] and its slot number
 *     is recorded in have_contrib[].
 *
 * The walk now also stops when the parent's list is exhausted.  It used
 * to be bounded by the child's size only, so a child index larger than
 * every remaining parent index pushed `this` past the end of this_struc.
 */
dist_add_rhs1_spd (	current, 
			child,
			index_list)
int		current, child, *index_list;
{
	int	this,	this_nonz,	this_size,	*this_struc,
		next,	child_nonz,	child_size,	*child_struc;

	set_to(have_contrib, factor_struc_sizes[current],-1);
	have_contrib_next=0;

	this_struc  = factor_struc[current];
	this_size   = factor_struc_sizes[current];
	child_struc = factor_struc[child];
	child_size  = factor_struc_sizes[child];
	this_nonz   = index_list[current];
	child_nonz  = index_list[child];

	next = this = 0;
	while ((next < child_size) && (this < this_size)) {

		if ((child_struc[next] == this_struc[this]) &&
			(stack_map[child_ptr + next] == me)) {

			if (stack_map[now_ptr + this] == me) {
				/* both entries are local: accumulate in place */
				stack_b[this_nonz + tmp_local[now_ptr + this]]
					+= stack_b[child_nonz
						+ tmp_local[child_ptr + next]];
			}
			else {
				/* parent entry is held elsewhere: stage the value */
				have_contrib[this] = have_contrib_next;
				contrib_b[have_contrib_next] =
					stack_b[child_nonz
						+ tmp_local[child_ptr + next]];
				have_contrib_next++;
			}

			this++;
			next++;
		}
		else if (child_struc[next] > this_struc[this]) {
			this++;		/* parent index has no match yet */
		}
		else {
			/* child index absent from parent, or entry not held here */
			next++;
		}
	}

}/* end dist_add_rhs1_spd */

			

/*
 * dist_set_up_rhs: reserve and clear the stack_b entries of node
 * `current` that are mapped to this processor.
 *
 *   current      tree node being set up
 *   index_list   segment offsets into stack_b; entries [current] and
 *                [current+1] are written
 *   stack_map    owner of each entry (this parameter hides the global
 *                of the same name); read at offset now_ptr
 *
 * For every entry col with stack_map[now_ptr+col] == me:
 *   tmp_local[now_ptr+col]  receives the entry's position within the
 *                           segment,
 *   tmp_global[now_ptr+pos] receives col (the inverse mapping),
 *   and the stack_b slot is set to 0.0.
 * On return stack_ptr and index_list[current+1] both point just past the
 * new segment.
 */
dist_set_up_rhs(	current,
		index_list,
		stack_map)

int		current,
		*index_list,
		*stack_map;

{
	int	entry,	n_entries,	top,	pos;

	n_entries = factor_struc_sizes[current];

	index_list[current] = stack_ptr;
	top = stack_ptr;

	for (entry = 0; entry < n_entries; entry++) {
		if (stack_map[now_ptr + entry] != me)
			continue;		/* held by another processor */

		pos = top - index_list[current];
		tmp_local[now_ptr + entry] = pos;
		tmp_global[now_ptr + pos] = entry;
		stack_b[top] = 0.0;
		top++;
	}

	stack_ptr = top;
	index_list[current+1] = top;

}/* end dist_set_up_rhs */
		

/*
 * dist_add_rhs2_spd: add the original right-hand-side values b[] for the
 * chain of node `current`.
 *
 *   current        tree node
 *   index_list     segment offsets into stack_b
 *   chain_index    not used in this routine
 *   chain          not used in this routine
 *   chain_size     number of chain entries to visit
 *   local_column   per chain entry, the index into b[] or EMPTY
 *
 * Entries whose local_column is EMPTY are skipped.  For the others:
 *   - if this processor holds the entry (stack_map[now_ptr+col] == me),
 *     b[] is added into the node's stack_b segment;
 *   - otherwise b[] is accumulated in contrib_b[], allocating a new slot
 *     in have_contrib[] when the entry has none yet.
 */
dist_add_rhs2_spd(	current,
		index_list,
		chain_index,
		chain,
		chain_size,
		local_column)

int		current,
		*index_list,
		*chain_index,
		*chain,
		chain_size,
		*local_column;
{
	int	k,	a_col,	slot,	seg_start;

	seg_start = index_list[current];

	for (k = 0; k < chain_size; k++) {
		a_col = local_column[k];
		if (a_col == EMPTY)
			continue;

		if (stack_map[now_ptr + k] == me) {
			stack_b[seg_start + tmp_local[now_ptr + k]] += b[a_col];
			continue;
		}

		/* entry is held elsewhere: stage the value in contrib_b */
		slot = have_contrib[k];
		if (slot == EMPTY) {
			slot = have_contrib_next;
			have_contrib[k] = slot;
			have_contrib_next++;
			contrib_b[slot] = b[a_col];
		}
		else {
			contrib_b[slot] += b[a_col];
		}
	}

}/* end dist_add_rhs2_spd */


/*
 * dist_move_rhs_spd: slide the stack_b segment of `current` down so that
 * it starts where the segment of `child` started, releasing the child's
 * space.
 *
 *   current, child   tree nodes
 *   index_list       segment offsets; [current] and [current+1] are
 *                    rewritten
 *   ncols            number of stack_b entries in current's segment
 *
 * On return index_list[current] == index_list[child],
 * index_list[current+1] and stack_ptr point just past the moved segment.
 *
 * Fix: when both segments already start at the same offset, the old loop
 * copied nothing and also never advanced, so the end offset and stack_ptr
 * were reset to the segment start.  The segment then looked empty and the
 * next allocation on the stack overwrote it.  The end is now always
 * start + ncols; only the copy is skipped.
 */
dist_move_rhs_spd(	current,

		child,	
		index_list, ncols)

int		current,
		child,
		*index_list, 	ncols;

{
	int	k,	dst,	src,	count;

	dst = index_list[child];
	src = index_list[current];
	count = (ncols > 0) ? ncols : 0;

	if (src != dst) {
		/* ascending copy, as before; assumes dst < src if the ranges overlap */
		for (k = 0; k < count; k++)
			stack_b[dst + k] = stack_b[src + k];
	}

	index_list[current] = index_list[child];
	index_list[current+1] = dst + count;
	stack_ptr = index_list[current+1];

}/* end dist_move_rhs_spd */

/*
 * dist_move_solution: copy this processor's chain entries of node
 * `current` from stack_b into factor_b.
 *
 *   current               tree node
 *   chain                 indices of the node's chain
 *   chain_size            number of chain entries
 *   map                   owner of each entry of the node
 *   index_list            segment offsets into stack_b
 *   done_solution_elmts   in/out: next free slot in factor_b, advanced
 *                         once per copied value
 *
 * Position k of factor_struc[current] is compared with chain[k].  When
 * they agree and map[k] == me, the next unread value of the node's
 * stack_b segment is appended to factor_b.  Values are taken from the
 * segment consecutively, starting at index_list[current].
 */
dist_move_solution (
	      	current,
		chain,
		chain_size,		
		map,
		index_list,
		done_solution_elmts)

int	
	      	current,
		*chain,
		chain_size,		
		*map,
		*index_list,
		*done_solution_elmts;
{
	int	*node_struc = factor_struc[current];
	int	src = index_list[current];
	int	k = 0;

	while (k < chain_size) {
		if ((node_struc[k] == chain[k]) && (map[k] == me)) {
			factor_b[*done_solution_elmts] = stack_b[src];
			*done_solution_elmts += 1;
			src++;
		}
		k++;
	}

}/* end dist_move_solution */

/*
 * dist_set_up_b_in_stack: place the stack_b segment of node `current`
 * for the backward solve.
 *
 *   current      tree node being set up
 *   parent       its parent, or -1 if there is none
 *   index_list   segment offsets into stack_b; [current] is written
 *
 * Calls exit_err("stack_overflow", ...) when stack_ptr plus the node's
 * size reaches max_b_size.
 *
 * No parent segment (parent == -1 or index_list[parent] == -1): a zeroed
 * segment of factor_struc_sizes[current] entries is pushed at stack_ptr.
 *
 * Otherwise the segment is first built at stack_ptr, taking each entry
 * from the parent's segment through ptr_to_parent_factorstruc[current]
 * (0.0 where that pointer is -1), and then copied down onto the parent's
 * start offset so that it replaces the parent's segment.
 *
 * Returns 0.  The second path used to fall off the end of the function,
 * leaving the return value indeterminate.
 */
dist_set_up_b_in_stack( current, parent, index_list) 
int		current,	parent, *index_list;
{
	int	col,	size,	in_parent,	in_child,	*ptr_to_parent;

	in_parent = (parent != -1) ? index_list[parent] : -1;

	if ((stack_ptr + factor_struc_sizes[current]) >= max_b_size) {
		exit_err("stack_overflow",int_err3);
	}

	size = factor_struc_sizes[current];

	if (in_parent == -1) {
		/* nothing to inherit: push a zeroed segment */
		index_list[current] = stack_ptr;
		stack_ptr += size;
		set_to_double(0.0,
			(stack_b + index_list[current]),
			size);

		return (0);
	}

	/* gather the inherited values into scratch space at the stack top */
	in_child = stack_ptr;
	ptr_to_parent = ptr_to_parent_factorstruc[current];
	for (col = 0; col < size; col++) {
		if (ptr_to_parent[col] != -1)
			stack_b[in_child + col] =
				stack_b[in_parent + ptr_to_parent[col]];
		else
			stack_b[in_child + col] = 0.0;
	}

	/* move the finished segment down over the parent's */
	index_list[current] = index_list[parent];
	copy_to((stack_b + index_list[current]),
		(stack_b + stack_ptr), size);
	stack_ptr = index_list[current] + size;

	return (0);

}/* end dist_set_up_b_in_stack */

d_solve_allocate( max_b_size,	max_mat_size, stack_size)
int	
	max_b_size, max_mat_size,	stack_size;
{



		if ((
		contrib_b = (double *) 
			malloc(((max_mat_size+1)* (sizeof(double)))))
					== NULL)
				exit_err(
				"Error allocating contrib_b",
					malloc_err);
		set_to(have_contrib, max_mat_size, -1);

		

}

/*
 * dist_f_solve_using_inv: forward-solve step for node `current`,
 * delegated to dist_mat_vec_and_reduce.
 *
 *   current              tree node
 *   nprocs, start_proc   processor range for this node
 *   map                  owner of each entry of the node
 *   tree_chains          not used in this routine
 *   chain_index          chain offsets; the chain length is
 *                        chain_index[current+1] - chain_index[current]
 *   g_cols               passed through unchanged
 *   ncols                passed through unchanged
 *
 * The node's factor values (factor_nonz), its stack_b segment and the
 * have_contrib / contrib_b staging arrays are handed to the kernel.  The
 * last argument is the first entry of factor_struc[current]; what the
 * kernel uses it for is not visible here.
 */
dist_f_solve_using_inv	(
		current,	
		nprocs, 	start_proc,	
		map, 
		tree_chains,
		chain_index,
		g_cols,
		ncols)

int		
		current,
		nprocs, start_proc,
		*map,
		*tree_chains,
		*chain_index,
		*g_cols,
		ncols;

{
	int	chain_len;

	chain_len = chain_index[current+1] - chain_index[current];

	dist_mat_vec_and_reduce(
		me,	nprocs,	start_proc,
		chain_len,
		factor_struc_sizes[current],
		ncols,	map,	g_cols,
		factor_nonz + factor_index_list[current],
		stack_b + index_list[current],
		have_contrib,
		contrib_b,
		(*factor_struc[current]));
}

dist_b_solve_using_inv	(
		current, 
		nprocs, start_proc)

int		 current, nprocs, start_proc;
{
		int	nrows, n,t, i,j;
		double	p_inv;


		set_map_spd((stack_map), factor_struc_sizes[current],
			nprocs, start_proc, &nrows);

		set_tmp_globals(tmp_global, stack_map, 
			(chain_index[current+1] - chain_index[current]),
				&nrows);
	

		dist_set_up_b_in_stack(
			current, tree_parent[current], index_list); 


		dist_b_reduce (me,	stack_map, 
			tmp_global,
			nrows,
			(chain_index[current+1] - chain_index[current]),
			factor_struc_sizes[current] -1,
			factor_struc[current],
			factor_struc_sizes[current],
			(factor_nonz + factor_index_list[current]),
			(stack_b + index_list[current]),	
			factor_b + factor_index_list[current]);


		dist_mat_vec( me, nprocs, start_proc,
			(chain_index[current+1] - chain_index[current]),
			nrows,
			stack_map,
			tmp_global,	
			(factor_nonz + factor_index_list[current]),
			factor_b + factor_index_list[current],
			(stack_b + index_list[current]),	
			((*factor_struc[current])));


		if(me ==start_proc) { 
 			copy_to((b_tree_chains+chain_index[current]),
                                (stack_b+index_list[current]),
                                (chain_index[current+1] -
                                        chain_index[current]));
                                

		}
					
				
}/*end*/

/*
 * dist_f_solve: forward-solve step for node `current`, delegated to
 * dist_fsolve.
 *
 *   current              tree node
 *   nprocs, start_proc   processor range for this node
 *   map                  owner of each entry of the node
 *   tree_chains          not used in this routine
 *   chain_index          chain offsets; the chain length is
 *                        chain_index[current+1] - chain_index[current]
 *   g_cols               passed through unchanged
 *   ncols                passed through unchanged
 *
 * The node's factor values (factor_nonz), its stack_b segment and the
 * have_contrib / contrib_b staging arrays are handed to the kernel.  The
 * last argument is 10000 plus the first entry of factor_struc[current];
 * what the kernel uses it for is not visible here.
 */
dist_f_solve	(
		current,	
		nprocs, 	start_proc,	
		map, 
		tree_chains,
		chain_index,
		g_cols,
		ncols)

int		
		current,
		nprocs, start_proc,
		*map,
		*tree_chains,
		*chain_index,
		*g_cols,
		ncols;

{
	int	chain_len;

	chain_len = chain_index[current+1] - chain_index[current];

	dist_fsolve(
		me,	nprocs,	start_proc,
		chain_len,
		factor_struc_sizes[current],
		ncols,	map,	g_cols,
		factor_nonz + factor_index_list[current],
		stack_b + index_list[current],
		have_contrib,
		contrib_b,
		(10000 + *factor_struc[current]));
}

dist_b_solve	(
		current, 
		nprocs, start_proc)

int		 current, nprocs, start_proc;
{
		int	nrows, n,t, i,j;
		double	p_inv;


		set_map_spd((stack_map), factor_struc_sizes[current],
			nprocs, start_proc, &nrows);

		set_tmp_globals(tmp_global, stack_map, 
			(chain_index[current+1] - chain_index[current]),
				&nrows);
	

		dist_set_up_b_in_stack(
			current, tree_parent[current], index_list); 


		dist_b_reduce (me,	stack_map, 
			tmp_global,
			nrows,
			(chain_index[current+1] - chain_index[current]),
			factor_struc_sizes[current] -1,
			factor_struc[current],
			factor_struc_sizes[current],
			(factor_nonz + factor_index_list[current]),
			(stack_b + index_list[current]),	
			factor_b + factor_index_list[current]);


		row_ubs_ring( me, nprocs, start_proc,
			(chain_index[current+1] - chain_index[current]),
			nrows,
			stack_map,
			tmp_global,	
			(factor_nonz + factor_index_list[current]),
			factor_b + factor_index_list[current],
			(stack_b + index_list[current]),	
			((*factor_struc[current])));


		if(me ==start_proc) { 
 			copy_to((b_tree_chains+chain_index[current]),
                                (stack_b+index_list[current]),
                                (chain_index[current+1] -
                                        chain_index[current]));
                                

		}
					
				
}/*end*/
