/*---------------------------------------------------------------*/
/*     	CAPSS: A Cartesian Parallel Sparse Solver                */
/*     	Beta Release                                             */
/*      Author: Padma Raghavan                                   */
/*---------------------------------------------------------------*/
#include	"redistribute.h"
redistribute()
{
	extern	int	*col_map, *edge_map, *in_graph_v,
			*start_numbers, *end_numbers;
	extern	double	clock0(), stats[];

	int		*size_vector, *send_list,
			*hooks_starts, *hooks_ends, *hooks;
	int		list_size;
	real_type	*list;
	double		t_start;

	/*
	 * Driver for the redistribution step: maps every local edge to a
	 * processor (set_edge_map), packs the edges per destination
	 * (collect_data), exchanges and unpacks them (send_and_get_data),
	 * then switches the global problem sizes over to the received data.
	 * The elapsed clock0() time is stored in stats[r_t].
	 */
	t_start = clock0();

	/*
	 * Carve the integer work arrays out of scratch_list:
	 *   size_vector   4*(P+1) ints at offset 0
	 *   send_list     2*(P+1) ints
	 *   hooks_starts    (P+1) ints
	 *   hooks_ends      (P+1) ints
	 *   hooks         everything after that
	 */
	size_vector  = scratch_list;
	send_list    = scratch_list + 4*(P+1);
	hooks_starts = scratch_list + 4*(P+1) + 2*(P+1);
	hooks_ends   = scratch_list + 4*(P+1) + 2*(P+1) + (P+1);
	hooks        = scratch_list + 4*(P+1) + 2*(P+1) + (P+1) + (P+1);
	last_used_in_scratch_list = 4*(P+1) + 2*(P+1) + (P+1) + (P+1);

	if ((last_used_in_scratch_list + 3*my_N + 2) > max_size_scratch_list) {
		exit_err("small list space1",size_err);
	}

	/* capacity (in real_type items) of the packing buffer */
	list_size = 2 * ((Asubs+1)*2*(2+ D) + 2*Asubs);

	/* release these two work arrays before grabbing the packing buffer */
	free ((char *) d_scratch_list);
	free ((char *) k_vector);

	list = (real_type *) my_malloc((list_size*(sizeof(real_type))));
	if (list == NULL)
		exit_err("order::list", malloc_err);

	set_edge_map (col_map, edge_map, in_graph_v,
			new_numbers, start_numbers, end_numbers);

	/* collect_data returns the number of items it wrote into list */
	next_row_col = collect_data(list, edge_map,
				hooks, hooks_starts, hooks_ends,
				send_list, size_vector);
	if (next_row_col > list_size) {
		printf("%d: list allocation error %d (%d)\n",
			my_pid, list_size, (next_row_col));
		exit_err("small list space",case_err);
	}

	/* reset the fill cursors that unpack() advances */
	next_b = 0;
	next_row_col = next_b;
	next_col = next_row_col;

	send_and_get_data(size_vector, list, send_list);

	/* remember the old sizes, then adopt the sizes of the received data */
	old_my_N = my_N;
	old_P = P;
	old_M = M;
	old_N = N;
	old_Asubs = Asubs;

	my_N = N = M = next_col;
	P = 1;
	N_b = next_b;
	Asubs = next_nonz;

	/* what was unpacked must match the sizes set in re_allocate_space */
	if ((new_my_N != N) || (new_Asubs != Asubs)) {
		printf("%d: Goofy N  %d (%d), Asubs %d (%d)\n",
			my_pid, N, new_my_N, Asubs, new_Asubs);
		exit_err("redistribute: mismatch",case_err);
	}

	free(((char *) list));
	free(((char *) scratch_list));
	stats[r_t] = clock0() - t_start;
}/*end redistribute*/
collect_data(	list, edge_map,
		hooks, hooks_starts, hooks_ends,
		send_list, size_vector)
int		*edge_map,
		*hooks, *hooks_starts,
		*hooks_ends, *send_list, *size_vector;
real_type	*list;
{
	/*
	 * Pack every local edge into list, grouped by destination processor.
	 *
	 *   list          output buffer; one block per processor
	 *   edge_map      edge_map[e] is the processor edge e is sent to
	 *   hooks         work array holding (edge, link) pairs
	 *   hooks_starts  per processor: index in hooks of its first pair
	 *   hooks_ends    per processor: index in hooks of its last pair
	 *   send_list     per processor: [2p] = block offset in list,
	 *                 [2p+1] = block length in items
	 *   size_vector   per processor: [2p] and [2p+1] both receive the
	 *                 number of edges packed for that processor
	 *
	 * Returns the total number of items written to list.
	 */
	int	edge, proc, side, vtx, dim,
		slot, cursor, pair,
		out, block_start, count_slot, packed;

	set_to(hooks_ends, P, EMPTY);
	set_to(hooks_starts, P, EMPTY);

	/* thread the edges of each processor into a singly linked list */
	slot = 0;
	for (edge = 0; edge < Asubs; edge++) {
		proc = edge_map[edge];
		if (hooks_ends[proc] == EMPTY)
			hooks_starts[proc] = slot;
		else
			hooks[hooks_ends[proc] + 1] = slot;
		hooks_ends[proc] = slot;
		hooks[slot] = edge;		/* payload */
		hooks[slot + 1] = EMPTY;	/* link to the next pair */
		slot += 2;
	}

	/* walk each processor's list and serialize its edges */
	out = 0;
	for (proc = 0; proc < P; proc++) {
		pair = 2*proc;
		block_start = out;
		send_list[pair] = block_start;
		size_vector[pair] = 0;
		size_vector[pair + 1] = 0;

		/* first item of the block is the edge count, filled in below */
		count_slot = out++;
		packed = 0;

		cursor = hooks_starts[proc];
		while (cursor != EMPTY) {
			edge = hooks[cursor];

			/* both endpoints: global id, new number, D coordinates */
			for (side = 0; side < 2; side++) {
				vtx = row_col[2*edge + side];
				list[out++] = (real_type)
					global_row_col[2*edge + side];
				list[out++] = (real_type) new_numbers[vtx];
				for (dim = 0; dim < D; dim++)
					list[out++] = (real_type)
						(*(xyz_int[dim] + vtx));
			}

			/* matrix entry, then the b entry (diagonal edges only) */
			list[out++] = (real_type) a_nonz[edge];
			if (row_col[2*edge] == row_col[2*edge + 1])
				list[out++] = (real_type) b[row_col[2*edge]];
			else
				list[out++] = (real_type) 0.0;

			size_vector[pair] += 1;
			packed++;
			cursor = hooks[cursor + 1];
		}

		list[count_slot] = packed;
		send_list[pair + 1] = out - block_start;
		size_vector[pair + 1] = size_vector[pair];
	}
	return(out);
}/*end collect_data*/

			
send_and_get_data(size_vector, list, send_list)
real_type	*list;
int		*size_vector, *send_list;
{
		/*
		 * Exchange the packed edge blocks between processors and
		 * unpack everything addressed to this processor.
		 *
		 *   size_vector  per-processor counts; handed to add_gather()
		 *                and then to re_allocate_space()
		 *   list         packed blocks produced by collect_data()
		 *   send_list    [2p] = offset of processor p's block in list,
		 *                [2p+1] = its length in real_type items
		 *
		 * The processors take turns: in round `proc`, that processor
		 * unpacks its own block and receives P-1 messages, while every
		 * other processor sends it the block destined for it.
		 * stats[r_o] accumulates the items received, stats[r_c] the
		 * items sent (divided by Thousand at the end).
		 */
		int 	proc, 	p_next, 	msg_type,
			info_bytes, 	info_type,	info_from,
			size,		j,	max_size;
		/* must be real_type: it is filled through a real_type ** in
		   re_allocate_space() and read as real_type * by unpack() */
		real_type	*recv_vector;
		extern	double stats [];


		add_gather(size_vector);
		MSG_TYPE += log_2_P +1;

		free_items();

                free_d_phase_lists();
                free_other_lists(D);

		/* frees the old problem arrays, allocates the new ones and
		   the receive buffer; *size is the buffer capacity in items */
		re_allocate_space(size_vector, &recv_vector, &size);
		max_size = size * real_size;

		for (proc =p_next=0, msg_type =MSG_TYPE+1;
				 proc <P; proc ++,msg_type++, p_next+=2) {
			if (proc == my_pid) {
				/* own block never goes over the wire */
				unpack((list+ send_list[p_next]),
						send_list[p_next+1]);
				for (j=0; j <P-1; j++) {

					recv0((char *) recv_vector,
						max_size,
						msg_type);
					recvinfo0(&info_bytes,
						  &info_type,
						  &info_from);
					if (info_bytes >= max_size)
					 exit_err(
					"redistribute:max_size", case_err);
					size = info_bytes/real_size;

					unpack (recv_vector, 
						size);
					stats[r_o] += size;
				}
			}
			else {
				send0((char *)
					(list+ send_list[p_next]),
					((real_size) * send_list[p_next+1]),
					msg_type,
					proc);
				stats[r_c] += send_list[p_next+1];
			
			}
		}
		stats[r_c] /= Thousand;

		/* unpack() copies the data out, so the receive buffer is
		   no longer needed; it was previously leaked */
		free((char *) recv_vector);
}/*send_and_get_data*/
unpack(list, size_list)
real_type	*list;
int		size_list;
{
	/*
	 * Decode size_list items of packed edge data from list and append
	 * them to the global arrays at the cursors next_col, next_row_col,
	 * next_nonz and next_b.
	 *
	 * list is a sequence of blocks: an edge count followed by that many
	 * records.  Each record is two endpoints (global id, new number,
	 * D integer coordinates), then the matrix value, then one more item
	 * that is stored into b only when both endpoints carry the same
	 * global id.
	 */
	int	pos, remaining, side, dim;
	int	ends[2];

	pos = 0;
	while (pos < size_list) {
		remaining = (int) list[pos++];
		for (; remaining > 0; remaining--) {
			for (side = 0; side < 2; side++) {
				ends[side] = (int) list[pos++];
				global_row_col[next_row_col++] = ends[side];
				global_column[next_col] = ends[side];
				new_numbers[next_col] = (int) list[pos++];
				for (dim = 0; dim < D; dim++)
					(*(xyz_int[dim] + next_col)) =
						(int) list[pos++];
				next_col++;
			}

			a_nonz[next_nonz++] = (real_type) list[pos++];

			/* the trailing item is always consumed, but kept
			   only for a diagonal entry */
			if (ends[0] == ends[1]) {
				b[next_b] = (real_type) list[pos];
				b_global_column[next_b++] = ends[0];
			}
			pos++;
		}
	}
}/*end unpack*/
					
set_edge_map (	col_map, edge_map, in_graph_v, 
		new_numbers, start_numbers, end_numbers )

int		*col_map,	*edge_map, 	*in_graph_v,
		*new_numbers,
		*start_numbers,	*end_numbers;
{
	/*
	 * Choose the destination processor of every edge and store it in
	 * edge_map[edge].
	 *
	 * Each endpoint of an edge gets a (number, processor) candidate:
	 *   - numbered vertex:    (new_numbers[v], col_map[v])
	 *   - unnumbered vertex:  (start_numbers[region], region), where
	 *     region is in_graph_v[v]; for a second endpoint with index
	 *     >= my_N the region is looked up with get_using_limits().
	 * The edge goes to the processor of the candidate with the smaller
	 * number; on a tie the first endpoint wins.
	 *
	 * Rationale kept from the original author: in each row/col, if some
	 * vertices are not numbered then all unnumbered vertices must belong
	 * to the same region, and the start/end numbers of that region are
	 * smaller than the new_numbers of any previously numbered vertex,
	 * so the row/col is assigned to the processor corresponding to the
	 * region.  If all vertices are numbered, the row/col goes to the
	 * processor of the column with the lowest new_number.
	 *
	 * end_numbers is accepted but not read here.
	 */
	int	edge, pos, first, second,
		best_number, best_proc,
		second_number, second_proc;

	for (edge = 0, pos = 0; edge < Asubs; edge++, pos += 2) {
		first = row_col[pos];
		second = row_col[pos + 1];

		/* candidate from the first endpoint */
		if (new_numbers[first] != EMPTY) {
			best_number = new_numbers[first];
			best_proc = col_map[first];
		} else {
			best_number = start_numbers[in_graph_v[first]];
			best_proc = in_graph_v[first];
		}

		/* candidate from the second endpoint */
		if (new_numbers[second] != EMPTY) {
			second_number = new_numbers[second];
			second_proc = col_map[second];
		} else if (second < my_N) {
			second_number = start_numbers[in_graph_v[second]];
			second_proc = in_graph_v[second];
		} else {
			get_using_limits(second,
					&second_number,
					&second_proc);
		}

		if (second_number < best_number) {
			best_number = second_number;
			best_proc = second_proc;
		}

		edge_map[edge] = best_proc;
	}
}/*end set_edge_map*/
re_allocate_space(size_vector, recv_vector, size)
int		*size_vector,	*size;
real_type		**recv_vector;
{
		/*
		 * Release the arrays of the old distribution and allocate
		 * the arrays that unpack() fills with the redistributed data.
		 *
		 *   size_vector  [2*my_pid] becomes new_Asubs; [2*my_pid+1]
		 *                is used to size the receive buffer
		 *   recv_vector  out: receive buffer of *size+1 real_type
		 *                items; ownership passes to the caller
		 *   size         out: receive buffer capacity in items
		 *
		 * Sets the globals new_Asubs and new_my_N (= 2*new_Asubs).
		 * Every allocation failure is fatal via exit_err().
		 */
		int index, max_subs;

		index = 2*my_pid;
		new_Asubs =  size_vector[index++];
		new_my_N = 2* new_Asubs;
		max_subs = size_vector[index++];
		*size =  2*(max_subs+1)*(D +2) +max_subs;
		*size = 2* (*size); 

		/* each array is released exactly once; the original freed
		   new_numbers a second time after the xyz loop */
		free (((char *) r_vector));
		free (((char *) s_vector));
		free (((char *) a_nonz));
		free (((char *) b));
		free (((char *) global_row_col));
		free (((char *) new_numbers));
		free (((char *) row_col));
		for (index =0; index <D; index++) {
			free (((char *) xyz_int[index]));
			free (((char *) xyz[index]));
		}

		if((*recv_vector = (real_type *)
                        my_malloc(((*size+1)*real_size))) == NULL)
                        exit_err("re_allocate_space:recv_vector", malloc_err);
		if((global_column = (int *)
                        my_malloc(((new_my_N+1)*int_size))) == NULL)
                        exit_err("re_allocate_space:global_column", malloc_err);
		if((b_global_column = (int *)
                        my_malloc(((new_my_N+1)*int_size))) == NULL)
                        exit_err("re_allocate_space:b_global_column", 
						malloc_err);
		if((new_numbers = (int *)
                        my_malloc(((new_my_N+1)*int_size))) == NULL)
                        exit_err("re_allocate_space:new_numbers", malloc_err);
		if((a_nonz = (real_type *)
                        my_malloc(((new_Asubs+1)*real_size))) == NULL)
                        exit_err("re_allocate_space:a_nonz", malloc_err);
		if((b = (real_type *)
                        my_malloc(((new_Asubs+1)*real_size))) == NULL)
                        exit_err("re_allocate_space:b", malloc_err);
		if((row_col = (int *)
                        my_malloc(((new_Asubs*2)*int_size))) == NULL)
                        exit_err("re_allocate_space:row_col", malloc_err);
		if((global_row_col = (int *)
                        my_malloc(((new_Asubs*2)*int_size))) == NULL)
                        exit_err("re_allocate_space:global_row_col", 
							malloc_err);

		/* coordinate arrays: a failure used to be reported and then
		   ignored, leaving a NULL pointer for unpack() to write to */
		for (index=0; index <D; index++){
			if ((xyz_int[index] = (int *) my_malloc(((new_my_N)
						*int_size))) == NULL) {
                       		printf("\n%d:: Error re_allocating xyz_int[%d]\n",
					 my_pid, index);
				exit_err("re_allocate_space:xyz_int",
						malloc_err);
			}
			if ((xyz[index] = (int *) my_malloc(((new_my_N)
						*int_size))) == NULL) {
                       		printf("\n%d:: Error re_allocating xyz[%d]\n",
					 my_pid, index);
				exit_err("re_allocate_space:xyz",
						malloc_err);
			}
		}
}/*end reallocate*/
get_using_limits(next, new_number, mapped_to)
int		next, *new_number, *mapped_to;
{
	/*
	 * Find the first region whose limits contain vertex `next`.
	 *
	 * region_limits is read as consecutive (low, high) pairs, D pairs
	 * per region; a vertex is inside when low <= coordinate < high in
	 * every dimension.
	 *
	 * On success *new_number = start_numbers[region] and
	 * *mapped_to = region.  If no region matches, a diagnostic goes to
	 * dbgfile, *new_number = LARGE_BOUND and *mapped_to is left
	 * untouched.
	 */
	extern	int	*start_numbers;
	int	region, dim, lim, coord, inside, found;

	found = -1;
	lim = 0;
	for (region = 0; (found == -1) && (region < P); region++) {
		inside = 1;
		/* every dimension is visited so that lim stays in step
		   with the region being tested */
		for (dim = 0; dim < D; dim++, lim += 2) {
			coord = *(xyz_int[dim] + next);
			if (!((coord >= region_limits[lim])
				&& (coord < region_limits[lim + 1])))
				inside = 0;
		}
		if (inside)
			found = region;
	}

	if (found == -1) {
		fprintf(dbgfile,"%d: very very weeeird\n",my_pid);
		/* NOTE(review): the first %d prints `next`, not my_pid, and
		   the message reads coordinates 0 and 1 regardless of D */
		fprintf(dbgfile,"%d: next %d %d \n",
				next,
				*(xyz_int[0] + next),
				*(xyz_int[1] + next));
		*new_number = LARGE_BOUND;
		return;
	}

	*new_number = start_numbers[found];
	*mapped_to = found;
}
