
/*

Copyright 2010 Brian Caffrey, Tom Williams, Mario Fares.


This file is part of Clusterfunc.

    Clusterfunc is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Clusterfunc is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Clusterfunc.  If not, see <http://www.gnu.org/licenses/>.


*/




//#include"fd.h"
#include<gsl/gsl_randist.h>
#include<gsl/gsl_rng.h>
#include<gsl/gsl_cdf.h>
#include<stdio.h>
#include<stdlib.h>// use of NULL, new/malloc 
#include<string>
#include<string.h>
#include<math.h>
#include<iostream>
#include<dirent.h>
#include<unistd.h>
#include<sys/stat.h>
#include<sys/time.h>
#include<getopt.h>
#include<memory>
#include<vector>
#include <Phyl/models>
#include<Phyl/DistanceEstimation.h>
#include<map>
#include <Phyl/BioNJ.h>
#include<Seq/SequenceContainerTools.h>
#include<Seq/VectorSequenceContainer.h>
#include<Seq/Sequence.h>
#include<Seq/SymbolList.h>
#include<Seq/SymbolListTools.h>

//#include "create.h"

#include "fd.h"
#define N 10

/* Couples a float score with an integer so that scores can be sorted while
   the integer travels with them (output_file stores the column index here). */
struct sort_parray{
	int intval;     /* payload carried along with the key */
	float floatval; /* sort key */
};

/* Strict-weak-ordering predicate for std::sort: orders by ascending floatval. */
bool lessThan(sort_parray A, sort_parray B)
{
	return B.floatval > A.floatval;
}

/* Forward declarations for the functions defined (or expected to be defined)
   in this translation unit. */

/* NOTE(review): this prototype takes vector< vector<char> > clades, whereas the
   definition further down takes vector< string >; they are distinct overloads
   and this one has no visible definition in this part of the file. */
void between_clade_score(vector<int>& between, vector< vector<char> >& clade1, vector< vector<char> >& clade2, int& col, vector<char>& blosum_index, vector<int>& blosum_matrix);
DistanceMatrix* ScoreDist(vector<string>& names, vector< string >& sequences);
vector<float> fd(char *outfile, char mat_file[], vector<char>& blosum_index, vector<int>& blosum_matrix, char * aminos, int sig, vector<string>& names, vector< string >& sequences, FDSet& fd_clades, TreeTemplate<Node>& tree_vector, int& diverge, int sup, string& fill, string& treestring, float threshold);
int output_file(char  *outp, char matrix_file[], int& num_out, vector< vector<float> >& resu, int& length_seq, vector< string >& clade1, int& size_clade1, vector< string >& clade2, int& size_clade2, vector< string >& outgroup, vector< string >& out_names, vector< string >& clade1_names, vector< string >& clade2_names, float& testval1, float& testval2, int sup, string& fill, FDSet& fd_clades, vector<char>& blosum_index, vector<int>& blosum_matrix,TreeTemplate<Node>& tree_vector, string& treestring);
int check_string_aminos(string& string);
int read_matrix(vector<int>& array, vector<char>& labels, FILE *file);
int get_res_index(vector<char>& b_index, char res);
int check_aminos(char& check);
int calculate_column_score(int& col, vector<string >& clade, vector< string >& clade2, int& out_index, vector<float>& b_scores, int& size_clade, vector<int>& blosum_matrix, vector<char>& blosum_index);
int column_conservation(vector<string >& clade, int& col, int& size_clade);
void check_string_unique(vector<char>& string, vector<char>& unique);
int within_clade_score(int& col, vector< string >& clade, int& clade_size, vector<int>& b_scores, vector<char>& blosum_index, vector<int>& blosum_matrix);
void get_column_residues(vector< string >& clade, int& clade_size, int& col, string& res_string);
float f_conservation(int& col, vector< string >& clade, int size_clade);
int read_sim(vector<FDSet>& temp_set, FDSet& fd_clades, vector<string>& temp_names, vector< string >& temp_seq);
int remove_false(vector<float>& results, vector<int>& locations, vector<float>& FDR_real, vector<int>& FDR_realoc, float& mean, float& SD, vector<double>& P_val);

/* File-scope option flags and per-column scratch flags.  fd_call snapshots
   most of them before its loop and restores them before every fd() run,
   because fd() overwrites gap, adhoc, cons_err and bad_chk while scoring. */
int run_mode=1, matrix_type=0, gap_opt=0, neg_opt=1, cons_opt=0, clade_similar=1, cons_corr=0, dist_corr=1, illegal_char=0, gap=0, adhoc=0, cons_err=0, bad_chk;
/* NOTE(review): fd() declares a local of the same name, which hides this global there. */
int length_seq=0;
/* Residue alphabet buffer; fd_call fills it with "-ARNDCEQGHILKMFPSTWYV ". */
char ami[100];

/* Arithmetic mean of the first `length` elements of `array`, accumulated in
   single precision.  A zero `length` yields a division by zero (NaN). */
template<class B>
float average(B *& array, int length){
	float sum = 0.0;
	int idx = 0;

	while(idx < length){
		sum += array[idx];
		++idx;
	}
	return sum / length;
}


/* Arithmetic mean of a vector of floats, accumulated front to back in single
   precision.  An empty vector yields 0/0 (NaN). */
float average_vf(vector<float>& array){
	const int count = array.size();
	float sum = 0.0;

	for(int pos = 0; pos != count; ++pos){
		sum += array[pos];
	}
	return sum / (float)count;
}

/* Square root of the mean squared deviation of `array` about `mean`
   (divides by the element count, not count-1). */
float SD_vf(vector<float>& array, float mean){
	const int count = array.size();
	float sum_sq = 0.0;

	int pos = 0;
	while(pos < count){
		sum_sq += pow(((float)array[pos] - mean),2);
		++pos;
	}
	sum_sq = sum_sq/count;

	return sqrt(sum_sq);
}

/* Arithmetic mean of a vector of ints, returned as float.  An empty vector
   yields 0/0 (NaN). */
float average_vector(vector<int>& array){
	const int count = array.size();
	float sum = 0.0;

	for(int pos = 0; pos != count; ++pos){
		sum += array[pos];
	}
	return sum / count;
}

/*===========================================================*/ 
/*===========================================================*/ 
/*===========================================================*/ 
/* Root-mean-square deviation of the first `size` elements of `numbers` about
   `media`.
 *
 * numbers : array of at least `size` values convertible to float
 * media   : the centre (normally the mean of `numbers`)
 * size    : number of elements to use
 *
 * Returns sqrt(sum((x - media)^2) / size), matching SE_vf.  The previous
 * version divided by size+1 because it reused the loop counter after the
 * loop had already advanced it past the last element.  A non-positive size
 * returns 0 instead of dividing by zero.
 */
template <class C>
float SE(C *numbers, float media, int size){
	if(size <= 0){
		return 0.0;
	}

	float Sum_squares = 0.0;
	for(int i = 0; i < size; ++i){
		const float deviation = (float)numbers[i] - (float)media;
		Sum_squares += deviation * deviation;
	}

	return sqrt((float)Sum_squares/(float)size);
}


/* Root-mean-square deviation of `numbers` about `media`: the squared
   deviations are summed, divided by the element count and square-rooted.
   An empty vector yields 0/0 (NaN). */
float SE_vf(vector<float>& numbers, float media){
	const int count = numbers.size();
	float squares = 0.0;

	int seen = 0;
	while(seen < count){
		squares += pow(((float)numbers[seen] - (float)media), 2);
		++seen;
	}

	const float result = sqrt((float)squares/(float)(seen));
	return result;
}


/* qsort-style three-way comparison of two ints passed by address:
   returns 1 if *X > *Y, -1 if *X < *Y and 0 when they are equal. */
int compare_ints (const void *X, const void *Y)
{
	const int lhs = *((int *)X);
	const int rhs = *((int *)Y);

	if (lhs == rhs)
	{
		return 0;
	}
	return (lhs > rhs) ? 1 : -1;
}
/* qsort-style three-way comparison of two floats passed by address:
   returns 1 if *X > *Y, -1 if *X < *Y and 0 otherwise (which includes the
   case where neither ordering holds). */
int compare_floats (const void *X, const void *Y)
{
	const float lhs = *((float *)X);
	const float rhs = *((float *)Y);

	/* each comparison contributes 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}

/*=============================================================================*/
/*=============================================================================*/
/*=============================================================================*/


int fd_call(vector<int>& blosum_matrix, vector<char>& blosum_index, vector<string>& names, vector< string >& sequences, vector<FDSet>& fd_clades, vector< TreeTemplate<Node> > tree_vector, TagMatrix& summary_matrix, vector<string>& Tag1, string fill, int supress, char mat_file[], const char *infile, vector<string>& treestring, vector<int>& tag_fd, vector<int>& tag_total, map<string, int>& tag_map, float threshold, vector<string>& tag_vector){
	char aminos[23], outfile[1000], pwd[1000];

	int omatrix_type, oadhoc, ogap, oconserr, oillegal_char, oneg_opt, ocons_opt, ogap_opt, oclade_similar, odist_corr;
	omatrix_type=matrix_type; oadhoc=adhoc; ogap=gap; oconserr=cons_err; oillegal_char=illegal_char; oneg_opt=neg_opt; ocons_opt=cons_opt; ogap_opt=gap_opt; oclade_similar=clade_similar; odist_corr=dist_corr;



	strcpy(aminos, "-ARNDCEQGHILKMFPSTWYV ");
	strcpy(ami, "-ARNDCEQGHILKMFPSTWYV ");


	Pwd(pwd);


	int d=0;
	int first_file = 0;
	int tree_size = tree_vector.size();
	for(unsigned int l=0;l<tree_size;++l){

		int tag_size = Tag1.size();
		int clade1_size = fd_clades[l].clade1_names.size();
		for(unsigned int i=0;i<clade1_size;++i){
			for(unsigned int j=0;j<tag_size;++j){
				summary_matrix.incrementtotal(Tag1[j], fd_clades[l].clade1_names[i]);
			}
		}
		int clade2_size = fd_clades[l].clade2_names.size();
		for(unsigned int i=0;i<clade2_size;++i){
			for(unsigned int j=0;j<tag_size;++j){
				summary_matrix.incrementtotal(Tag1[j], fd_clades[l].clade2_names[i]);
			}
		}


		outfile[0]='\0';
		char output[1000];

		strcpy(output, infile);
		char tel[1000];
		sprintf(tel, ".%d.out", d);
		strcat(output, tel);
		++d;
		strcat(outfile, output);



		int diverge = 0;
		matrix_type=omatrix_type; adhoc=oadhoc; gap=ogap; cons_err=oconserr; illegal_char=oillegal_char; neg_opt=oneg_opt; cons_opt=ocons_opt; gap_opt=ogap_opt; clade_similar=oclade_similar; dist_corr=odist_corr;
		int y=0;
		fd(outfile, mat_file , blosum_index, blosum_matrix, aminos, y, names, sequences, fd_clades[l], tree_vector[l], diverge, supress, fill, treestring[l], threshold);


		vector<string> tem_vec;
		if(first_file==0){
			summary_matrix.Filenames.insert(pair<string, vector<string> >(fill, tem_vec));
			first_file=1;
		}

		summary_matrix.p = summary_matrix.Filenames.find(fill);
//map<string, int> tag_fd, tag_total;

static int tag_map_size=0;

		if(tag_size>0){

//cerr << "Tag[0]=" << Tag1[0] << endl;

			if(diverge ==0){
				if(tag_map.find(Tag1[0])==tag_map.end()){
					tag_map[Tag1[0]]=tag_map_size;
					tag_vector.push_back(Tag1[0]);
//cerr << "tag_push=" << Tag1[0] << endl;
					
					tag_total.push_back(1);
				}else{
					++tag_total[tag_map[Tag1[0]]];
				}
				//Do nothing, no functional divergence
			}else if(diverge == 1){

				if(tag_map.find(Tag1[0])==tag_map.end()){
//cerr << "tag_push=" << Tag1[0] << endl;
					tag_vector.push_back(Tag1[0]);
					tag_map[Tag1[0]]=tag_map_size;
					tag_total.push_back(1);
					tag_fd.push_back(1);
				}else{
					++tag_total[tag_map[Tag1[0]]];
					++tag_fd[tag_map[Tag1[0]]];
				}
			/*	if(tag_map.find(Tag[0])==tag_fd.end()){
					tag_fd[Tag[0]]=0;
					tag_fd[Tag[0]]=1;
				}else{
					++tag_fd[Tag[0]];
				}*/
				//Add clade 1 to the tag/species matrix
				int clade1_size = fd_clades[l].clade1_names.size();
				for(unsigned int i=0;i<clade1_size;++i){

					if(summary_matrix.p!=summary_matrix.Filenames.end()){
			//for(unsigned int j=0;j<Tag1.size();++j){
						int hit=0;
						for(unsigned int k=0;k<summary_matrix.p->second.size();++k){
							if(fd_clades[l].clade1_names[i]==summary_matrix.p->second[k]){
								hit=1;
							}
						}
						summary_matrix.increment(Tag1[0], fd_clades[l].clade1_names[i]);
						//	summary_matrix.incrementtotal(Tag1[0], fd_clades[l].clade1_names[i]);
						if(hit!=0){
							summary_matrix.p->second.push_back(fd_clades[l].clade1_names[i]);
						}
					}
				}


			}else if (diverge == 2){


				if(tag_map.find(Tag1[0])==tag_map.end()){
//cerr << "tag_push=" << Tag1[0] << endl;
					tag_map[Tag1[0]]=tag_map_size;
					tag_vector.push_back(Tag1[0]);
					tag_total.push_back(2);
					tag_fd.push_back(2);
				}else{
					++tag_total[tag_map[Tag1[0]]];
					++tag_fd[tag_map[Tag1[0]]];
				}
/*				if(tag_total.find(Tag[0])==tag_total.end()){
					tag_total[Tag[0]]=0;
					tag_total[Tag[0]]=1;
				}else{
					++tag_total[Tag[0]];
				}
				if(tag_fd.find(Tag[0])==tag_fd.end()){
					tag_fd[Tag[0]]=0;
					tag_fd[Tag[0]]=1;
				}else{
					++tag_fd[Tag[0]];
				}*/

				//Add clade 2 to the tag/species matrix
				int clade2_size = fd_clades[l].clade2_names.size();
				for(unsigned int i=0;i<clade2_size;++i){
					if(summary_matrix.p!=summary_matrix.Filenames.end()){
						int hit=0;
						for(unsigned int k=0;k<summary_matrix.p->second.size();++k){
							if(fd_clades[l].clade2_names[i]==summary_matrix.p->second[k]){
								hit = 1;
							}
						}
						summary_matrix.increment(Tag1[0], fd_clades[l].clade2_names[i]);
						if(hit!=0){
							summary_matrix.p->second.push_back(fd_clades[l].clade2_names[i]);
						}}
				}

			}else if(diverge ==3){


				if(tag_map.find(Tag1[0])==tag_map.end()){
//cerr << "tag_push=" << Tag1[0] << endl;
					tag_map[Tag1[0]]=tag_map_size;
					tag_vector.push_back(Tag1[0]);
					tag_total.push_back(2);
					tag_fd.push_back(2);
				}else{
					tag_total[tag_map[Tag1[0]]]+=2;
					tag_fd[tag_map[Tag1[0]]]+=2;
				}
				/*if(tag_map.find(Tag1[0])==tag_map.end()){
					tag_total[Tag1[0]]=0;
					tag_total[Tag1[0]]=2;
				}else{
					++tag_total[Tag1[0]];
				}
				if(tag_map.find(Tag1[0])==tag_map.end()){
					tag_fd[Tag1[0]]=0;
					tag_fd[Tag1[0]]=2;
				}else{
					tag_fd[Tag1[0]]+=2;
				}*/


				int clade1_size = fd_clades[l].clade1_names.size();
				int clade2_size = fd_clades[l].clade2_names.size();
				for(unsigned int i=0;i<clade1_size;++i){

					if(summary_matrix.p!=summary_matrix.Filenames.end()){
						//					for(unsigned int j=0;j<Tag1.size();++j){
						int hit=0;
						for(unsigned int k=0;k<summary_matrix.p->second.size();++k){
							if(fd_clades[l].clade1_names[i]==summary_matrix.p->second[k]){
								hit=1;
							}
						}
						summary_matrix.increment(Tag1[0], fd_clades[l].clade1_names[i]);
						//					summary_matrix.incrementtotal(Tag1[0], fd_clades[l].clade1_names[i]);
						if(hit!=0){
							summary_matrix.p->second.push_back(fd_clades[l].clade1_names[i]);
						}
						//					}
					}
				}

				for(unsigned int i=0;i<clade2_size;++i){
					if(summary_matrix.p!=summary_matrix.Filenames.end()){
						//					for(unsigned int j=0;j<Tag1.size();++j){
						int hit=0;
						for(unsigned int k=0;k<summary_matrix.p->second.size();++k){
							if(fd_clades[l].clade2_names[i]==summary_matrix.p->second[k]){
								hit = 1;
							}
						}
						summary_matrix.increment(Tag1[0], fd_clades[l].clade2_names[i]);
						//					summary_matrix.incrementtotal(Tag1[0], fd_clades[l].clade2_names[i]);
						if(hit!=0){
							summary_matrix.p->second.push_back(fd_clades[l].clade2_names[i]);
						}}
						//				}
				}
			}

		}

	}
	return 0;
}


/*=============================================================================*/
/*=============================================================================*/
/*=============================================================================*/

vector<float> fd(char *outfile, char mat_file[], vector<char>& blosum_index, vector<int>& blosum_matrix, char * aminos, int sig, vector<string>& names, vector< string >& sequences, FDSet& fd_clades, TreeTemplate<Node>& tree_vector, int& diverge, int sup, string& fill, string& treestring, float threshold){
	int size_outgroup, size_clade1, size_clade2, var_ok, integrate, hit=0, outgroup_index, conservation, cons_ok, non_gap, c1_sim;
	float clade1_mean, clade2_mean, clade1_SE, clade2_SE, divide_SE, mean_within, corrected_value, SD_SIG=0.0, mean_SIG=0.0;


	Newick * newickReader = new Newick(false); //No comment allowed!
	/*===========================================================================================================*/
	/*===========================================================================================================*/
	/*           This section is all of the significance test section really                                     */

	int length_seq = fd_clades.outgroup[0].size();
	if(sig==0){


		try {
			stringstream temp_stream;
			temp_stream.clear();
			TreeTemplate<Node> *tree = &tree_vector; 

			FILE *stream;
			stream = freopen("/dev/null", "w", stdout);
			vector<Node *> nodes = tree->getNodes();
			Node * nodey = TreeTemplateTools::cloneSubtree<Node>(*nodes[nodes.size()-1]);
			TreeTemplate<Node>  subtree(*nodey);

			vector<float> SIG;

			//int h= 400000%fd_clades.outgroup[0].size();
			//int r = ((400000-h)/fd_clades.outgroup[0].size())+1;	
			//			cerr << "r=" << r << endl;
			float thresh=1.0;
			float Old=0.0;
			int p=0, twice =0;
int P=0;
			while((thresh>=threshold&&twice!=2)&&P<1000 ){
		//		for(int q=0;q<100;++q){

++P;
				vector<string> Tnames;
				//vector< string > tseqs;
				vector< string > tsequences;
				create_seq(6, tree, Tnames, tsequences, length_seq);


				vector<FDSet> tem_set;
				//tem_set.clear();

				/*	for(unsigned int j=0;j<tseqs.size();++j){
					tsequences.push_back(tseqs[j]);
					}		
				 */

				read_sim(tem_set, fd_clades, Tnames, tsequences);

				vector< TreeTemplate<Node> > temp_tree_vector;

				temp_tree_vector.push_back(tree_vector);
				tem_set.push_back(fd_clades);


				vector<float> sigtest;


				char *tem=NULL;	
				sigtest = fd(tem, mat_file, blosum_index, blosum_matrix, aminos, 1, names, sequences, tem_set[0], temp_tree_vector[0], diverge, 1, fill, treestring, threshold);

				for(unsigned int k=0;k<sigtest.size();++k){
					SIG.push_back(sigtest[k]);
				}	
				sigtest.clear();

				float me = average_vf(SIG);
				float Average = SD_vf(SIG, me);
				if(p>=1){
					thresh = fabs(Old - Average); 
				}
				if (thresh<threshold){
					++twice;
				}else{
					twice =0;
				}
				//	cerr << "OLD=" << Old << "\tAver=" << Average << "\ttresh=" << thresh << "\tp=" << p << endl;	
				Old = Average;
				++p;

			}

			sort(SIG.begin(), SIG.end());
			mean_SIG = average_vf(SIG);
			SD_SIG = SD_vf(SIG, mean_SIG);	

			} catch (Exception e){
				cerr << "Error: Problems processing the tree in file: " << __FILE__ << " on line: " << __LINE__ << " of source code." << endl;
				exit(-1);
				vector<float> temp;

				return temp;
			}


		}

		/*===========================================================================================================*/
		/*===========================================================================================================*/
		//	length_seq = fd_clades.outgroup[0].size();
		vector<float> row(length_seq, 0.0);
		string residue_string;
		int sizo = fd_clades.outgroup_names.size();
		vector< vector<float> > resu(sizo, row);

		size_outgroup= fd_clades.outgroup.size();

		/*for(unsigned int i=0;i<fd_clades.outgroup_names.size();++i){
		  resu.push_back(row);
		  }
		 */

		vector<float> blosum_scores1;
		vector<float> blosum_scores2;

		size_clade1 = fd_clades.clade1.size();
		size_clade2 = fd_clades.clade2.size();
		size_outgroup = fd_clades.outgroup.size();


		for(int i=0;i<size_outgroup;++i){  //compares against each outgroup residue

			//		int v=0;
			for(int j=0;j<length_seq;++j){  //compares each column
				cons_ok=0;var_ok=0;gap=0;adhoc=0;integrate=0;hit=0;bad_chk=0;  // set a few test values to defaults

				hit = check_aminos(fd_clades.outgroup[i][j]);
				if(hit == 0){
					bad_chk=1;
				}
				outgroup_index = get_res_index(blosum_index, fd_clades.outgroup[i][j]);

				blosum_scores1.clear();
				blosum_scores2.clear();
				/*clear the scores from the previous run and then calculate the score for the column in clade1 and clade2*/
				calculate_column_score(j, fd_clades.clade1, fd_clades.clade2, outgroup_index, blosum_scores1, size_clade1, blosum_matrix, blosum_index);
				calculate_column_score(j, fd_clades.clade2, fd_clades.clade2, outgroup_index, blosum_scores2, size_clade2, blosum_matrix, blosum_index);
				/*check the conservation of the columns*/

				conservation = column_conservation(fd_clades.clade1, j, size_clade1);
				float conservation2 = column_conservation(fd_clades.clade2, j, size_clade2);

				if((conservation == 1.0 && fd_clades.clade1[0][j] == '-')||(conservation2 == 1.0 && fd_clades.clade2[0][j]=='-')){
					gap = 1;
				}

				/*if((cons_opt == 1) && (conservation == 1.0)){
					cons_ok = 1;
					//	}else if((cons_opt ==1) && (conservation == 0)){

			}else if(cons_opt ==0){
				cons_ok = 1;
			}*/

			cons_ok=1;
			if(cons_ok ==1){

				if(gap!=1){
					clade1_mean = average_vf(blosum_scores1);
					clade2_mean = average_vf(blosum_scores2);

					clade1_SE = SE_vf(blosum_scores1, clade1_mean);
					clade2_SE = SE_vf(blosum_scores2, clade2_mean);

					if(sig == 0){
						if(neg_opt == 0){
							var_ok =1;

						}else if(neg_opt ==1){
							integrate = 0;
							if(clade1_mean <0){
								++integrate;
							}

							if(clade2_mean >=0){
								++integrate;
							}

							if(integrate ==2){
								var_ok =1;
							}

						}
					}else{
						var_ok=1;
					}

					var_ok=1;
					cons_err =0;
					if(clade1_SE == 0 && clade2_SE == 0){ //if the clades are both fully conserved
						adhoc =1;
						blosum_scores1.clear();

						calculate_column_score(j, fd_clades.clade1, fd_clades.clade2, outgroup_index, blosum_scores1, size_clade1, blosum_matrix, blosum_index);
						clade1_mean = average_vf(blosum_scores1);
						clade1_SE = SE_vf(blosum_scores1, clade1_mean);

						if(clade1_SE == 0){
							cons_err = 1;
						}						
					}
					divide_SE = sqrt( (pow(clade1_SE,2)/fd_clades.clade1.size()) + ( pow(clade2_SE,2)/fd_clades.clade2.size()) );
					//					divide_SE = sqrt( (s/size_clade1) + (s/size_clade2) );

					residue_string.clear();
					non_gap = 0;
					get_column_residues(fd_clades.clade1, size_clade1, i, residue_string);
					int check=0;
					check = check_string_aminos(residue_string);

					if(check > 0){
						non_gap = 1;
					}

					c1_sim = 0;

					if(sig ==0){
						if(clade_similar ==1){

							if(cons_opt == 0){
								vector<int> within_scores;
								within_clade_score(j, fd_clades.clade1, size_clade1, within_scores, blosum_index, blosum_matrix);
								mean_within = average_vector(within_scores);

								if(mean_within >0){
									c1_sim = 1;
								}
							}else{

								c1_sim = 1;
							}
						}else{
							c1_sim = 1;
						}
					}else{
						c1_sim = 1;
					}



					if(cons_err!= 1){
						//++v;
						float test_value=0.0;
						if((clade1_mean - clade2_mean)!=0){
							test_value = ((clade1_mean - clade2_mean)/divide_SE);
						}else{
							test_value = 0.0;
						}
						float fraction_cons = f_conservation(j, fd_clades.clade1, fd_clades.clade1.size());

						if(cons_corr == 1){
							if(test_value!=0)
								corrected_value = test_value*fraction_cons;
							else
								corrected_value = 0.0; 
						}else{
							if(test_value!=0)
								corrected_value = test_value;
							else
								corrected_value = 0.0; 
						}

						if(matrix_type ==1){
							if((var_ok == 1) && (non_gap ==1) && (c1_sim ==1)){
								resu[i][j] = corrected_value;
							}
						}else if(matrix_type ==0){
							if((var_ok == 1) && (non_gap ==1) && (c1_sim ==1) && (bad_chk == 0)){
								resu[i][j] = corrected_value;
							}


						}else{
							fprintf(stderr, "Error: Unsupported matrix chosen.\n");
							exit(-1);
						}
					}
				}else{
				/*	if(matrix_type ==1){
						if((var_ok == 1) && (non_gap ==1) && (c1_sim ==1)){
							resu[i][j] = 0.0;
						}
					}else if(matrix_type ==0){
						if((var_ok == 1) && (non_gap ==1) && (c1_sim ==1) && (bad_chk == 0)){
							resu[i][j] = 0.0;
						}


					}else{

						fprintf(stderr, "Error: Unsupported matrix chosen.\n");
						exit(-1);
					}*/
							resu[i][j] = 0.0;

				}

			}//if cons_ok ==1 ends

			}//for j closes

		}//meta main loop ends

		vector<float> myreturn;


		if(sig == 0){

			diverge = output_file(outfile,  mat_file, size_outgroup, resu, length_seq, fd_clades.clade1, size_clade1, fd_clades.clade2, size_clade2, fd_clades.outgroup, fd_clades.outgroup_names, fd_clades.clade1_names, fd_clades.clade2_names, mean_SIG, SD_SIG, sup, fill, fd_clades, blosum_index, blosum_matrix, tree_vector, treestring);

		}else{

			vector<float> myret;
			for(int j=0;j<length_seq;++j){
				float temp=0.0;
				for(int i=0; i<size_outgroup;++i){
					temp += resu[i][j];		
				}
				myret.push_back(temp/(float)size_outgroup);

			}
			delete newickReader; 
			return myret;	
		}

		delete newickReader; 

		return myreturn;

	}

	/*=============================================================================*/
	/*=============================================================================*/
	/*=============================================================================*/

	/* Appends to `between` one substitution-matrix entry for every
	   (clade1 sequence, clade2 sequence) pair, taken at alignment column `col`.
	   The matrix is addressed as a flat array with 21 entries per row. */
	void between_clade_score(vector<int>& between, vector< string >& clade1, vector< string >& clade2, int& col, vector<char>& blosum_index, vector<int>& blosum_matrix){

		const int rows = clade1.size();
		const int cols = clade2.size();

		for(int a=0; a!=rows; ++a){
			string& first = clade1[a];
			for(int b=0; b!=cols; ++b){
				string& second = clade2[b];
				const int row_index = get_res_index(blosum_index, first[col]);
				const int col_index = get_res_index(blosum_index, second[col]);
				between.push_back(blosum_matrix[(21*row_index)+col_index]);
			}
		}
	}


	/*=======================================================================================*/
	/*=======================================================================================*/
	/*=======================================================================================*/


	int read_sim(vector<FDSet>& temp_set, FDSet& fd_clades, vector<string>& temp_names, vector< string >& temp_seq){
		static int times1=0;


		string temp;
		FDSet tem;

		/*
		   for(int i=0;i<(signed)fd_clades.outgroup_names.size();++i){
		   tem.outgroup_names.push_back(fd_clades.outgroup_names[i]);
		   }
		   for(int i=0;i<(signed)fd_clades.clade1_names.size();++i){
		   tem.clade1_names.push_back(fd_clades.clade1_names[i]);
		   }
		   for(int i=0;i<(signed)fd_clades.clade2_names.size();++i){
		   tem.clade2_names.push_back(fd_clades.clade2_names[i]);
		   }
		 */
		for(int i=0;i<(signed)fd_clades.outgroup_names.size();++i){
			tem.outgroup_names.push_back(fd_clades.outgroup_names[i]);
			int t= compare_to_alignment(temp_names, fd_clades.outgroup_names[i]);
			if(t!=-1){
				string temp_char(temp_seq[t-1].begin(), temp_seq[t-1].end());
				tem.outgroup.push_back(temp_char);
			}

		}
		for(int i=0;i<(signed)fd_clades.clade1_names.size();++i){
			tem.clade1_names.push_back(fd_clades.clade1_names[i]);
			int t= compare_to_alignment(temp_names, fd_clades.clade1_names[i]);
			if(t!=-1){
				string temp_char(temp_seq[t-1].begin(), temp_seq[t-1].end());
				tem.clade1.push_back(temp_char);
			}

		}
		for(int i=0;i<(signed)fd_clades.clade2_names.size();++i){
			tem.clade2_names.push_back(fd_clades.clade2_names[i]);
			int t= compare_to_alignment(temp_names, fd_clades.clade2_names[i]);
			if(t!=-1){
				string temp_char(temp_seq[t-1].begin(), temp_seq[t-1].end());
				tem.clade2.push_back(temp_char);
			}
		}
		temp_set.push_back(tem);


		return 0;
	}



	/*=============================================================================*/
	/*=============================================================================*/
	/*=============================================================================*/

	int output_file(char  *outp, char matrix_file[], int& num_out, vector< vector<float> >& resu, int& length_seq, vector< string >& clade1, int& size_clade1, vector< string >& clade2, int& size_clade2, vector< string >& outgroup, vector< string >& out_names, vector< string >& clade1_names, vector< string >& clade2_names, float& testval1, float& testval2, int sup, string& fill, FDSet& fd_clades, vector<char>& blosum_index, vector<int>& blosum_matrix,TreeTemplate<Node>& tree_vector, string& treestring){
		/*	static int times1=0;

			struct timeval firsty,  second,  lapsed; 
			cerr << "before" << endl;
			struct timezone tzp;  
			gettimeofday (&firsty, &tzp); 
		 */	
		vector<float> temp_results;
		vector<float> final_results;
		vector<int> real_locations;
		vector< vector<float> > real_resu;
		vector<float> ultimate_resu;


		ofstream out;
		if(sup==0){

			try{
				out.open(outp);
			}catch(Exception e){
				cerr << "Error: couldn't open file: " << outp << endl;

			}

			out << "Functional divergence (fd) run for the file: " << fill << endl;
			//		out << "Parameters:  Matrix type =" << matrix_type << "Ignore gap-containing columns = " << gap_opt << "Require negative mean clade1/OG score = " << neg_opt << "Require total conservation = " << cons_opt << ", Require within-clade similarity = " << clade_similar << ", Correct by fraction of conserved residues = " << cons_corr << ", Correct BLOSOM scores by distance = " << dist_corr << ", Matrix file = " << matrix_file << endl;
			out << "Functional divergence analysis (using " << clade1_names.size() << "sequences in clade1, " << clade2_names.size() << " sequences in clade2 and "<< num_out << " outgroup sequences).\n\n";
			out << "Col\t\tScore\t\tResidues (C1/C2/Outgroup)\n";
		}


		if(num_out >=2){
			for(int j=0;j<length_seq;++j){ //loop through the nums

				float temp=0.0;
				for(int i=0;i<num_out;++i){
					temp+=resu[i][j];
				}
				if(temp!=0)
					temp/=(float)num_out;
				ultimate_resu.push_back(temp);
				real_locations.push_back(j+1);
			}
		}else{
			for(int i=0;i<length_seq;++i){
				ultimate_resu.push_back(resu[0][i]);
				real_locations.push_back(i+1);
			}
		}


		vector<float> temp_resu;
		vector<int> temp_loc;

		sort_parray final_result[length_seq];

		for(unsigned int i=0;i<ultimate_resu.size();++i){
			temp_resu.push_back((ultimate_resu[i]));
			temp_loc.push_back(i+1);
			final_result[i].intval = i;
			final_result[i].floatval=temp_resu[i];
		}


		std::sort(final_result, final_result+length_seq,lessThan);

		vector<float> final_resu;
		vector<int> final_loc;
		for(int i=0;i<length_seq;++i){
			final_resu.push_back(final_result[i].floatval);
			final_loc.push_back(final_result[i].intval);
		}


		vector<float> FDR;
		vector<int> FDR_loc;
		vector<double> P_val;

		remove_false(final_resu, final_loc, FDR, FDR_loc, testval1, testval2, P_val);
		//int remove_false(vector<float>& results, vector<int>& locations, vector<float>& FDR_real, vector<int>& FDR_realoc, float& mean, float& SD, vector<double>& P_val);

		if(sup==0){
			for(int i=0;i<(signed)FDR.size();++i){


				vector<int> between;
				between_clade_score(between, fd_clades.clade1, fd_clades.clade2, FDR_loc[i], blosum_index, blosum_matrix);
				float av_bet = average_vector(between);

			//	if(av_bet <=0){
					string clade1_string;
					string clade2_string;
					string og_string;
					get_column_residues(clade1, size_clade1, FDR_loc[i], clade1_string);
					get_column_residues(clade2, size_clade2, FDR_loc[i], clade2_string);
					get_column_residues(outgroup, num_out, FDR_loc[i], og_string);


					out <<  FDR_loc[i]+1 << "\t\t" <<  FDR[i] << "\t\t";
					for(unsigned int g=0;g<clade1_string.size();++g){
						out << clade1_string[g];
					}
					out << "/";
					for(unsigned int g=0;g<clade2_string.size();++g){
						out <<  clade2_string[g];
					}
					out <<  "/";
					for(unsigned int g=0;g<og_string.size();++g){
						out << og_string[g];
					}
					out << "\t\tP_val = " <<  P_val[i] << endl;

			//	}

			}

			out << "\nInput file:\n";

			for(int i=0;i<num_out;++i){
				out << ">" << out_names[i] << endl;
				out << &outgroup[i].at(0) << endl;
			}
			out <<  "[OUTGROUP]\n";
			for(int i=0;i<size_clade1;++i){
				out << ">" << &clade1_names.at(i).at(0) << endl;
				out << &clade1.at(i).at(0) << endl;
			}
			out <<  "[CLADE]\n";
			for(int i=0;i<size_clade2;++i){
				out <<  ">" << &clade2_names.at(i).at(0) << endl;
				out << &clade2.at(i).at(0) << endl;
			}
		}


		int div1=0, div2=0;
		for(unsigned int i=0;i<FDR.size();++i){
			if(FDR[i]<0.0){
				div1=1;
			}
			if(FDR[i]>0.0){
				div2=2;
			}
		}

		if(sup==0){
			out << endl;	
			out << treestring;
			out.close();
			//		static string tempstring = TreeTemplateTools::treeToParenthesis(tree_vector, false);
			//		cerr << tempstring << endl;
			//		Newick * newickReader = new Newick(false); //No comment allowed!
			//		newickReader->write(tree_vector, outp, false);
		}


		return div1+div2;


	}


	/*=============================================================================*/
	/*=============================================================================*/
	/*=============================================================================*/

	/*
	 * Ranks the entries of `results` by how far they lie from `mean` and keeps
	 * the leading ones according to a step-wise p-value threshold.
	 *
	 * Stage 1 walks `results` inwards from both ends, each time taking the end
	 * whose |value - mean| is larger (ties take the high-index end), and records
	 * the value, its entry in `locations`, and the two-sided Gaussian p-value
	 * 2*(1 - gsl_cdf_gaussian_P(|value - mean|, SD)).
	 * Presumably `results` arrives sorted so that the two ends are the extremes;
	 * verify against the caller.
	 *
	 * Stage 2 scans that ranking from its last entry towards the first, comparing
	 * P_val[j] with (k/m)*alpha where k counts 1,2,... and alpha is 0.05, and
	 * stops at the first j that passes.  Entries 0..j-1 are then appended to
	 * FDR_real / FDR_realoc / P_val_real.
	 * NOTE(review): k grows while j shrinks and entry j itself is not copied;
	 * this differs from the textbook Benjamini-Hochberg step-up rule - confirm
	 * that it is intended.
	 *
	 * results     test statistics (read only)
	 * locations   parallel to `results`; must hold at least as many entries
	 * FDR_real    output, retained statistics are appended
	 * FDR_realoc  output, retained locations are appended
	 * mean, SD    parameters of the reference Gaussian
	 * P_val_real  output, retained p-values are appended
	 *
	 * Always returns 0.
	 */
	int remove_false(vector<float>& results, vector<int>& locations, vector<float>& FDR_real, vector<int>& FDR_realoc, float& mean, float& SD, vector<double>& P_val_real){

		const float alpha = 0.05;
		const int m = results.size();
		int bottom = m-1, top = 0;

		vector<double> P_val;
		vector<int> FDR_loc;
		vector<float> FDR;

		// Stage 1: order the entries from most to least extreme.
		for(int taken=0;taken<m;++taken){

			const double top_dev = fabs(results[top]-mean);
			const double bottom_dev = fabs(results[bottom]-mean);

			// On a tie the high-index end is consumed first.
			const bool take_bottom = (top_dev <= bottom_dev);
			const int pick = take_bottom ? bottom : top;
			const double dev = take_bottom ? bottom_dev : top_dev;

			P_val.push_back(2*(1-gsl_cdf_gaussian_P(dev, SD)));
			FDR.push_back(results[pick]);
			FDR_loc.push_back(locations[pick]);

			if(take_bottom){
				--bottom;
			}else{
				++top;
			}
		}

		// Stage 2: find the cut-off index.  The start index is computed in
		// signed arithmetic so an empty ranking gives -1 instead of wrapping.
		int j = static_cast<int>(FDR.size())-1;
		for(int k=1;j>0;--j,++k){
			const double alval = (((double)k/(double)m)*(double)alpha);
			if(P_val[j]<=alval){
				break;
			}
		}

		// Everything ranked ahead of the cut-off is reported back.
		for(int idx=0;idx<j;++idx){
			FDR_real.push_back(FDR[idx]);
			FDR_realoc.push_back(FDR_loc[idx]);
			P_val_real.push_back(P_val[idx]);
		}

		return 0;
	}

			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/

			/*
			 * Builds a pairwise distance matrix for the given protein sequences.
			 *
			 * The sequences are loaded into a VectorSequenceContainer, copied into a
			 * VectorSiteContainer, gaps are converted to unknown characters, and a
			 * DistanceEstimation is run with a JTT92 model and a constant rate
			 * distribution.
			 *
			 * names      one label per sequence
			 * sequences  residues for each label; indexed in parallel with `names`,
			 *            so it must hold at least names.size() entries
			 *
			 * Returns whatever DistanceEstimation::getMatrix() yields.
			 * NOTE(review): `model` is never deleted here and the helper objects are
			 * deleted while MyDS is still alive; whether DistanceEstimation owns or
			 * still needs them is not visible from this file - confirm against the
			 * Bio++ version in use before changing the clean-up order.
			 */
			DistanceMatrix* ScoreDist(vector<string>& names, vector< string >& sequences){

				Alphabet *alphabet = new ProteicAlphabet();
				DiscreteDistribution * rdist = new ConstantDistribution(1.);
				const ProteicAlphabet * alpha = dynamic_cast<const ProteicAlphabet *>(alphabet);
				SubstitutionModel * model = new JTT92(alpha);
				VectorSequenceContainer *vsc = new VectorSequenceContainer(alphabet);

				// Unsigned to match the loop counters below.
				const unsigned int nam_siz = names.size();

				for(unsigned int i=0;i<nam_siz;++i){
					vsc->addSequence(Sequence(names[i], sequences[i], alphabet));
				}

				VectorSiteContainer * sites = new VectorSiteContainer(alphabet);

				// Re-read each sequence by name and add it to the site container.
				for(unsigned int i=0;i<nam_siz;++i){
					const Sequence *myseq = vsc->getSequence(names[i]);
					sites->addSequence(*myseq, true);
				}
				SiteContainerTools::changeGapsToUnknownCharacters(*sites);

				DistanceEstimation MyDS(model, rdist, sites, 1, true);

				delete sites;
				delete vsc;
				delete alphabet;
				delete rdist;
				DistanceMatrix *DS = MyDS.getMatrix();

				return DS;
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/

			/*
			 * Tests whether `check` equals one of the first 21 entries of the
			 * file-level table `ami`.
			 * Returns 1 on a match, 0 otherwise.
			 * Only 21 entries are scanned; presumably `ami` holds a trailing blank
			 * that must not count as a residue - verify against its definition.
			 */
			int check_aminos(char& check){

				int slot = 0;
				while(slot < 21 && ami[slot] != check){
					++slot;
				}

				// slot stops short of 21 exactly when a matching entry was found
				return (slot < 21) ? 1 : 0;
			}


			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/

			/*
			 * Reads a 21x21 substitution matrix from `file`.
			 *
			 * The expected layout is 21 rows, each made of a whitespace-separated
			 * label token followed by 21 integer tokens.  The first character of
			 * every label goes into `labels` (resized to 21); the 441 scores are
			 * appended to `array` in row-major order (existing contents are kept).
			 *
			 * Tokens are read with bounded widths so an over-long token cannot
			 * overrun the local buffers.  If `file` is NULL or runs out of tokens,
			 * the remaining labels become '\0' and the remaining scores 0, so the
			 * output always has the full 21x21 shape.
			 *
			 * Returns 0 when every token was read, 1 otherwise.
			 */
			int read_matrix(vector<int>& array, vector<char>& labels, FILE *file){

				const int dim = 21;
				char temp[100], lab[10];

				// Set to 1 once a read fails; no further reads are attempted after that.
				int status = (file == NULL) ? 1 : 0;

				labels.resize(dim);
				for(int i=0;i<dim;++i){

					lab[0] = '\0';
					if(status == 0 && fscanf(file, "%9s", lab) != 1){
						status = 1;
						lab[0] = '\0';
					}
					labels[i] = lab[0];

					for(int j=0;j<dim;++j){
						temp[0] = '\0';
						if(status == 0 && fscanf(file, "%99s", temp) != 1){
							status = 1;
							temp[0] = '\0';
						}
						// atoi("") is 0, which gives the zero fill after a failed read
						array.push_back(atoi(temp));
					}
				}
				return status;
			}
			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/

			/*
			 * Looks up the row/column index of residue `res` in the matrix label
			 * list `b_index` (the first 21 entries are searched, so b_index must
			 * hold at least 21).
			 *
			 * When the file-level option `gap_opt` is 1 and `res` is '-', the
			 * file-level flag `gap` is set to 1 and 1 is returned without searching.
			 *
			 * Returns the index of the first matching label, or 0 when no label
			 * matches (indistinguishable from a match at position 0).
			 */
			int get_res_index(vector<char>& b_index, char res){

				// gap-avoidance mode: flag the gap and skip the search
				if(gap_opt == 1 && res == '-'){
					gap = 1;
					return 1;
				}

				int slot = 0;
				while(slot < 21 && b_index[slot] != res){
					++slot;
				}

				return (slot < 21) ? slot : 0;
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/

			int calculate_column_score(int& col, vector<string >& clade, vector< string >& clade2, int& out_index, vector<float>& b_scores, int& size_clade, vector<int>& blosum_matrix, vector<char>& blosum_index){


				int i, hit, test_index, new_score, diff_index, test_col;
				char test_res, diff_res;



				b_scores.resize(size_clade);
				for(i=0;i<size_clade;++i){

					test_res = clade[i][col];

					hit = 0;
					hit = check_aminos(test_res);	
					if(hit ==0){
						bad_chk = 1;
					}else{
						bad_chk = 0;
					}
					test_index = get_res_index(blosum_index, test_res);
					int score;
					int tot=21*test_index;
					tot+=out_index;

				//if(test_res=="X"){
				/*cerr << "test_res=" << test_res << endl;
				cerr << "score=" << score << endl;
				cerr << "test_index=" << test_index << endl;
				cerr << "tot=" << tot << endl;*/
			//	}

					if(bad_chk == 0){
						score = blosum_matrix[(21*test_index)+out_index];
					}else{

						score =0;
					}	
					b_scores[i] = (float)score;	

				}

				if(adhoc == 1){
					test_col = col+1;
					diff_res = clade2[0][col];
					diff_index = get_res_index(blosum_index, diff_res);

					if(bad_chk == 0){
						new_score = blosum_matrix[(21*diff_index)+out_index];	
					}else{
						new_score = 0;
					}
					b_scores[0]=(float)new_score;
				}

				return 0;
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/

			/*
			 * Appends a summary of column `col` of `clade` to `res_string`.
			 *
			 * For each distinct residue found among the first `clade_size`
			 * sequences, in the order check_string_unique() reports them, the text
			 * "<residue>(<count>) " is appended, where <count> is the number of
			 * sequences carrying that residue at the column.
			 */
			void get_column_residues(vector< string >& clade, int& clade_size, int& col, string& res_string){

				// residue carried by each sequence at this column
				vector<char> column_chars(clade_size);
				for(int row=0;row<clade_size;++row){
					column_chars[row] = clade[row][col];
				}

				vector<char> distinct;
				check_string_unique(column_chars, distinct);

				const int n_distinct = distinct.size();
				const int n_chars = column_chars.size();
				char digits[20];

				for(int u=0;u<n_distinct;++u){

					int occurrences = 0;
					for(int c=0;c<n_chars;++c){
						if(distinct[u] == column_chars[c]){
							++occurrences;
						}
					}

					sprintf(digits, "%d", occurrences);

					res_string += distinct[u];
					res_string += '(';
					res_string += digits;
					res_string += ") ";
				}
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/

			/*
			 * Appends to `unique` the distinct characters of `string`, in order of
			 * first appearance.
			 *
			 * The first character is always appended; every later character is
			 * appended only if it is not already present anywhere in `unique`
			 * (including entries that were there before the call).
			 * An empty `string` leaves `unique` untouched.
			 */
			void check_string_unique(vector<char>& string, vector<char>& unique){

				// nothing to scan: reading string[0] would be out of range
				if(string.empty()){
					return;
				}

				unique.push_back(string[0]);

				const int string_size = string.size();
				for(int i=1;i<string_size;++i){

					bool seen = false;
					for(int k=0;k<(signed)unique.size();++k){
						if(string[i]==unique[k]){
							seen = true;
							break;
						}
					}

					if(!seen){
						unique.push_back(string[i]);
					}
				}
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*      reports whether a string contains any non-gap (not '-') entry          */
			/*=============================================================================*/

			/*
			 * Reports whether `string` contains at least one character other
			 * than the gap symbol '-'.
			 * Returns 1 if such a character exists, 0 for an empty or all-gap
			 * string.  No check against the amino-acid alphabet is made.
			 */
			int check_string_aminos(string& string){

				const std::string::size_type first_non_gap = string.find_first_not_of('-');

				return (first_non_gap == std::string::npos) ? 0 : 1;
			}


			/*=============================================================================*/
			/*=============================================================================*/
			/*              gets blosum scores comparing within a clade                    */
			/*=============================================================================*/

			int within_clade_score(int& col, vector< string >& clade, int& clade_size, vector<int>& b_scores, vector<char>& blosum_index, vector<int>& blosum_matrix){

				/*	static int times=0;

					struct timeval firsty,  second,  lapsed; 
					cerr << "before" << endl;
					struct timezone tzp;  
					gettimeofday (&firsty, &tzp); 
				 */	 

				int i, j, m_score, res1_index, res2_index;
				char res2;

				int i_index = clade_size-2;
				int j_index = clade_size-1;



				for(i=0;i<=i_index;++i){
					//res1=clade[i][col];
					res1_index = get_res_index(blosum_index, clade[i][col]);
					int check1=0;
					check1= check_aminos(clade[i][col]);
					for(j=i+1;j<=j_index;++j){

						res2=clade[j][col];
						int check2=0;
						check2= check_aminos(res2);
						//		bad_chk=0;
						if((check1!=1)&&(check2!=1)){
							bad_chk=1;
						}

						res2_index = get_res_index(blosum_index, res2);
						if(bad_chk==0){
							m_score = blosum_matrix[(21*res1_index)+res2_index];
						}else{
							m_score = 0;
						}

						b_scores.push_back(m_score);
					}

				}
				/*gettimeofday (&second, &tzp); 
				  if (firsty.tv_usec > second.tv_usec) {  
				  second.tv_usec += 1000000;  
				  second.tv_sec--; 
				  } 
				  lapsed.tv_usec = second.tv_usec - firsty.tv_usec; 
				  lapsed.tv_sec = second.tv_sec - firsty.tv_sec; 
				  static int micros=0;
				  micros += lapsed.tv_usec;
				  times += lapsed.tv_sec;

				  if(micros >=1000000){
				  times+=1;
				  micros -= 1000000;
				  }

				  cerr << "time spent in within_clade = " << times << " seconds." << endl;
				 */

				return 0;

			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*                    check the amino acid conservation                        */
			/*=============================================================================*/

			/*
			 * Tests whether column `col` is identical across the first
			 * `size_clade` sequences of `clade`.
			 * Returns 1 when every sequence carries the same character at that
			 * column (also for zero or one sequence), 0 as soon as two
			 * neighbouring sequences differ.
			 */
			int column_conservation(vector< string >& clade, int& col, int& size_clade){

				for(int row=1;row<size_clade;++row){
					if(clade[row-1][col] != clade[row][col]){
						return 0;
					}
				}

				return 1;
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*         calculates the fraction of the clade which is conserved             */
			/*=============================================================================*/

			/*
			 * Computes a conservation fraction for column `col` over the first
			 * `size_clade` sequences of `clade`.
			 *
			 * The occurrences of each distinct residue in the column are counted,
			 * the counts are ordered with qsort() using compare_ints, and the first
			 * count after sorting is divided by size_clade.
			 * Presumably compare_ints sorts in descending order so that the result
			 * is the share of the most common residue - verify against its
			 * definition.
			 *
			 * Returns 0 when size_clade is not positive (there is no column to
			 * examine and the ratio would be undefined).
			 */
			float f_conservation(int& col, vector< string >& clade, int size_clade){

				if(size_clade <= 0){
					return 0.0f;
				}

				// characters found at this column, one per sequence
				vector<char> column;
				column.reserve(size_clade);
				for(int y=0;y<size_clade;++y){
					column.push_back(clade[y][col]);
				}

				vector<char> residue;
				check_string_unique(column, residue);

				// one counter per distinct residue; never empty because column is not
				vector<int> totals(residue.size(), 0);
				for(unsigned int i=0;i<totals.size();++i){
					for(unsigned int j=0;j<column.size();++j){
						if(column[j] == residue[i]){
							++totals[i];
						}
					}
				}

				qsort(&totals[0], totals.size(), sizeof(int), compare_ints);

				return (float)totals[0]/ (float)size_clade;
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/
			/*
			 * Writes the current working directory, followed by "/", into `pwd`.
			 *
			 * pwd  caller-supplied buffer; as before it is assumed to hold at
			 *      least 2000 bytes.
			 *
			 * getcwd() is limited to 1999 bytes so the trailing "/" and the
			 * terminator always fit.  If the directory cannot be obtained (or does
			 * not fit), `pwd` is set to the empty string so that a file name
			 * appended by the caller is still a valid relative path.
			 */
			void Pwd(char *pwd){
				if(getcwd(pwd, 1999) == NULL){
					pwd[0] = '\0';
					return;
				}
				strcat(pwd, "/");
			}

			/*=============================================================================*/
			/*=============================================================================*/
			/*=============================================================================*/
