
/*

Copyright 2010 Brian Caffrey, Tom Williams, Mario Fares.


This file is part of Clusterfunc.

    Clusterfunc is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Clusterfunc is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Clusterfunc.  If not, see <http://www.gnu.org/licenses/>.


*/
#include<stdio.h>
#include<getopt.h>
#include<stdlib.h>/* use of NULL, new/malloc */
#include<cstring>
#include<string>
#include<string.h>
#include<math.h>
#include<dirent.h>
#include<unistd.h>
#include<sys/stat.h>
#include<sys/time.h>
#include<getopt.h>
#include<vector>
#include<gsl/gsl_randist.h>
#include<gsl/gsl_rng.h>
#include<gsl/gsl_cdf.h>
#include <iostream>
#include <fstream>
#include <iomanip>

// From SeqLib:
#include <Seq/Alphabet.h>
#include <Seq/VectorSiteContainer.h>
#include <Seq/SequenceApplicationTools.h>
#include <Seq/SiteTools.h>
#include <Seq/AlignedSequenceContainer.h>
#include <Seq/SequenceApplicationTools.h>

// From PhylLib:
#include <Phyl/TreeTemplate.h>
#include <Phyl/PhylogeneticsApplicationTools.h>
#include <Phyl/NonHomogeneousSequenceSimulator.h>
#include <Phyl/SequenceSimulationTools.h>
#include <Phyl/SubstitutionModelSetTools.h>
#include <Phyl/Newick.h>
#include<Phyl/DistanceEstimation.h>
#include <Phyl/BioNJ.h>

// From NumCalc:
#include <NumCalc/DiscreteDistribution.h>
#include <NumCalc/ConstantDistribution.h>
#include <NumCalc/DataTable.h>
#include <NumCalc/AutoParameter.h>
// From Utils:
#include <Utils/AttributesTools.h>
#include <Utils/FileTools.h>
#include <Utils/ApplicationTools.h>
#include <Utils/Number.h>


using namespace bpp;





/**
 * Plain aggregate of string lists for two clades and an outgroup.
 *
 * Every group has a `*_names` vector and a companion vector without the
 * suffix.
 * NOTE(review): presumably the companion vector holds the sequences that
 * belong to the names, index for index -- confirm against the code that
 * fills this struct.
 *
 * Standard-library types carry an explicit std:: prefix so the struct does
 * not rely on a using-directive leaking out of an included header.
 */
struct FDSet{
	std::vector<std::string> clade1_names;
	std::vector<std::string> clade1;
	std::vector<std::string> clade2_names;
	std::vector<std::string> clade2;
	std::vector<std::string> outgroup_names;
	std::vector<std::string> outgroup;
};


/**
 * Bookkeeping of integer tables addressed by a tag name and a species name.
 *
 * Only the declaration is visible here; the member functions are defined
 * elsewhere, so the notes below describe the signatures and hedge anything
 * that depends on the definitions.
 *
 * All standard-library types are written with an explicit std:: prefix.
 * The types are unchanged, so out-of-class definitions keep matching, and
 * the class no longer depends on a using-directive leaking out of a header.
 */
class TagMatrix{

	public:
		// NOTE(review): presumably the column index belonging to Tag -- confirm in the definition.
		int getCol(std::string Tag);
		// NOTE(review): presumably the row index belonging to Species -- confirm in the definition.
		int getRow(std::string Species);
		void MakeTree();
		void increment(std::string Tag, std::string Species);
		int getTagSize();
		void incrementtotal(std::string Tag, std::string Species);
		void enrichment();
		// String key -> list of strings.
		// NOTE(review): by its name, file names grouped under a key -- confirm at the call sites.
		std::map<std::string, std::vector<std::string> > Filenames;
		// Iterator of the same map type as Filenames, exposed as a public member.
		std::map<std::string, std::vector<std::string> >::iterator p;
		std::vector< std::vector<int> > fdtotal;
	private:
		std::vector< std::vector<int> > speciestotal;
		std::vector< std::vector<int> > enriched;
		std::vector< std::vector<int> > species_tag_solution;
		std::vector<int> tag_enrichment;
		std::vector<int> species_enrichment;
		std::vector<std::string> TagNames;
		std::vector<std::string> SpeciesNames;
/*		void Species_tag_enrichment(vector< vector<int> >& fd, vector< vector<int> >& total, vector< vector<int> >& species_tag_enrichment);
		void Species_enrichment(vector< vector<int> >& fd, vector< vector<int> >& total, vector<int>& species_enrichment);
		void Tag_enrichment(vector< vector<int> >& fd, vector< vector<int> >& total, vector<int>& tag_enrichment);
*/
		void AddTagSpec(std::string tag_name, std::string Spec_name);
		void AddSpecies(std::string species_name);
		// NOTE(review): the parameter is called species_name although the method
		// is AddTag -- looks like a copy-paste slip; confirm and rename together
		// with the definition.
		void AddTag(std::string species_name);
		// NOTE(review): presumably name -> index lookups for TagNames / SpeciesNames -- confirm.
		std::map<std::string, int> tag_map;
		std::map<std::string, int> species_map;
};

//int fd_call(vector<int>& blosum_matrix, vector<char>& blosum_index, vector<string>& names, vector< string >& sequences, vector<FDSet>& fd_clades, vector< TreeTemplate<Node> > tree_vector, TagMatrix& summary_matrix, vector<string>& Tag1, string fill, int supress, char mat_file[], const char *infile, vector<string>& treestring);
int fd_call(vector<int>& blosum_matrix, vector<char>& blosum_index, vector<string>& names, vector< string >& sequences, vector<FDSet>& fd_clades, vector< TreeTemplate<Node> > tree_vector, TagMatrix& summary_matrix, vector<string>& Tag1, string fill, int supress, char mat_file[], const char *infile, vector<string>& treestring, vector<int>& tag_fd, vector<int>& tag_total, map<string, int>& tag_map, float threshold, vector<string>& tag_vector);
int make_setss(TreeTemplate<Node>& tree, vector<string>& outgroup, vector< TreeTemplate<Node> >& tree_vector, vector<string>& seq_names, vector< string >& sequences, vector<FDSet>& fd_clades, TreeTemplate<Node>& original, vector<string>& treestring);
int Correlation(vector<int>& spec_totala, vector<int>& spec_totalb);
int fd_tree(TreeTemplate<Node> *tree, vector<string>& seq_names, vector< string >& sequences, vector<FDSet>& fd_clades, vector< TreeTemplate<Node> >& tree_vector, vector<string>& treestring); 
vector<string> printDirectoryContent(char *dir, char *pwd);
DistanceMatrix* ScoreDist(vector<string>& names, vector< string >& sequences);
int read_matrix(vector<int>& array, vector<char>& labels, FILE *file);
//TreeTemplate<Node> *create_input_tree(char *input_file, vector<string>& seq_names, vector< string >& sequences, vector<string>& Tag1, int first);
TreeTemplate<Node> *create_input_tree(char *input_file, vector<string>& seq_names, vector< string >& sequences, vector<string>& Tag1, int first);
int Read_fasta(vector<string>& names, vector< string >& sequences, char *inname, vector<string>& Tag1);
int compare_to_alignment(vector<string>& names, string tree_name);
void Pwd(char *pwd);
