// PeakAnnotator.cpp : Implements the PeakAnnotator class (input loading and peak/gene annotation).
//

#include "PeakAnnotator.h"
#include <time.h>
#include <memory>
using namespace std;

// Builds an annotator and loads all of its inputs right away.
// Every argument is handed unchanged to Initialize(), which reads and parses the
// input files and groups the positions by chromosome.
PeakAnnotator::PeakAnnotator(string peakFile,
							 string geneFile,
							 string outputFile,
							 string symbolFile,
							 bool mustStrand,
							 string chrSizeFile,
							 int numRandomDataSets)
{
	this->Initialize(peakFile, geneFile, outputFile, symbolFile,
					 mustStrand, chrSizeFile, numRandomDataSets);
}

// Opens the main output file, reads the optional symbol and chromosome-size tables,
// and parses the gene (BED) file and the peak file into per-chromosome vectors
// (m_MapOfGenes / m_MapOfPeaks).
//
// Parameters:
//   peakFile          - path of the peak file (parsed with BEDParser)
//   geneFile          - path of the gene/BED file (parsed with BEDParser)
//   outputFile        - path of the main output file; it is opened here
//   symbolFile        - path of an ID -> symbol table, or the literal "NULL" for none
//   mustStrand        - when true every gene must carry strand information
//   chrSizeFile       - path of a chromosome-size table, or the literal "NULL" for none
//   numRandomDataSets - stored in m_NumRandomDataSets
//
// Throws std::string when the output file cannot be opened, when strand information
// is required but missing, or when either position file is not sorted by
// chromosome, start position, end position.
void PeakAnnotator::Initialize(string peakFile, string geneFile, string outputFile, string symbolFile,bool mustStrand,
							   string chrSizeFile, int numRandomDataSets)
{
	cout << "Starting..." << endl;

	//open output file
	m_NumRandomDataSets = numRandomDataSets;
	m_outFile.open(outputFile.c_str());
	if(!m_outFile.is_open())
		throw "Failed to open output file: " + outputFile;

	//keep input file names
	m_File1Name = peakFile;
	m_File2Name = geneFile;

	//keep output file names
	m_OutFileName = outputFile;
	m_OverlapFileName = "Overlap_" + outputFile;

	//The key of the map is the accession number (ID) and the value is the gene's symbol
	map <string,string> IDMap;
	const bool hasSymbols = (symbolFile != "NULL");
	if(hasSymbols)
	{
		GeneralParser* symbolParser = new GeneralParser(symbolFile);
		cout << "Reading Symbol File" << endl;
		//the first field of each line is the ID and the second is the symbol;
		//the pairs are stored in the map for later lookup
		vector<string>* symbols = NULL;
		while((symbols = symbolParser->ParseLine()) != NULL)
		{
			//a line with fewer than two fields cannot provide a mapping - skip it
			//instead of indexing past the end of the vector
			if(symbols->size() >= 2)
			{
				IDMap[(*symbols)[0]] = (*symbols)[1];
			}
			delete symbols;
		}
		delete symbolParser;
	}

	if(chrSizeFile != "NULL")
	{
		GeneralParser* sizeParser = new GeneralParser(chrSizeFile);
		vector<string>* Sizes = NULL;
		while((Sizes = sizeParser->ParseLine()) != NULL)
		{
			//same guard as above: both the name and the size field are required
			if(Sizes->size() >= 2)
			{
				//normalise the chromosome name: force a "chr" prefix and map chrMT to chrM
				string chr = (*Sizes)[0];
				if(chr.substr(0,3) != "CHR" &&
				   chr.substr(0,3) != "chr")
				{
					chr = "chr" + chr;
				}
				if(chr == "chrMT")
				{
					chr = "chrM";
				}
				m_SizeMap[chr] = AtoI((*Sizes)[1]);
			}
			delete Sizes;
		}
		delete sizeParser;
	}

	//this map is used to mark which of the chromosomes are only in BED file or only in Peak file or in both
	//chromosomes that are in both will have value > 1
	//chromosomes that are only in BED file will have value = 1
	//chromosomes that are only in Peak file will have value = 0
	map<string,int> ChromMap;
	map<string,int>::iterator ChromMapIter;

	//BED Parser is used to parse the BED file into objects of type "Position"
	cout << "Reading BED File" << endl;
	BEDParser* bed = new BEDParser(geneFile);
	Position* g = NULL;
	Position* prev = NULL;

	//While parsing we build, for each gene, the list of genes containing it (m_Containers).
	//The input is sorted by start then end, so every earlier gene on the chromosome starts
	//at or before the current one. Walking backwards from the most recent gene:
	// - an earlier gene that ends at or after the current gene is a container candidate;
	// - the first earlier gene that ends before the current gene stops the walk, because any
	//   gene further back that contains the current gene must also contain that one and is
	//   therefore already listed in its containers.
	while((g = bed->ParseLine()) != NULL)
	{
		if(hasSymbols)
		{
			//find() instead of operator[] so that genes without a symbol do not grow the map;
			//a missing ID still yields an empty symbol
			map<string,string>::const_iterator symbolItr = IDMap.find(g->m_Id);
			g->m_Symbol = (symbolItr != IDMap.end()) ? symbolItr->second : string();
		}
		//mark the chromosome as one that was found in BED file
		ChromMap[g->m_Chromosome] = 1;
		if(m_MapOfGenes[g->m_Chromosome] == NULL)
		{
			m_MapOfGenes[g->m_Chromosome] = new vector<Position*>();
		}
		//make sure there is strand information
		//this is required only when searching for closest downstream gene
		if(mustStrand && (g->m_Strand == Strand_Unknown))
		{
			//g is not stored anywhere yet, release it together with the parser
			delete g;
			delete bed;
			string err = "Strand information is missing";
			throw err;
		}
		if(prev != NULL)
		{
			//need to check that the files are sorted
			if((prev->m_Chromosome == g->m_Chromosome)&&
				((prev->m_Start > g->m_Start) ||
				((prev->m_Start == g->m_Start) && (prev->m_End > g->m_End))))
			{
				delete g;
				delete bed;
				string err = "Files must be sorted by chromosomes,start position, end position";
				throw err;
			}

			//collect the containers of g from the genes already read on its chromosome.
			//The walk must include index 0: the first gene of a chromosome can contain
			//later genes too.
			vector<Position*>* vec = m_MapOfGenes[g->m_Chromosome];
			for(int i = (int)vec->size() - 1; i >= 0; i--)
			{
				Position* left = vec->at(i);
				if(left->m_End < g->m_End)
				{
					//left does not contain g: inherit those of left's containers that contain g, then stop
					list<Position*>::iterator itr;
					for(itr = left->m_Containers->begin(); itr != left->m_Containers->end(); ++itr)
					{
						Position* p = *itr;
						if(g->IsContained(*p))
						{
							g->m_Containers->push_back(p);
						}
					}
					break;
				}
				else if(g->IsContained(*left))
				{
					g->m_Containers->push_back(left);
				}
			}
		}

		//add the current gene to the list of genes in its chromosome
		m_MapOfGenes[g->m_Chromosome]->push_back(g);
		prev = g;
	}
	delete bed;

	//we use the same BED parser to parse the peak file too
	BEDParser* peak = new BEDParser(peakFile);
	Position* p = NULL;
	prev = NULL;
	cout << "Reading Peak File" << endl;
	while((p = peak->ParseLine()) != NULL)
	{
		if((prev != NULL) &&
			(prev->m_Chromosome == p->m_Chromosome)&&
			((prev->m_Start > p->m_Start) ||
			((prev->m_Start == p->m_Start) && (prev->m_End > p->m_End))))
		{
			//p is not stored anywhere yet, release it together with the parser
			delete p;
			delete peak;
			string err = "Files must be sorted by chromosomes,start position, end position";
			throw err;
		}
		//add the current peak to the list of peaks in its chromosome
		if(m_MapOfPeaks[p->m_Chromosome] == NULL)
		{
			m_MapOfPeaks[p->m_Chromosome] = new vector<Position*>();
		}
		m_MapOfPeaks[p->m_Chromosome]->push_back(p);
		//operator[] creates a 0 entry for chromosomes seen only in the peak file
		if(ChromMap[p->m_Chromosome] > 0) // is also in BED file
		{
			ChromMap[p->m_Chromosome]++;
		}
		prev = p;
	}
	delete peak;

	//make sure every chromosome has a (possibly empty) vector in both maps
	for(ChromMapIter = ChromMap.begin();ChromMapIter != ChromMap.end();ChromMapIter++)
	{
		if(ChromMapIter->second == 0)
		{
			//chromosome is only in peak file and not in BED file
			cout << "Chromosome " << ChromMapIter->first  << " not found in BED file" << endl;
			//add empty vector to the second map
			m_MapOfGenes[ChromMapIter->first] = new vector<Position*>();
		}
		if(ChromMapIter->second == 1)
		{
			//chromosome is only in BED file and not in Peak file
			cout << "No genomic location detected in chromosome " << ChromMapIter->first << endl;
			//add empty vector to the first map
			m_MapOfPeaks[ChromMapIter->first] = new vector<Position*>();
		}
	}
}


void PeakAnnotator::ClosestDownstreamGenes()
{		
	m_outOverlapFile.open(m_OverlapFileName.c_str());
	if(!m_outOverlapFile.is_open())
		throw "Failed to open output file: " + m_OverlapFileName;

	//print headers	
	m_outFile << "Chromosome"<<"\t"<<"Start"<<"\t"<<"End"<<"\t"<<"#Overlaped_Genes"		 
		 <<"\t"<<"Downstream_FW_Gene"<<"\t"<<"Symbol"<<"\t"<<"Distance"
		 <<"\t"<<"Downstream_REV_Gene"<<"\t"<<"Symbol"<<"\t"<<"Distance"<<endl;

	m_outOverlapFile << "Chromosome"<<"\t"<<"Start"<<"\t"<<"End"<<"\t"<<"OverlapGene"
		 <<"\t"<<"Symbol"<<"\t"<<"Overlap_Begin" << "\t"<<"Overlap_Center"<<"\t"<<"Overlap_End"<<endl;	

	map<string,vector<Position* >* >::const_iterator itr;	
	//run over the chromosomes
	for(itr = m_MapOfPeaks.begin(); itr != m_MapOfPeaks.end(); ++itr)
	{
		//Get the relevant vectors for the current choromosome
		vector<Position*> *vecPeak = (vector<Position*>*)(itr->second);
		vector<Position*> *vecGenes = (vector<Position*>*)m_MapOfGenes[(itr->first)];		

		//run over the peaks - for each peak find the closest gene
		for(int i=0;i<vecPeak->size();i++)
		{
			int temp;
			GeneElement* overlapStart;
			GeneElement* overlapEnd;
			GeneElement* overlapCentral;
			int numOfOverlapGenes = 0;
			bool foundClosestDownstreamPosGene = false;
			Position* ClosestDownstreamPosGene = NULL;
			int ClosestDownstreamPosDistance=0;

			//look for the closest gene to the right of the peak. This gene.start > peak.end.
			int closestPositionIndex = FindRightAdjacentGene(vecGenes,vecPeak->at(i)->m_End);
			if(closestPositionIndex < 0)
			{
				//no such element no need to move right
				closestPositionIndex = vecGenes->size();
			}
			temp = closestPositionIndex;		
							
			//move right until found closest downstream gene. the genes here cannot overlap the peak
			//because start gene > end peak
			while(foundClosestDownstreamPosGene == false && temp < vecGenes->size())
			{
				ClosestDownstreamPosGene = vecGenes->at(temp++);
				if(ClosestDownstreamPosGene->m_Strand==Strand_Positive)
				{
					//we found the closest downstream gene of the positive strand
					foundClosestDownstreamPosGene=true;

					//for positive strand the distance is calculated as the peak's Central Point - Gene's start point
					ClosestDownstreamPosDistance = abs(ClosestDownstreamPosGene->Minus(vecPeak->at(i)));
				}			
			}
						
			temp = closestPositionIndex;
			bool foundClosestDownstreamNegGene = false;			
			int ClosestDownstreamNegDistance = 0;			
			Position* ClosestDownstreamNegGene = NULL;			
			//start moving left from the gene we found in the binary search
			//when moving left - genes can overlap the peak
			while(foundClosestDownstreamNegGene == false && temp > 0)			
			{
				ClosestDownstreamNegGene = vecGenes->at(--temp);
				if(ClosestDownstreamNegGene->Overlap(vecPeak->at(i)))
				{					
					ClosestDownstreamNegGene->FindOverlapGeneElement(vecPeak->at(i),overlapStart,overlapCentral,overlapEnd);
					PrintOverlapPosition(ClosestDownstreamNegGene,vecPeak->at(i),overlapStart,overlapCentral,overlapEnd);
					numOfOverlapGenes ++;
				}
				else if(ClosestDownstreamNegGene->m_Strand == Strand_Negative)
				{
					//if the gene doesn't overlap and is negative strand - we found a candidate to be the closest downstream gene
					//other genes that containing this gene were not checked yet (becasue their start position is smaller then this
					//gene's start position, but their end position is bigger that this gene's end position - because they contain it
					foundClosestDownstreamNegGene = true;

					//for positive strand the distance is calculated as the Gene's end point minus the peak's Central Point
					ClosestDownstreamNegDistance = abs(ClosestDownstreamNegGene->Minus(vecPeak->at(i)));					

					//check if its containers overlaps or closest negative strand downstream gene
					list<Position*>::iterator itr;
					Position* currentPosition = ClosestDownstreamNegGene;
					for(itr = currentPosition->m_Containers->begin(); itr != currentPosition->m_Containers->end(); ++itr)
					{
						if((*itr)->Overlap(vecPeak->at(i)))
						{
							//if the containing gene overlpas - we want to print it but this is not a downstream gene
							(*itr)->FindOverlapGeneElement(vecPeak->at(i),overlapStart,overlapCentral,overlapEnd);
							PrintOverlapPosition((*itr),vecPeak->at(i),overlapStart,overlapCentral,overlapEnd);
							numOfOverlapGenes ++;
						}
						else if((*itr)->m_Strand==Strand_Negative)
						{
							//if a container is not overlapping then it must be closer to the peak
							//but just in case we check it again.

							//for negative strand the distance is calculated as the Gene's end point - peak's Central Point
							int newDistance = abs((*itr)->Minus(vecPeak->at(i)));
							if(ClosestDownstreamNegDistance > newDistance)
							{
								ClosestDownstreamNegDistance = newDistance;
								ClosestDownstreamNegGene = (*itr);										
							}
						}
					}
				}				
			}	
			if(!foundClosestDownstreamPosGene)
				ClosestDownstreamPosGene = NULL;
			if(!foundClosestDownstreamNegGene)
				ClosestDownstreamNegGene = NULL;

			PrintClosestGene(ClosestDownstreamPosGene,ClosestDownstreamPosDistance,
				ClosestDownstreamNegGene,ClosestDownstreamNegDistance,vecPeak->at(i),numOfOverlapGenes);
		}
	}

	cout << "Program Ended successfully" << endl;
}


void PeakAnnotator::ClosestTSS(bool checkOverlap)
{	
	//print headers
	m_outFile << "Chromosome\tPeakStart\tPeakEnd\tDistance\tGeneStart\tGeneEnd\tClosestTSS_ID\tSymbol\tStrand" << endl;		

	map<string,vector<Position* >* >::const_iterator itr;
	//run over the chromosomes
	for(itr = m_MapOfPeaks.begin(); itr != m_MapOfPeaks.end(); ++itr)
	{
		//Get the relevant vectors for the current choromosome
		vector<Position*>* vecPeak = (vector<Position*>*)(itr->second);
		vector<Position*>* vecGenes = (vector<Position*>*)m_MapOfGenes[(itr->first)];
		for(int i=0;i<vecPeak->size();i++)
		{
			if(vecGenes->size() == 0)
			{
				//no genes in the choromosome - nothing to do
				continue;
			}
			Position* ClosestPosition = NULL;
			int closestDistance = INT_MAX;
			list<Position*>::iterator itr;

			//look for the closest gene to the right of the peak. This gene.start > peak.end.
			int closestPositionIndex = FindRightAdjacentGene(vecGenes,(int)vecPeak->at(i)->GetCentralPoint());

			int stopIndex = 0;
			if(closestPositionIndex < 0)
			{
				closestPositionIndex = vecGenes->size();
			}
			else
			{				
				stopIndex = vecGenes->at(closestPositionIndex)->m_End;
			}

			int temp = closestPositionIndex;

			//move right until stop condition is met
			while(temp < vecGenes->size())
			{
				Position* currentPosition = vecGenes->at(temp);	
				int curDistance = 0;
				
				curDistance = currentPosition->Minus(vecPeak->at(i));				

				if(checkOverlap && vecPeak->at(i)->Overlap(currentPosition))
				{
					curDistance = 0;
				}
				if(abs(closestDistance) > abs(curDistance))
				{
					closestDistance = curDistance;
					ClosestPosition = currentPosition;
				}
				temp++;
				//if this condition is met there is no need to contine checking because
				//there are no genes further to the right that can be closer				
				if((currentPosition->m_Start >= stopIndex) || (currentPosition->m_Strand == Strand_Positive))
					break;
			}
			
			closestPositionIndex --;

			if(closestPositionIndex >= 0)
			{
				if(ClosestPosition != NULL)
				{
					stopIndex = max(vecGenes->at(closestPositionIndex)->m_Start,
						(int)vecPeak->at(i)->GetCentralPoint() - abs(closestDistance));
				}
				else
				{
					stopIndex = vecGenes->at(closestPositionIndex)->m_Start;
				}
			}
			//move left until stop condition is met
			while(closestPositionIndex >= 0)
			{
				Position* currentPosition = vecGenes->at(closestPositionIndex);

				int curDistance = 0;
				
				curDistance = currentPosition->Minus(vecPeak->at(i));

				if(checkOverlap && vecPeak->at(i)->Overlap(currentPosition))
				{
					curDistance = 0;
				}
				if(abs(closestDistance) > abs(curDistance))
				{
					closestDistance = curDistance;
					ClosestPosition = currentPosition;
				}
				//if this condition is met there is no need to contine checking because
				//there are no genes further to the left that can be closer beside those containing the current gene
				if(currentPosition->m_End < stopIndex)
				{
					//just check containers				
					for(itr = currentPosition->m_Containers->begin(); itr != currentPosition->m_Containers->end(); ++itr)
					{
						int curDistance = 0;

						curDistance = (*itr)->Minus(vecPeak->at(i));

						if(checkOverlap && vecPeak->at(i)->Overlap(*itr))
						{
							curDistance = 0;
						}
						if(abs(closestDistance) > abs(curDistance))
						{
							closestDistance = curDistance;
							ClosestPosition = (*itr);
						}
					}
					break;
				}				
				closestPositionIndex --;
			}			
			m_outFile << vecPeak->at(i)->m_Chromosome << "\t" << vecPeak->at(i)->m_Start << "\t" << vecPeak->at(i)->m_End << "\t";
			m_outFile << closestDistance << "\t";
			m_outFile << ClosestPosition->m_Start << "\t" << ClosestPosition->m_End << "\t";
			m_outFile << ClosestPosition->m_Id << "\t";
			m_outFile << ClosestPosition->m_Symbol << "\t";
			if(ClosestPosition->m_Strand==Strand_Positive)
			{
				m_outFile << "+";
			}
			else if(ClosestPosition->m_Strand==Strand_Negative)
			{
				m_outFile << "-";
			}
			else
			{
				m_outFile << "\t";
			}
			m_outFile << endl;		
		}	
	}
	cout << "Program Ended successfully" << endl;	
}


void PeakAnnotator::OverlapPositions()
{
	//print headers
	m_outFile << "file1: " << m_File1Name << "\tfile2: " << m_File2Name << endl;
	m_outFile << "Chromosome\tStart1\tEnd1\tName1\tStart2\tEnd2\tName2"<<endl;		

	//counts all unique peaks in file1 that overlaps those in file 2
	int counter = 0;
	
	ofstream Unique1File;
	ofstream Unique2File;
	string unique1 = GetFileName(m_File1Name) + ".unique";
	string unique2 = GetFileName(m_File2Name) + ".unique";
	Unique1File.open(unique1.c_str());
	if(!Unique1File.is_open())
		throw "Failed to open output file: " + unique1;

	Unique2File.open(unique2.c_str());
	if(!Unique2File.is_open())
		throw "Failed to open output file: " + unique2;

	Unique1File << "Chromosome\tStart\tEnd\tName" << endl;
	Unique2File << "Chromosome\tStart\tEnd\tName" << endl;

	map<string,vector<Position*>* >::const_iterator itr;
	for(itr = m_MapOfPeaks.begin(); itr != m_MapOfPeaks.end(); ++itr)
	{		
		vector<Position*>* vecPeak1 = (vector<Position*>*)(itr->second);
		vector<Position*>* vecPeak2 = (vector<Position*>*)m_MapOfGenes[(itr->first)];		
		if(vecPeak2 == NULL)
			continue;

		for(int i=0;i<vecPeak1->size();i++)
		{
			//count is needed in order not to count (in counter) the same peak twice
			int count = 0;

			int closestPositionIndex = FindRightAdjacentGene(vecPeak2,vecPeak1->at(i)->m_End);			
			//all positions to the right are not overlap - no need to check for overlaps
						
			//move from closest to the left	
			int index = closestPositionIndex - 1;	
			if(index < 0)
				index = vecPeak2->size() - 1;

			//each overlap peak is marked in both vectors
			while(index >= 0 && vecPeak2->at(index)->Overlap(vecPeak1->at(i)))
			{
				vecPeak1->at(i)->m_OverlapFlag = true;
				vecPeak2->at(index)->m_OverlapFlag = true;
				m_outFile << vecPeak1->at(i)->m_Chromosome << "\t" << vecPeak1->at(i)->m_Start << "\t" << vecPeak1->at(i)->m_End <<"\t" << vecPeak1->at(i)->m_Id << "\t";
				m_outFile << vecPeak2->at(index)->m_Start << "\t" << vecPeak2->at(index)->m_End << "\t" << vecPeak2->at(index)->m_Id << endl;
				index--;
				if(count == 0)
				{
					count++;
					counter++;
				}
			}
			if(index >= 0)
			{
				//check also those containg the last gene we checked
				list<Position*>::iterator itr;				
				for(itr = vecPeak2->at(index)->m_Containers->begin(); itr != vecPeak2->at(index)->m_Containers->end(); ++itr)
				{
					if((*itr)->Overlap(vecPeak1->at(i)))
					{
						vecPeak1->at(i)->m_OverlapFlag = true;
						(*itr)->m_OverlapFlag = true;
						m_outFile << vecPeak1->at(i)->m_Chromosome << "\t" << vecPeak1->at(i)->m_Start << "\t" << vecPeak1->at(i)->m_End <<"\t" << vecPeak1->at(i)->m_Id << "\t";
						m_outFile << (*itr)->m_Start << "\t" << (*itr)->m_End << "\t" << (*itr)->m_Id << endl;					
						if(count == 0)
						{
							count++;
							counter++;
						}
					}
				}				
			}	
		}

		for(int i=0;i<vecPeak1->size();i++)
		{
			if(vecPeak1->at(i)->m_OverlapFlag == false)
			{
				Unique1File << vecPeak1->at(i)->m_Chromosome << "\t" << vecPeak1->at(i)->m_Start << "\t" << vecPeak1->at(i)->m_End << "\t" << vecPeak1->at(i)->m_Id << endl;
			}
		}
		for(int i=0;i<vecPeak2->size();i++)
		{
			if(vecPeak2->at(i)->m_OverlapFlag == false)
			{
				Unique2File << vecPeak2->at(i)->m_Chromosome << "\t" << vecPeak2->at(i)->m_Start << "\t" << vecPeak2->at(i)->m_End << "\t" << vecPeak2->at(i)->m_Id << endl;
			}
		}
	}

	Unique1File.close();
	Unique2File.close();	
	m_outFile.close();

	//create random datasets based on the second file and count overlaps
	if(m_SizeMap.size()>0)
    {
		cout << "Start to create random datasets on " << m_File1Name << " file" << endl;

        //randomCounter counts how many datasets has more or equal numbers of overlaps as the original file
        float randomCounter = 0;
        float sum = 0;
        int num = 0;     
		srand ( time(NULL) );

        for(int r = 0; r<m_NumRandomDataSets; r++)
        {
            num++;
            if(num % 10==0)
            {
                cout << num << " data sets have been processed" << endl;
            }
            map<string,vector<Position*>*>* random = CreateRandomDataset();
            int count = RandomOverlaps(random);
            sum+=count;
            if(count>=counter)
            {
                randomCounter++;
            }
			map<string,vector<Position*>*>::const_iterator itr;	
			for(itr = random->begin(); itr != random->end(); ++itr)
			{
				vector<Position*>* vec = itr->second;
				for(int i=0;i<vec->size();i++)
				{
					delete vec->at(i);
				}
				delete vec;
			}
			delete random;
        }
        float Pval = randomCounter / m_NumRandomDataSets;
        if(Pval==0.0)
        {
            Pval = 1.0 / m_NumRandomDataSets;
        }
        float average = sum / m_NumRandomDataSets;
		float foldChange;
		if(average != 0)
		{
			foldChange = (float)counter / average;		
			cout << counter << " " << m_File1Name << " peaks overlap those of " << m_File2Name << "\n" <<
				(int)randomCounter << " random data sets had the same number or more overlaps than the original file\n" <<
				"Hence p value is less than " << Pval << "\n" << "The average number of overlaps in random data sets is " <<
				average << " fold change is " << foldChange << endl;
		}
		else
		{
			cout << counter << " " << m_File1Name << " peaks overlap those of " << m_File2Name << "\n" <<
				(int)randomCounter << " random data sets had the same number or more overlaps than the original file\n" <<
				"Hence p value is less than " << Pval << "\n" << "The average number of overlaps in random data sets is " <<
				average << " fold change is infinity" << endl;
		}
    }
	cout << "Program Ended successfully" << endl;	
}



// Builds a random data set mirroring m_MapOfPeaks: for every chromosome it creates the
// same number of positions with the same lengths, each placed at a random start taken
// from GetInt32Rand() modulo (chromosome size - length).
//
// Returns a heap-allocated map; the caller owns the map, the vectors and the positions.
// Throws std::string when a chromosome that has peaks is missing (size 0) in m_SizeMap.
map<string,vector<Position*>*>* PeakAnnotator::CreateRandomDataset()
{
	map<string,vector<Position*>*>::const_iterator itr;

	//validate before allocating anything, so that the error path does not leak a
	//partially built data set. Only chromosomes that actually have peaks are checked.
	for(itr = m_MapOfPeaks.begin(); itr != m_MapOfPeaks.end(); ++itr)
	{
		if(!itr->second->empty() && m_SizeMap[itr->first] == 0)
		{
			string err = (string)"Error occured: chromosome " + itr->first + " is missing from the chromsome sizes file";
			throw err;
		}
	}

	map<string,vector<Position*>*>* random = new map<string,vector<Position*>*>();
	for(itr = m_MapOfPeaks.begin(); itr != m_MapOfPeaks.end(); ++itr)
	{
		string chr = itr->first;
		vector<Position*>* vecOriginalPeak = itr->second;
		vector<Position*>* curChr = new vector<Position*>(vecOriginalPeak->size());

		//the size is the same for every peak of the chromosome - look it up once
		const int chrSize = vecOriginalPeak->empty() ? 0 : m_SizeMap[chr];

		for(size_t i = 0; i < vecOriginalPeak->size(); i++)
		{
			const int length = vecOriginalPeak->at(i)->m_End - vecOriginalPeak->at(i)->m_Start;

			//number of possible start coordinates. A peak as long as (or longer than) the
			//chromosome leaves no room: place it at 0 instead of computing a modulo by
			//zero or by a negative value.
			const int room = chrSize - length;
			const int Start = (room > 0) ? (GetInt32Rand() % room) : 0;

			curChr->at(i) = new Position(Start, Start+length, chr);
		}
		(*random)[chr] = curChr;
	}
	return random;
}

// Counts how many positions of a random data set overlap at least one position of
// m_MapOfGenes on the same chromosome. Each random position is counted at most once.
//
//   randomMapOfPeaks - chromosome -> positions, as produced by CreateRandomDataset()
// Returns the number of overlapping random positions.
int PeakAnnotator::RandomOverlaps(map<string,vector<Position*>* >* randomMapOfPeaks) 
{
	int counter=0;
	map<string,vector<Position*>*>::const_iterator itr;
	for(itr = randomMapOfPeaks->begin(); itr != randomMapOfPeaks->end(); ++itr)
	{
		vector<Position*>* vecPeak1 = itr->second;
		vector<Position*>* vecPeak2 = m_MapOfGenes[itr->first];
		//same guard as OverlapPositions(): nothing to compare against on this chromosome
		if(vecPeak2 == NULL)
			continue;

		for(size_t i = 0; i < vecPeak1->size(); i++)
		{
			Position* candidate = vecPeak1->at(i);

			//positions to the right of the right-adjacent one are not examined: start just
			//left of it, or at the last position when that yields a negative index
			int index = FindRightAdjacentGene(vecPeak2,candidate->m_End) - 1;
			if(index < 0)
				index = (int)vecPeak2->size() - 1;
			if(index < 0)
				continue;	//no positions on this chromosome

			//Only "does it overlap anything" matters here, so one hit is enough:
			//either the nearest position overlaps, or - when it does not - one of the
			//positions containing it does.
			Position* nearest = vecPeak2->at(index);
			bool overlaps = nearest->Overlap(candidate);
			if(!overlaps)
			{
				list<Position*>::iterator itr1;
				//advance itr1 (the container iterator), not the outer chromosome iterator
				for(itr1 = nearest->m_Containers->begin(); itr1 != nearest->m_Containers->end(); ++itr1)
				{
					if((*itr1)->Overlap(candidate))
					{
						overlaps = true;
						break;
					}
				}
			}
			if(overlaps)
				counter++;
		}
	}
	return counter;
}

void PeakAnnotator::PrintOverlapPosition(Position* gene,Position* position, GeneElement* overlapStart,GeneElement* overlapCentral, GeneElement* overlapEnd)
{
	if(gene->m_Strand == Strand_Positive)
	{
		m_outOverlapFile << position->m_Chromosome << "\t" << position->m_Start << "\t" << position->m_End << "\t" <<
			gene->m_Id << "\t" << gene->m_Symbol << "\t" ;
		if(overlapStart != NULL)
		{
			m_outOverlapFile << overlapStart->GetName() << "\t";
		}
		else
		{
			m_outOverlapFile << "Intergenic" << "\t";
		}
		if(overlapCentral != NULL)
		{
			m_outOverlapFile << overlapCentral->GetName() << "\t";
		}
		else
		{
			m_outOverlapFile << "Intergenic" << "\t";
		}
		if(overlapEnd != NULL)
		{
			m_outOverlapFile << overlapEnd->GetName() << endl;
		}
		else
		{
			m_outOverlapFile << "Intergenic" << endl;
		}		
	}
	else
	{
		m_outOverlapFile << position->m_Chromosome << "\t" << position->m_Start << "\t" << position->m_End << "\t" <<
			gene->m_Id << "\t" << gene->m_Symbol << "\t";
		if(overlapEnd != NULL)
		{
			m_outOverlapFile << overlapEnd->GetName() << "\t";
		}
		else
		{
			m_outOverlapFile << "Intergenic" << "\t";
		}		
		if(overlapCentral != NULL)
		{
			m_outOverlapFile << overlapCentral->GetName() << "\t";
		}
		else
		{
			m_outOverlapFile << "Intergenic" << "\t";
		}
		if(overlapStart != NULL)
		{
			m_outOverlapFile << overlapStart->GetName() << endl;
		}
		else
		{
			m_outOverlapFile << "Intergenic" << endl;
		}
	}
}


void PeakAnnotator::PrintClosestGene(Position* ClosestDownstreamPosGene, int ClosestDownstreamPosDistance,
									 Position* ClosestDownstreamNegGene, int ClosestDownstreamNegDistance,
									 Position* position, int numOfOverlapGenes)
{
	m_outFile << position->m_Chromosome << "\t" << position->m_Start << "\t" << position->m_End << "\t" << numOfOverlapGenes;
	if(ClosestDownstreamPosGene != NULL)
	{
		m_outFile << "\t" << ClosestDownstreamPosGene->m_Id << "\t" << ClosestDownstreamPosGene->m_Symbol << "\t" << ClosestDownstreamPosDistance;
	}
	else
	{
		m_outFile << "\t\t\t";
	}
	if(ClosestDownstreamNegGene != NULL)
	{
		m_outFile << "\t" << ClosestDownstreamNegGene->m_Id << "\t" << ClosestDownstreamNegGene->m_Symbol << "\t" << ClosestDownstreamNegDistance;
	}
	else
	{
		m_outFile << "\t\t\t";
	}
	m_outFile << endl;
}
