package org.peakAnnotator;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.EventListener;
import java.util.EventObject;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
//import java.util.zip.ZipInputStream;
import org.Infra.*;

//import org.peakAnnotator.GeneElement.GeneElementType;
import org.peakAnnotator.Position.Strand;

public class PeakAnnotator {
	
    //output directory; Init stores it with a trailing file separator
    private String m_OutDir;
    
    //peak file name without its parent path, and the same name split at the last '.'
    //(the extension keeps its leading dot and is empty when the name has no extension)
    private String m_PeakFileName;
    private String m_PeakFileNoExtension;
    private String m_PeakFileExtension;
    //prefix of every output file name: the user prefix followed by '.', or the empty string
    private String m_prefix;
    //number of random datasets, copied from the numRandom argument of Init
    private int m_numRandomDatasets;
    
    //annotation (gene) file name, split the same way as the peak file name;
    //for a compressed input these describe the file returned by Unzip
    private String m_GeneFileName;
    private String m_GeneFileNoExtension;
    private String m_GeneFileExtension;
    
    //output file paths; the utility path is always set by InitOutputFiles,
    //the overlap and summary paths only for utilities other than "ods"
    private String m_OverlapFileName;
    private String m_SummaryFileName;
    private String m_UtilityFileName;
    //paths of the two ".unique" output files, built in OverlapPositions
    private String m_Unique1;
    private String m_Unique2;
    //output file streams
    //m_outUtilityFile is opened by InitOutputFiles, the other two by ClosestDownstreamGenes
    private FileWriter m_outUtilityFile;
    private FileWriter m_outOverlapFile;
    private FileWriter m_SummaryFile;
    
    //false only when the gene type passed to Init is "protein_coding" (any case)
    private boolean m_includeNonCoding;
    //true when Init expanded a compressed gene file; the expanded copy is deleted after parsing
    private boolean isZip;
    //the gene file that is actually parsed (the expanded copy when isZip is true)
    private File m_gFile;
    
    //listener lists, one for unzip events and one for read events
    //NOTE(review): presumably notified by fireReadProgressEvent and its unzip counterpart - confirm
    protected javax.swing.event.EventListenerList unzipListenerList =
        new javax.swing.event.EventListenerList();
    protected javax.swing.event.EventListenerList readListenerList =
        new javax.swing.event.EventListenerList();

    
    //the map key is the chromosome and the value is a vector of positions in this chromosome
    //the maps are used to look up, per chromosome, the peaks and the genes that are compared with each other;
    //Initialize fills them and sorts every vector
    HashMap<String,ArrayList<Position> > m_MapOfPeaks = new HashMap<String, ArrayList<Position>>();
    HashMap<String,ArrayList<Position> > m_MapOfGenes = new HashMap<String, ArrayList<Position>>();
    //chromosomes in the order in which they first appear in the peak file
    ArrayList<String> chrOrder = new ArrayList<String>();
    //Hash table holding chr sizes, keyed by the chromosome name as normalised in Initialize
    HashMap<String,Integer> m_SizeMap = new HashMap<String, Integer>();


    /**
     * Creates an annotator whose peak, gene and chromosome-size maps are empty.
     * No input is read here; file names and data are supplied through {@code Init}.
     */
    public PeakAnnotator()
    {
    }
    
	/**
	 * Validates the input paths, derives the output file-name components and loads all input data.
	 *
	 * <p>The peak and gene file names are split into a base name and an extension (the
	 * extension keeps its leading dot). A compressed gene file is first expanded with
	 * {@code Unzip}; the expanded copy is the file that is parsed afterwards.
	 *
	 * @param utility     utility identifier, forwarded to {@code Initialize}
	 * @param peakFile    path of the peak file; must exist
	 * @param geneFile    path of the annotation file; must exist, may end in ".gz" or ".zip"
	 * @param outputDir   existing directory that receives the output files
	 * @param symbolFile  path of the ID-to-symbol file, or the literal "NULL" when there is none
	 * @param mustStrand  when true, an annotation entry without strand information aborts loading
	 * @param formattype  annotation format; "BED" (any case) selects the BED parser, anything else the GTF parser
	 * @param geneType    "protein_coding" (any case) skips annotation entries of any other source
	 * @param prefix      prefix for the output file names; may be empty
	 * @param chrSizeFile path of the chromosome-size file, or the literal "NULL" when there is none
	 * @param numRandom   number of random datasets, stored for later use
	 * @throws IOException if outputDir is not a directory or one of the input files does not exist
	 * @throws Exception   any failure raised while expanding or parsing the input files
	 */
	public void Init(String utility, String peakFile, String geneFile, String outputDir, String symbolFile,boolean mustStrand,
			String formattype,String geneType,String prefix,String chrSizeFile,int numRandom) throws Exception
	{
		m_numRandomDatasets = numRandom;
		//a non-empty prefix is separated from the rest of the output name by a dot
		if(!prefix.equals(""))
		{
			m_prefix = prefix+".";
		}
		else
		{
			m_prefix = prefix;
		}

		//validate every path before anything is read or written
		File dir = new File(outputDir);
		if(!dir.isDirectory())
		{
			throw new IOException(outputDir + " is not a directory");
		}
		isZip = false;
		File pFile = new File(peakFile);
		if(!pFile.exists())
		{
			throw new IOException(peakFile + " does not exist\n");
		}
		File temp = new File(geneFile);
		if(!temp.exists())
		{
			throw new IOException(geneFile + " does not exist\n");
		}
		if(!symbolFile.equals("NULL"))
		{
			temp = new File(symbolFile);
			if(!temp.exists())
			{
				throw new IOException(symbolFile + " does not exist\n");
			}
		}
		if(!chrSizeFile.equals("NULL"))
		{
			temp = new File(chrSizeFile);
			if(!temp.exists())
			{
				throw new IOException(chrSizeFile + " does not exist\n");
			}
		}

		//the output directory is always stored with a trailing separator
		if(outputDir.endsWith(File.separator))
		{
			m_OutDir = outputDir;
		}
		else
		{
			m_OutDir = outputDir + File.separatorChar;
		}

		//keep output file names
		//if the peak file name contains a path, keep only the name part
		String ParentPath = pFile.getParent();
		if(ParentPath==null)
		{
			m_PeakFileName = peakFile;
		}
		else
		{
			m_PeakFileName = pFile.getName();
		}

		//find extension; a leading dot (pos == 0) is not treated as an extension
		int pos = m_PeakFileName.lastIndexOf('.');
		if(pos > 0)
		{
			m_PeakFileNoExtension = m_PeakFileName.substring(0, pos);
			m_PeakFileExtension = m_PeakFileName.substring(pos);
		}
		else
		{
			m_PeakFileNoExtension = m_PeakFileName;
			m_PeakFileExtension = "";
		}

		//check if the file is compressed; only a real ".gz"/".zip" suffix counts, so that a
		//plain file whose name merely ends in "zip" is not expanded (and later deleted)
		if(geneFile.endsWith(".gz") || geneFile.endsWith(".zip"))
		{
			geneFile = this.Unzip(geneFile);
			isZip=true;
		}
		m_gFile = new File(geneFile);
		ParentPath = m_gFile.getParent();

		if(ParentPath==null)
		{
			m_GeneFileName = geneFile;
		}
		else
		{
			m_GeneFileName = m_gFile.getName();
		}

		pos = m_GeneFileName.lastIndexOf('.');
		if(pos > 0)
		{
			m_GeneFileNoExtension = m_GeneFileName.substring(0, pos);
			m_GeneFileExtension = m_GeneFileName.substring(pos);
		}
		else
		{
			m_GeneFileNoExtension = m_GeneFileName;
			m_GeneFileExtension = "";
		}

		//every gene type except "protein_coding" keeps the non-coding entries
		m_includeNonCoding = !geneType.equalsIgnoreCase("protein_coding");

		//Read and Parse input files and group the positions by chromosomes
		Initialize(utility,peakFile,geneFile,outputDir,symbolFile,mustStrand,formattype,chrSizeFile);
	}

	/**
	 * Loads the optional symbol and chromosome-size tables, then parses the annotation and
	 * the peak file into per-chromosome position lists and sorts every list.
	 *
	 * <p>For every gene the list of genes containing it is stored in {@code m_Containers}.
	 * A chromosome that occurs in only one of the two files gets an empty list in the other
	 * map, so both maps end up with the same key set.
	 *
	 * @param utility      utility identifier; "ndg" and "tss" enable the messages about
	 *                     chromosomes found in only one of the files
	 * @param peakFile     path of the peak file (parsed with the BED parser)
	 * @param geneFile     path of the (already expanded) annotation file
	 * @param outputDir    not used by this method
	 * @param symbolFile   path of the ID-to-symbol file, or "NULL"
	 * @param mustStrand   when true, a gene without strand information aborts loading
	 * @param geneFileType "BED" (any case) selects the BED parser, anything else the GTF parser
	 * @param chrSizeFile  path of the chromosome-size file, or "NULL"
	 * @throws Exception if strand information is required but missing, or a parser fails
	 */
	private void Initialize(String utility,String peakFile, String geneFile, String outputDir, String symbolFile,boolean mustStrand,String geneFileType,String chrSizeFile) throws Exception
	{
		System.out.println("Starting...");

		GeneralParser general = null;
		//The key of the map is accession number (ID) and the value is the gene's symbol
		HashMap<String,String> IDMap = new HashMap<String, String>();
		boolean hasSymbols = false;
		if(!symbolFile.equals("NULL"))
		{
			hasSymbols = true;
			general = new GeneralParser(symbolFile);
			System.out.println("Reading Symbol File");
			String [] symbols = general.ParseLine();
			//parse line by line of the symbol file
			//the first field of the file is the ID and the second is the symbol
			//(only the first entry of a comma separated symbol list is kept)
			while(symbols != null)
			{
				IDMap.put(symbols[0], symbols[1].split(",")[0]);
				symbols = general.ParseLine();
			}
		}
		if(!chrSizeFile.equals("NULL"))
		{
			general = new GeneralParser(chrSizeFile);
			String [] Sizes = general.ParseLine();
			//parse line by line of the chrSize file
			//the first field of the file is the chromosome name and the second is the size
			while(Sizes != null)
			{
				String chr=Sizes[0];
				if(!Sizes[0].toUpperCase().startsWith("CHR"))
				{
					chr = "CHR" +Sizes[0];
				}
				chr=chr.replace("CHR", "chr");
				//change chrMT to be chrM
				if(chr.equals("chrMT"))
				{
					chr="chrM";
				}
				m_SizeMap.put(chr, Integer.parseInt(Sizes[1]));
				Sizes = general.ParseLine();
			}
		}

		//this map is used to mark which of the chromosomes are only in BED file or only in Peak file or in both
		//chromosomes that are in both will have value > 1
		//chromosomes that are only in BED file will have value = 1
		//chromosomes that are only in Peak file will have value = 0
		HashMap<String,Integer> ChromMap = new HashMap<String, Integer>();

		System.out.println("Reading " + m_GeneFileName + " file");
		Parser bed = null;
		try
		{
			//the parser turns every annotation record into an object of type "Position"
			if(geneFileType.equalsIgnoreCase("BED"))
			{
				bed = new BEDParser(geneFile);
			}
			else
			{
				bed = new GTFParser(geneFile);
			}
			long totalLength = bed.getSize();
			int times = 0;
			Position gene;
			while((gene = bed.GetPosition()) != null)
			{
				times++;
				if(!m_includeNonCoding && !gene.m_Source.equals("protein_coding"))
				{
					continue;
				}
				//report the reading progress once every 1000 records
				if(times % 1000 == 0)
				{
					int progress = (int)(bed.getCurrentOffset() *100 / totalLength);
					fireReadProgressEvent(progress);
				}
				if(hasSymbols)
				{
					gene.m_Symbol = IDMap.get(gene.m_Id);
				}
				//mark the chromosome as one that was found in BED file
				ChromMap.put(gene.m_Chromosome, 1);

				//make sure there is strand information
				//this is required only when searching for closest downstream gene
				if(mustStrand && (gene.m_Strand.equals(Strand.Strand_Unknown)))
				{
					throw new Exception ("Strand information is missing");
				}

				//add the current gene to the list of genes in its chromosome
				ArrayList<Position> chromGenes = m_MapOfGenes.get(gene.m_Chromosome);
				if(chromGenes == null)
				{
					chromGenes = new ArrayList<Position>();
					m_MapOfGenes.put(gene.m_Chromosome, chromGenes);
				}
				chromGenes.add(gene);
			}
		}
		finally
		{
			//close the input file and delete the unzipped copy, also when parsing failed
			try
			{
				if(bed != null)
				{
					bed.clean();
				}
			}
			finally
			{
				if(isZip)
				{
					m_gFile.delete();
				}
			}
		}
		fireReadProgressEvent(100);

		//arrange containers
		//For each gene we build the list of genes containing it by maintaining a queue which
		//on every step holds all genes containing the current gene.
		//If the previous gene contains the current gene it is added to the queue; everything
		//already queued stays, because if gene X contains gene X+1 and gene X+1 contains
		//gene X+2 then gene X also contains gene X+2.
		//Otherwise all queued genes that do not contain the current gene are removed.
		ArrayList<Position> containingQueue = new ArrayList<Position>();
		for(ArrayList<Position> vec : m_MapOfGenes.values())
		{
			if(vec.isEmpty())
			{
				//nothing to arrange (e.g. a placeholder list left by an earlier load)
				continue;
			}
			Collections.sort(vec);
			containingQueue.clear();
			Position previous = vec.get(0);
			for(int i=1;i<vec.size();i++)
			{
				Position current = vec.get(i);
				if(current.IsContained(previous))
				{
					containingQueue.add(previous);
				}
				else
				{
					//keep only the queued genes that contain the current gene. The test uses
					//the same direction as the one above (current is contained in the other
					//gene); the former p.IsContained(g) asked the opposite question and so
					//dropped genuine containers.
					Iterator<Position> queued = containingQueue.iterator();
					while(queued.hasNext())
					{
						Position container = queued.next();
						if(!current.IsContained(container))
						{
							queued.remove();
						}
					}
				}
				current.m_Containers.addAll(containingQueue);
				previous = current;
			}
		}

		//we use the same BED parser to parse the peak file too
		System.out.println("Reading " +  m_PeakFileName + " file");
		BEDParser peak = new BEDParser(peakFile);
		try
		{
			Position p;
			while((p = peak.GetPosition()) != null)
			{
				//add the current peak to the list of peaks in its chromosome
				ArrayList<Position> chromPeaks = m_MapOfPeaks.get(p.m_Chromosome);
				if(chromPeaks == null)
				{
					chromPeaks = new ArrayList<Position>();
					m_MapOfPeaks.put(p.m_Chromosome, chromPeaks);
					//remember the order in which chromosomes first appear in the peak file
					chrOrder.add(p.m_Chromosome);
				}
				chromPeaks.add(p);

				Integer mark = ChromMap.get(p.m_Chromosome);
				if(mark == null)
				{
					//only in peak file
					ChromMap.put(p.m_Chromosome, 0);
				}
				else if(mark.intValue() == 1)
				{
					//is also in BED file
					ChromMap.put(p.m_Chromosome, 2);
				}
			}
		}
		finally
		{
			//the peak parser holds an open input file as well
			peak.clean();
		}

		//give every chromosome an entry in both maps; each placeholder is a separate list so
		//that a later change to one chromosome cannot leak into another
		for(String curChr : ChromMap.keySet())
		{
			int val = ChromMap.get(curChr);
			if(val==0)
			{
				//chromosome is only in peak file and not in BED file
				if(utility.equals("ndg")||utility.equals("tss"))
				{
					System.out.println("Chromosome " + curChr
							+ " was not found in the annotation file");
				}
				m_MapOfGenes.put(curChr, new ArrayList<Position>());
			}
			if(val==1)
			{
				//chromosome is only in BED file and not in Peak file
				if(utility.equals("ndg")||utility.equals("tss"))
				{
					System.out.println("No peaks detected in chromosome "
							+ curChr);
				}
				m_MapOfPeaks.put(curChr, new ArrayList<Position>());
			}
		}

		//sort the peaks of each chromosome
		for(ArrayList<Position> vec : m_MapOfPeaks.values())
		{
			Collections.sort(vec);
		}
	}
	
	/**
	 * Builds the output file names for the given utility and opens the utility output file.
	 *
	 * <p>For "ods" the utility file is named after both input files and ends in
	 * ".overlap.txt". For any other utility it is named after the peak file with the utility
	 * name inserted before the extension, and the overlap and summary file names are
	 * prepared as well (those two files are not opened here).
	 *
	 * @param utility utility identifier, e.g. "ods", "ndg" or "tss"
	 * @throws IOException if the utility output file cannot be opened for writing
	 */
	private void InitOutputFiles(String utility) throws IOException
	{
		final boolean overlapUtility = utility.equals("ods");
		//common leading part of every output path that is named after the peak file
		final String peakStem = m_OutDir + m_prefix + m_PeakFileNoExtension;

		m_UtilityFileName = overlapUtility
				? peakStem + "_" + m_GeneFileNoExtension + ".overlap.txt"
				: peakStem + "." + utility + m_PeakFileExtension;
		m_outUtilityFile = new FileWriter(m_UtilityFileName);

		if(overlapUtility)
		{
			return;
		}
		m_OverlapFileName = peakStem + ".overlap" + m_PeakFileExtension;
		m_SummaryFileName = peakStem + ".summary" + m_PeakFileExtension;
	}
	/**
	 * For every peak, finds the closest downstream gene on the positive strand (scanning to
	 * the right of the peak) and on the negative strand (scanning to the left), and reports
	 * every gene met on the way that overlaps the peak.
	 *
	 * <p>Three files are written: the utility file (one line per peak with both downstream
	 * genes), the overlap file (one line per overlapping gene) and the summary file (one
	 * line per peak with the overlapping symbols and the nearer of the two downstream genes).
	 * Chromosomes are processed in the order of their first appearance in the peak file;
	 * chromosomes without peaks or without genes are skipped. All three writers are closed
	 * when the method returns, whether it succeeds or fails.
	 *
	 * @throws Exception if an output file cannot be opened or written, or a helper fails
	 */
	public void ClosestDownstreamGenes() throws Exception
	{
		InitOutputFiles("ndg");
		try
		{
			m_outOverlapFile = new FileWriter(m_OverlapFileName);
			m_SummaryFile = new FileWriter(m_SummaryFileName);

			//print headers
			m_outUtilityFile.write("Chromosome\tStart\tEnd\t#Overlaped_Genes"
					+"\tDownstream_FW_Gene\tSymbol\tDistance"
					+"\tDownstream_REV_Gene\tSymbol\tDistance\n");
			m_outOverlapFile.write("Chromosome\tStart\tEnd\tOverlapGene"
					+"\tSymbol\tOverlap_Begin\tOverlap_Center\tOverlap_End\n");
			m_SummaryFile.write("Chromosome\tStart\tEnd\tOverlapGene\tDownstream_Gene\tDistance\n");

			//run over the chromosomes
			for(String curChr : chrOrder)
			{
				//Get the relevant vectors for the current chromosome
				ArrayList<Position> vecPeak = m_MapOfPeaks.get(curChr);
				ArrayList<Position> vecGenes = m_MapOfGenes.get(curChr);
				if(vecPeak.size()==0 || vecGenes.size()==0)
				{
					continue;
				}
				//run over the peaks - for each peak find the closest gene
				for(int i=0;i<vecPeak.size();i++)
				{
					Position peak = vecPeak.get(i);
					//comma terminated list of the distinct symbols of the overlapping genes
					String summary="";
					int numOfOverlapGenes = 0;

					//look for the closest gene to the right of the peak. This gene.start > peak.end.
					int closestPositionIndex = BinarySearch.FindRightAdjacentGene(vecGenes,peak.m_End);
					if(closestPositionIndex < 0)
					{
						//no such element no need to move right
						closestPositionIndex = vecGenes.size();
					}

					//move right until found closest downstream gene. the genes here cannot overlap the peak
					//because start gene > end peak
					boolean foundClosestDownstreamPosGene = false;
					Position ClosestDownstreamPosGene = null;
					int ClosestDownstreamPosDistance = 0;
					int temp = closestPositionIndex;
					while(!foundClosestDownstreamPosGene && temp < vecGenes.size())
					{
						ClosestDownstreamPosGene = vecGenes.get(temp++);
						if(ClosestDownstreamPosGene.m_Strand.equals(Strand.Strand_Positive))
						{
							//we found the closest downstream gene of the positive strand
							foundClosestDownstreamPosGene = true;
							ClosestDownstreamPosDistance = Math.abs(ClosestDownstreamPosGene.Minus(peak));
						}
					}

					//start moving left from the gene we found in the binary search
					//when moving left - genes can overlap the peak
					boolean foundClosestDownstreamNegGene = false;
					Position ClosestDownstreamNegGene = null;
					int ClosestDownstreamNegDistance = 0;
					temp = closestPositionIndex;
					while(!foundClosestDownstreamNegGene && temp > 0)
					{
						ClosestDownstreamNegGene = vecGenes.get(--temp);
						if(ClosestDownstreamNegGene.Overlap(peak))
						{
							//array of GeneElements: first member is overlapStart, second overlapCentral, third overlapEnd
							ArrayList<GeneElement> overlapPosition = new ArrayList<GeneElement>();
							ClosestDownstreamNegGene.FindOverlapGeneElement(peak, overlapPosition);
							PrintOverlapPosition(ClosestDownstreamNegGene,peak,overlapPosition);
							numOfOverlapGenes ++;
							//compare whole comma delimited entries: a plain substring test would
							//drop a symbol that is part of a longer one already in the list
							if(!("," + summary).contains("," + ClosestDownstreamNegGene.GetSymbol() + ","))
							{
								summary += ClosestDownstreamNegGene.GetSymbol() + ",";
							}
						}
						else if(ClosestDownstreamNegGene.m_Strand.equals(Strand.Strand_Negative))
						{
							//if the gene doesn't overlap and is negative strand - we found a candidate to be the closest downstream gene
							//genes containing this gene were not checked yet (their start position is smaller than this
							//gene's start position, but their end position is bigger than this gene's end position)
							foundClosestDownstreamNegGene = true;
							ClosestDownstreamNegDistance = Math.abs(ClosestDownstreamNegGene.Minus(peak));

							//check whether its containers overlap the peak or are a closer negative strand gene
							Position candidate = ClosestDownstreamNegGene;
							Iterator<Position> containers = candidate.m_Containers.iterator();
							while(containers.hasNext())
							{
								Position container = containers.next();
								if(container.Overlap(peak))
								{
									//if the containing gene overlaps - we want to print it but this is not a downstream gene
									ArrayList<GeneElement> overlapPosition = new ArrayList<GeneElement>();
									container.FindOverlapGeneElement(peak,overlapPosition);
									PrintOverlapPosition(container,peak,overlapPosition);
									numOfOverlapGenes ++;
									if(!("," + summary).contains("," + container.GetSymbol() + ","))
									{
										summary += container.GetSymbol() + ",";
									}
								}
								else if(container.m_Strand.equals(Strand.Strand_Negative))
								{
									//a container that does not overlap may be closer to the peak than the candidate
									int newDistance = Math.abs(container.Minus(peak));
									if(ClosestDownstreamNegDistance > newDistance)
									{
										ClosestDownstreamNegDistance = newDistance;
										ClosestDownstreamNegGene = container;
									}
								}
							}
						}
					}

					//the scans leave the last visited gene in the variables; clear them when nothing was found
					if(!foundClosestDownstreamPosGene)
					{
						ClosestDownstreamPosGene = null;
					}
					if(!foundClosestDownstreamNegGene)
					{
						ClosestDownstreamNegGene = null;
					}

					PrintClosestGene(ClosestDownstreamPosGene,ClosestDownstreamPosDistance,
							ClosestDownstreamNegGene,ClosestDownstreamNegDistance,peak,numOfOverlapGenes);

					//the summary gets the nearer of the two genes; with no positive strand gene
					//the negative one (possibly null) is used, and ties go to the negative strand
					boolean usePositive = foundClosestDownstreamPosGene
							&& (!foundClosestDownstreamNegGene || ClosestDownstreamNegDistance > ClosestDownstreamPosDistance);
					if(usePositive)
					{
						PrintSummary(peak, summary, ClosestDownstreamPosGene, ClosestDownstreamPosDistance);
					}
					else
					{
						PrintSummary(peak, summary, ClosestDownstreamNegGene, ClosestDownstreamNegDistance);
					}
				}
			}
		}
		finally
		{
			//close every writer that was opened, even when the analysis failed half way
			try
			{
				m_outUtilityFile.close();
			}
			finally
			{
				try
				{
					if(m_outOverlapFile != null)
					{
						m_outOverlapFile.close();
					}
				}
				finally
				{
					if(m_SummaryFile != null)
					{
						m_SummaryFile.close();
					}
				}
			}
		}
		System.out.println( "Program ended successfully");
	}
	
	/**
	 * For every peak, finds the gene with the smallest absolute distance to the peak and
	 * writes one line per peak to the utility file: chromosome, peak start and end, signed
	 * distance, gene start and end, gene ID, symbol and strand.
	 *
	 * <p>The search starts at the first gene to the right of the peak's central point,
	 * moves right and then left, and stops in each direction as soon as no further gene
	 * can be closer. Peaks on chromosomes without genes produce no output line. The
	 * utility writer is closed when the method returns, whether it succeeds or fails.
	 *
	 * @param checkOverlap when true, a gene that overlaps the peak is given distance 0
	 * @throws Exception if the output file cannot be opened or written
	 */
	public void ClosestTSS(boolean checkOverlap) throws Exception
	{
		InitOutputFiles("tss");
		try
		{
			//print headers
			m_outUtilityFile.write("Chromosome\tPeakStart\tPeakEnd\tDistance\tGeneStart\tGeneEnd\tClosestTSS_ID\tSymbol\tStrand" + "\n");
			//run over the chromosomes
			for(String chr : m_MapOfPeaks.keySet())
			{
				//Get the relevant vectors for the current chromosome
				ArrayList<Position> vecPeak = m_MapOfPeaks.get(chr);
				ArrayList<Position> vecGenes = m_MapOfGenes.get(chr);
				if(vecGenes.size() == 0)
				{
					//no genes in the chromosome - nothing to do for any of its peaks
					continue;
				}

				for(int i=0;i<vecPeak.size();i++)
				{
					Position peak = vecPeak.get(i);
					Position ClosestPosition = null;
					int closestDistance = Integer.MAX_VALUE;
					//look for the closest gene to the right of the peak. This gene.start > peak central point.
					int closestPositionIndex = BinarySearch.FindRightAdjacentGene(vecGenes,(int)peak.GetCentralPoint());

					//despite its name this is a coordinate: genes beyond it cannot be closer
					int stopIndex=0;
					if(closestPositionIndex < 0)
					{
						closestPositionIndex = vecGenes.size();
					}
					else
					{
						stopIndex = vecGenes.get(closestPositionIndex).m_End;
					}

					//move right until stop condition is met
					int temp = closestPositionIndex;
					while(temp < vecGenes.size())
					{
						Position currentPosition = vecGenes.get(temp);
						int curDistance = currentPosition.Minus(peak);
						if(checkOverlap && peak.Overlap(currentPosition))
						{
							curDistance = 0;
						}
						if(Math.abs(closestDistance) > Math.abs(curDistance))
						{
							closestDistance = curDistance;
							ClosestPosition = currentPosition;
						}
						temp++;
						//if this condition is met there is no need to continue checking because
						//there are no genes further to the right that can be closer
						if((currentPosition.m_Start >= stopIndex) || (currentPosition.m_Strand.equals(Strand.Strand_Positive)))
						{
							break;
						}
					}

					closestPositionIndex --;

					if(closestPositionIndex >= 0)
					{
						if(ClosestPosition != null)
						{
							stopIndex = Math.max(vecGenes.get(closestPositionIndex).m_Start,(int)peak.GetCentralPoint() - Math.abs(closestDistance));
						}
						else
						{
							stopIndex = vecGenes.get(closestPositionIndex).m_Start;
						}
					}
					//move left until stop condition is met
					while(closestPositionIndex >= 0)
					{
						Position currentPosition = vecGenes.get(closestPositionIndex);
						int curDistance = currentPosition.Minus(peak);
						if(checkOverlap && peak.Overlap(currentPosition))
						{
							curDistance = 0;
						}
						if(Math.abs(closestDistance) > Math.abs(curDistance))
						{
							closestDistance = curDistance;
							ClosestPosition = currentPosition;
						}
						//if this condition is met there is no need to continue checking because
						//there are no genes further to the left that can be closer beside those containing the current gene
						if(currentPosition.m_End < stopIndex)
						{
							//just check containers
							Iterator<Position> containers = currentPosition.m_Containers.iterator();
							while(containers.hasNext())
							{
								Position container = containers.next();
								curDistance = container.Minus(peak);
								if(checkOverlap && peak.Overlap(container))
								{
									curDistance = 0;
								}
								if(Math.abs(closestDistance) > Math.abs(curDistance))
								{
									closestDistance = curDistance;
									ClosestPosition = container;
								}
							}
							break;
						}
						closestPositionIndex --;
					}

					m_outUtilityFile.write(peak.m_Chromosome + "\t" + peak.m_Start + "\t" + peak.m_End + "\t");
					m_outUtilityFile.write(closestDistance + "\t");
					m_outUtilityFile.write(ClosestPosition.m_Start + "\t" + ClosestPosition.m_End + "\t");
					m_outUtilityFile.write(ClosestPosition.m_Id + "\t");
					m_outUtilityFile.write(ClosestPosition.m_Symbol + "\t");
					if(ClosestPosition.m_Strand.equals(Strand.Strand_Positive))
					{
						m_outUtilityFile.write("+");
					}
					else if(ClosestPosition.m_Strand.equals(Strand.Strand_Negative))
					{
						m_outUtilityFile.write("-");
					}
					else
					{
						//unknown strand: a tab is written in place of the strand sign
						m_outUtilityFile.write("\t");
					}
					m_outUtilityFile.write("\n");
				}
			}
		}
		finally
		{
			//release the output file even when the analysis failed half way
			m_outUtilityFile.close();
		}
		System.out.println("Program ended successfully");
	}
	
	
	/**
	 * Reports the overlaps between the positions of the first input file (m_MapOfPeaks)
	 * and those of the second input file (m_MapOfGenes).
	 * <p>
	 * Every overlapping pair is written to the utility file. Positions that took part in
	 * no overlap are written to the two ".unique" files. When chromosome sizes are
	 * available, m_numRandomDatasets random data sets are generated and the number of
	 * overlapping peaks in each is compared with the observed number; the resulting
	 * statistics are printed to standard output.
	 *
	 * @throws Exception if an output file cannot be written or a random data set
	 *                   cannot be created
	 */
	public void OverlapPositions() throws Exception
	{
		//number of distinct first-file peaks that overlap at least one second-file position
		int counter=0;
		InitOutputFiles("ods");

		FileWriter Unique1File = null;
		FileWriter Unique2File = null;
		try
		{
			//print headers
			m_outUtilityFile.write("Chromosome\t"+m_PeakFileName+"_Start\t"+m_PeakFileName+"_End\t"+m_PeakFileName+"_Name\t"+m_GeneFileName+"_Start\t"+m_GeneFileName+"_End\t"+m_GeneFileName+"_Name"+"\n");

			m_Unique1 = m_OutDir +m_prefix+ m_PeakFileNoExtension + ".unique" + m_PeakFileExtension;
			Unique1File = new FileWriter(m_Unique1);
			m_Unique2 = m_OutDir +m_prefix + m_GeneFileNoExtension + ".unique" + m_GeneFileExtension;
			if(m_Unique1.equals(m_Unique2))
			{
				//both inputs have the same name and extension: keep the two output files apart
				m_Unique2 = m_OutDir + m_prefix + m_GeneFileNoExtension + ".unique" + m_GeneFileExtension+"b";
			}
			Unique2File = new FileWriter(m_Unique2);

			Unique1File.write("Chromosome\tStart\tEnd\tName" + "\n");
			Unique2File.write("Chromosome\tStart\tEnd\tName" + "\n");

			//NOTE(review): only chromosomes present in the first file are visited, so positions of
			//the second file on other chromosomes never reach the second unique file - confirm intended
			Iterator <String> itr = m_MapOfPeaks.keySet().iterator();
			while(itr.hasNext())
			{
				String chr = itr.next();
				ArrayList<Position> vecPeak1 = m_MapOfPeaks.get(chr);
				ArrayList<Position> vecPeak2 = m_MapOfGenes.get(chr);
				if(vecPeak2 == null)
				{
					//chromosome missing from the second file: nothing can overlap, all its peaks are unique
					vecPeak2 = new ArrayList<Position>();
				}

				for(int i=0;i<vecPeak1.size();i++)
				{
					Position peak = vecPeak1.get(i);
					//makes sure the same peak is added to counter only once
					boolean peakCounted = false;

					//all positions to the right of the adjacent one do not overlap - start from its
					//left neighbour and walk leftwards
					int index = -1;
					if(!vecPeak2.isEmpty())
					{
						index = BinarySearch.FindRightAdjacentGene(vecPeak2,peak.m_End) - 1;
						if(index < 0)
							index = vecPeak2.size() - 1;
					}

					//each overlapping pair is marked in both vectors
					while(index >= 0 && vecPeak2.get(index).Overlap(peak))
					{
						Position other = vecPeak2.get(index);
						peak.m_OverlapFlag = true;
						other.m_OverlapFlag = true;
						m_outUtilityFile.write(peak.m_Chromosome + "\t" + peak.m_Start + "\t" + peak.m_End +"\t" + peak.GetSymbol() + "\t");
						m_outUtilityFile.write(other.m_Start + "\t" + other.m_End + "\t" + other.GetSymbol() + "\n");
						index--;
						if(!peakCounted)
						{
							counter++;
							peakCounted = true;
						}
					}
					if(index >= 0)
					{
						//check also the positions containing the last one we looked at
						Iterator<Position> itr1 = vecPeak2.get(index).m_Containers.iterator();
						while(itr1.hasNext())
						{
							Position p = itr1.next();
							if(p.Overlap(peak))
							{
								peak.m_OverlapFlag = true;
								p.m_OverlapFlag = true;
								m_outUtilityFile.write(peak.m_Chromosome + "\t" + peak.m_Start + "\t" + peak.m_End +"\t"+peak.GetSymbol()+"\t");
								m_outUtilityFile.write(p.m_Start + "\t" + p.m_End + "\t" + p.GetSymbol() + "\n");
								if(!peakCounted)
								{
									counter++;
									peakCounted = true;
								}
							}
						}
					}
				}

				//positions that were never flagged are unique to their file
				for(int i=0;i<vecPeak1.size();i++)
				{
					Position peak = vecPeak1.get(i);
					if(!peak.m_OverlapFlag)
					{
						Unique1File.write(peak.m_Chromosome + "\t" + peak.m_Start + "\t" + peak.m_End + "\t" + peak.m_Id+"\n");
					}
				}
				for(int i=0;i<vecPeak2.size();i++)
				{
					Position other = vecPeak2.get(i);
					if(!other.m_OverlapFlag)
					{
						Unique2File.write(other.m_Chromosome + "\t" + other.m_Start + "\t" + other.m_End + "\t" + other.m_Id+"\n");
					}
				}
			}
		}
		finally
		{
			//close all three writers even when a write failed half way
			try
			{
				m_outUtilityFile.close();
			}
			finally
			{
				try
				{
					if(Unique1File != null)
						Unique1File.close();
				}
				finally
				{
					if(Unique2File != null)
						Unique2File.close();
				}
			}
		}

		System.out.println("Start to create random datasets based on " + m_PeakFileName+ " file");
		//create m_numRandomDatasets random datasets based on the first file, and count overlaps;
		//skipped when no chromosome sizes were loaded or no datasets were requested
		if(m_SizeMap.size()>0 && m_numRandomDatasets>0)
		{
			//how many datasets have at least as many overlaps as the original file
			int randomCounter=0;
			//integer accumulator: a float sum loses precision once it grows past 2^24
			long sum=0;
			Random generator = new Random();
			for(int r=0;r<m_numRandomDatasets;r++)
			{
				int num = r+1;
				if(num%100==0)
				{
					System.out.println(num + " data sets have been processed");
				}
				HashMap<String,ArrayList<Position>> random = createRandomDataset(generator);
				int count = RandomOverlaps(random);
				sum+=count;
				if(count>=counter)
				{
					randomCounter++;
				}
			}
			float Pval = (float)randomCounter/m_numRandomDatasets;
			if(randomCounter==0)
			{
				//no random dataset reached the observed count: report the resolution limit instead of 0
				Pval=1f/m_numRandomDatasets;
			}
			float average = (float)sum/m_numRandomDatasets;
			float foldChange = counter/average;
			System.out.println(counter + " " + m_PeakFileName +" peaks overlap those of " + m_GeneFileName + "\n" +randomCounter + " random data sets had the same number or more overlaps than the original file\n" +
					"Hence p value is less than " + Pval + "\n" + "The average number of overlaps in random data sets is " + average + " fold change is " + foldChange);
		}

		System.out.println( "Program ended successfully");
	}
	
	/**
	 * Builds a random data set that mirrors m_MapOfPeaks: for every peak a new position of
	 * the same length is placed at a random start on the same chromosome.
	 *
	 * @param generator source of the random start coordinates
	 * @return map from chromosome name to the randomly placed positions on it
	 * @throws Exception if a chromosome that has peaks is missing from the chromosome sizes
	 *                   map, or a peak is too long to be placed on its chromosome
	 */
	private HashMap<String,ArrayList<Position>> createRandomDataset(Random generator) throws Exception
	{
		HashMap<String,ArrayList<Position>> random = new HashMap<String, ArrayList<Position>>();
		Iterator <String> itr = m_MapOfPeaks.keySet().iterator();
		while(itr.hasNext())
		{
			String chr = itr.next();
			ArrayList<Position> vecOriginalPeak = m_MapOfPeaks.get(chr);
			ArrayList<Position> curChr = new ArrayList<Position>(vecOriginalPeak.size());

			//the chromosome size is only needed (and only required) when there is a peak to place
			if(!vecOriginalPeak.isEmpty() && !m_SizeMap.containsKey(chr))
			{
				throw new Exception ("Error occured: chromosome " + chr + " is missing from the chromsome sizes file");
			}

			for(int i=0;i<vecOriginalPeak.size();i++)
			{
				Position original = vecOriginalPeak.get(i);
				int length = original.m_End - original.m_Start;
				//number of admissible start coordinates; Random.nextInt rejects a non-positive bound
				int startBound = m_SizeMap.get(chr) - length;
				if(startBound <= 0)
				{
					throw new Exception ("Error occured: peak " + chr + ":" + original.m_Start + "-" + original.m_End
							+ " does not fit in chromosome " + chr + " of size " + m_SizeMap.get(chr));
				}
				int Start = generator.nextInt(startBound);
				curChr.add(new Position(Start, Start+length,chr));
			}
			random.put(chr, curChr);
		}
		return random;
	}
	/**
	 * Counts how many positions of a random data set overlap at least one position of
	 * m_MapOfGenes. Each random position is counted at most once.
	 *
	 * @param randomMapOfPeaks map from chromosome name to the random positions on it
	 * @return number of random positions that overlap something in m_MapOfGenes
	 * @throws IOException never thrown by this implementation; kept for existing callers
	 */
	private int RandomOverlaps(HashMap<String,ArrayList<Position>> randomMapOfPeaks) throws IOException
	{
		int counter=0;
		Iterator <String> itr = randomMapOfPeaks.keySet().iterator();
		while(itr.hasNext())
		{
			String chr = itr.next();
			ArrayList<Position> vecPeak1 = randomMapOfPeaks.get(chr);
			ArrayList<Position> vecPeak2 = m_MapOfGenes.get(chr);
			if(vecPeak2 == null || vecPeak2.isEmpty())
			{
				//nothing on this chromosome in the second file, so nothing can overlap
				continue;
			}

			for(int i=0;i<vecPeak1.size();i++)
			{
				Position peak = vecPeak1.get(i);
				//all positions to the right of the adjacent one do not overlap - look at its left neighbour
				int index = BinarySearch.FindRightAdjacentGene(vecPeak2,peak.m_End) - 1;
				if(index < 0)
					index = vecPeak2.size() - 1;
				if(index < 0)
					continue;

				//only the yes/no answer matters here, so stop at the first overlap found
				Position candidate = vecPeak2.get(index);
				boolean overlaps = candidate.Overlap(peak);
				if(!overlaps)
				{
					//check also the positions containing the candidate
					Iterator<Position> itr1 = candidate.m_Containers.iterator();
					while(!overlaps && itr1.hasNext())
					{
						overlaps = itr1.next().Overlap(peak);
					}
				}
				if(overlaps)
				{
					counter++;
				}
			}
		}
		return counter;
	}
	/**
	 * Writes one line to the overlap file describing which gene elements the start, the
	 * centre and the end of a position fall on. For a gene that is not on the positive
	 * strand the start and end columns are swapped. A missing element is reported as
	 * "Intergenic".
	 *
	 * @param gene            gene whose id, symbol and strand are reported
	 * @param position        position whose chromosome, start and end are reported
	 * @param overlapPosition elements at index 0 (start), 1 (centre) and 2 (end); entries may be null
	 * @throws IOException if writing to the overlap file fails
	 */
	private void PrintOverlapPosition(Position gene,Position position, ArrayList<GeneElement> overlapPosition) throws IOException
	{
		GeneElement atStart = overlapPosition.get(0);
		GeneElement atCentre = overlapPosition.get(1);
		GeneElement atEnd = overlapPosition.get(2);

		//column order follows the direction of the gene
		boolean positiveStrand = gene.m_Strand.equals(Strand.Strand_Positive);
		GeneElement firstColumn = positiveStrand ? atStart : atEnd;
		GeneElement lastColumn = positiveStrand ? atEnd : atStart;

		m_outOverlapFile.write(position.m_Chromosome + "\t" + position.m_Start + "\t" + position.m_End + "\t" +
				gene.m_Id + "\t" + gene.m_Symbol + "\t");
		m_outOverlapFile.write((firstColumn != null ? firstColumn.GetName() : "Intergenic") + "\t");
		m_outOverlapFile.write((atCentre != null ? atCentre.GetName() : "Intergenic") + "\t");
		m_outOverlapFile.write((lastColumn != null ? lastColumn.GetName() : "Intergenic") + "\n");
	}
	/**
	 * Writes one line to the utility file: the position, the number of overlapping genes,
	 * and id, symbol and distance of the two given genes. The columns of a gene that is
	 * null are left empty.
	 *
	 * @throws IOException if writing to the utility file fails
	 */
	private void PrintClosestGene(Position ClosestDownstreamPosGene, int ClosestDownstreamPosDistance,
            Position ClosestDownstreamNegGene, int ClosestDownstreamNegDistance,
            Position position, int numOfOverlapGenes) throws IOException
	{
		String line = position.m_Chromosome + "\t" + position.m_Start + "\t" + position.m_End + "\t" + numOfOverlapGenes;

		String posColumns = "\t\t\t";
		if(ClosestDownstreamPosGene != null)
		{
			posColumns = "\t" + ClosestDownstreamPosGene.m_Id + "\t" + ClosestDownstreamPosGene.m_Symbol + "\t" + ClosestDownstreamPosDistance;
		}

		String negColumns = "\t\t\t";
		if(ClosestDownstreamNegGene != null)
		{
			negColumns = "\t"+ ClosestDownstreamNegGene.m_Id + "\t" + ClosestDownstreamNegGene.m_Symbol + "\t" + ClosestDownstreamNegDistance;
		}

		m_outUtilityFile.write(line + posColumns + negColumns + "\n");
	}
	
	/**
	 * Decompresses a ".gz" file or a single-entry zip archive next to the input file.
	 * <p>
	 * A path ending in ".gz" is gunzipped to the same path without that suffix. Any other
	 * path is opened as a zip archive that must contain exactly one entry, which must not
	 * be a directory; the entry is extracted into the directory of the input file (or
	 * relative to the working directory when the input path has no parent).
	 *
	 * @param inFilePath path of the compressed file
	 * @return path of the decompressed file
	 * @throws IOException if the archive is empty, holds more than one entry, holds a
	 *                     directory, names a target outside the extraction directory, or
	 *                     reading/writing fails
	 */
	private  String Unzip(String inFilePath) throws IOException
	{
		File input = new File(inFilePath);
		String parentPath = input.getParent();
		byte[] buf = new byte[102400];
		int len;

		System.out.println("Unzipping "+ input.getName());

		if(inFilePath.endsWith(".gz"))
		{
			//strip only the trailing suffix; ".gz" may also appear elsewhere in the path
			String outFilePath = inFilePath.substring(0, inFilePath.length() - ".gz".length());
			FileInputStream fileIn = new FileInputStream(inFilePath);
			InputStream gzipIn = null;
			OutputStream out = null;
			try
			{
				gzipIn = new GZIPInputStream(fileIn,10000);
				out = new FileOutputStream(outFilePath);
				while ((len = gzipIn.read(buf,0,buf.length)) > 0)
				{
					out.write(buf, 0, len);
				}
			}
			finally
			{
				try
				{
					if(out != null)
						out.close();
				}
				finally
				{
					//closing the gzip stream also closes the file stream it wraps
					if(gzipIn != null)
						gzipIn.close();
					else
						fileIn.close();
				}
			}
			return outFilePath;
		}

		ZipFile zipFile = new ZipFile(inFilePath);
		try
		{
			if(zipFile.size()==0)
			{
				throw new IOException("input zip file " + inFilePath + " is empty\n");
			}
			else if(zipFile.size()>1)
			{
				throw new IOException("input zip file " + inFilePath + " contain more than one file\n");
			}

			//exactly one entry is left at this point
			Enumeration<? extends ZipEntry> entries = zipFile.entries();
			ZipEntry entry = entries.nextElement();
			if(entry.isDirectory())
			{
				throw new IOException(entry.getName() + " is a directory, stop extracting zip file\n");
			}

			String outFilePath = entry.getName();
			if(parentPath!=null)
			{
				outFilePath = parentPath+File.separator+entry.getName();
			}

			//entry names come from the archive: refuse names such as "../x" that would leave
			//the extraction directory
			File baseDir = new File(parentPath != null ? parentPath : ".").getCanonicalFile();
			File ancestor = new File(outFilePath).getCanonicalFile().getParentFile();
			while(ancestor != null && !ancestor.equals(baseDir))
			{
				ancestor = ancestor.getParentFile();
			}
			if(ancestor == null)
			{
				throw new IOException(entry.getName() + " would be extracted outside " + baseDir + ", stop extracting zip file\n");
			}

			InputStream in = zipFile.getInputStream(entry);
			try
			{
				OutputStream out = new FileOutputStream(outFilePath);
				try
				{
					while ((len = in.read(buf)) > 0)
					{
						out.write(buf, 0, len);
					}
				}
				finally
				{
					out.close();
				}
			}
			finally
			{
				in.close();
			}
			return outFilePath;
		}
		finally
		{
			zipFile.close();
		}
	}
	
	// Registers a listener for unzip progress events
    /**
     * Adds a listener to the unzip listener list.
     * (The doubled "u" in the method name is kept for existing callers.)
     *
     * @param listener listener to add
     */
    public void adduUnzipProgressEventListener(ProgressEventListener listener)
    {
        unzipListenerList.add(ProgressEventListener.class, listener);
    }

    // Unregisters a listener for unzip progress events
    /**
     * Removes a listener from the unzip listener list.
     *
     * @param listener listener to remove
     */
    public void removeUnzipProgressEventListener(ProgressEventListener listener)
    {
        unzipListenerList.remove(ProgressEventListener.class, listener);
    }

    // This method fires an unzip progress event to the registered listeners
    /**
     * Sends a progress event to every ProgressEventListener in the unzip listener list.
     *
     * @param progress progress value; values above 100 are reported as 100
     */
    void fireUnzipProgressEvent(int progress)
    {
        ProgressEvent evt = new ProgressEvent(this, Math.min(progress, 100));
        Object[] registered = unzipListenerList.getListenerList();
        // entries come in pairs: the listener class, then the listener instance
        for (int slot = 1; slot < registered.length; slot += 2)
        {
            if (registered[slot - 1] != ProgressEventListener.class)
            {
                continue;
            }
            ((ProgressEventListener) registered[slot]).progressEventOccurred(evt);
        }
    }
    
 // Registers a listener for read progress events
    /**
     * Adds a listener to the read listener list.
     *
     * @param listener listener to add
     */
    public void addReadProgressEventListener(ProgressEventListener listener)
    {
        readListenerList.add(ProgressEventListener.class, listener);
    }

    // Unregisters a listener for read progress events
    /**
     * Removes a listener from the read listener list.
     *
     * @param listener listener to remove
     */
    public void removeReadProgressEventListener(ProgressEventListener listener)
    {
        readListenerList.remove(ProgressEventListener.class, listener);
    }

    // This method fires a read progress event to the registered listeners
    /**
     * Sends a progress event to every ProgressEventListener in the read listener list.
     *
     * @param progress progress value; values above 100 are reported as 100
     */
    void fireReadProgressEvent(int progress)
    {
        ProgressEvent evt = new ProgressEvent(this, Math.min(progress, 100));
        Object[] registered = readListenerList.getListenerList();
        // entries come in pairs: the listener class, then the listener instance
        for (int slot = 1; slot < registered.length; slot += 2)
        {
            if (registered[slot - 1] != ProgressEventListener.class)
            {
                continue;
            }
            ((ProgressEventListener) registered[slot]).progressEventOccurred(evt);
        }
    }
    
    
	
	/**
	 * Writes one line to the summary file: the peak coordinates, the summary text and,
	 * when a gene is given, its symbol and distance.
	 *
	 * @param peak                      peak whose chromosome, start and end are reported
	 * @param summary                   text written after the coordinates
	 * @param ClosestDownstreamGene     gene to report, or null when there is none
	 * @param ClosestDownstreamDistance distance written next to the gene symbol
	 * @throws IOException if writing to the summary file fails
	 */
	private void PrintSummary(Position peak, String summary, Position ClosestDownstreamGene, int ClosestDownstreamDistance) throws IOException
	{
		//NOTE(review): without a gene three tabs are written, although the gene case fills
		//only two columns - confirm the intended column count before changing it
		String geneColumns = (ClosestDownstreamGene != null)
				? ClosestDownstreamGene.GetSymbol() + "\t" + ClosestDownstreamDistance
				: "\t\t\t";
		m_SummaryFile.write(peak.m_Chromosome + "\t" + peak.m_Start + "\t" + peak.m_End + "\t" + summary + "\t");
		m_SummaryFile.write(geneColumns + "\n");
	}
	/**
	 * @return the last path component of m_UtilityFileName
	 */
	public String getUtilityFileName()
	{
		return new File(m_UtilityFileName).getName();
	}
	/**
	 * @return the last path component of m_OverlapFileName
	 */
	public String getOverlapFileName()
	{
		return new File(m_OverlapFileName).getName();
	}
	/**
	 * @return the last path component of m_SummaryFileName
	 */
	public String getSummaryFileName()
	{
		return new File(m_SummaryFileName).getName();
	}
	/**
	 * @return the last path component of m_Unique1, the unique-positions file of the first input
	 */
	public String getUnique1Name()
	{
		return new File(m_Unique1).getName();
	}
	/**
	 * @return the last path component of m_Unique2, the unique-positions file of the second input
	 */
	public String getUnique2Name()
	{
		return new File(m_Unique2).getName();
	}
}




