package org.peakAnnotator;

import java.io.IOException;
import java.util.ArrayList;

import org.peakAnnotator.Position.Strand;

/**
 * Parser for BED-formatted files.
 *
 * <p>Each call to {@link #GetPosition()} reads one line from the inherited
 * {@code m_File} reader and converts it into a {@link Position}. Lines with at
 * most six columns produce a simple position; lines with twelve or more columns
 * are additionally expanded into exons, introns and UTRs.
 */
public class BEDParser extends Parser {

	/**
	 * Read-ahead limit handed to {@code m_File.mark(...)} while probing the first lines.
	 * NOTE(review): a first data line longer than this limit may invalidate the mark
	 * (BED12 lines with many blocks can be long) - confirm against the reader type
	 * used by Parser.
	 */
	private static final int HEADER_READ_AHEAD_LIMIT = 5000;

	/**
	 * Opens the file and validates its first data line.
	 *
	 * <p>If the first line does not carry integer values in columns 2 and 3 it is
	 * treated as a header line and skipped. The following line must then be a valid
	 * data line. On success the reader is rewound to the start of the first data
	 * line, so the first {@link #GetPosition()} call returns it.
	 *
	 * @param fileName path of the BED file, passed on to the {@code Parser} constructor
	 * @throws IOException if the file cannot be read, is empty, contains only a
	 *                     header, or its first data line has fewer than three columns
	 *                     or non-integer start/end columns
	 */
	public BEDParser(String fileName) throws IOException
	{
		super(fileName);

		m_File.mark(HEADER_READ_AHEAD_LIMIT);
		String line = m_File.readLine();

		// A first line without numeric start/end columns (including a line with
		// fewer than three tokens) is a header: skip it and move the mark so that
		// reset() lands on the first data line instead of the header.
		if (line != null && !hasNumericCoordinates(line))
		{
			m_File.mark(HEADER_READ_AHEAD_LIMIT);
			line = m_File.readLine();
		}

		// line == null means the file is empty or holds nothing but a header.
		if (line == null || !hasNumericCoordinates(line))
		{
			throw new IOException ("Invalid file format");
		}
		m_File.reset();
	}

	/**
	 * Tells whether a line looks like a BED data line.
	 *
	 * @param line a non-null line of the file
	 * @return {@code true} if the line has at least three tokens and tokens 2 and 3
	 *         both parse as integers
	 */
	private boolean hasNumericCoordinates(String line)
	{
		String[] elements = this.split(line);
		if (elements.length < 3)
		{
			return false;
		}
		try
		{
			Integer.parseInt(elements[1]);
			Integer.parseInt(elements[2]);
			return true;
		}
		catch (NumberFormatException e)
		{
			return false;
		}
	}

	/**
	 * Splits a line into its non-empty tokens.
	 *
	 * <p>Whitespace characters and double quotes all act as delimiters, so a quoted
	 * value that contains spaces is returned as several tokens.
	 *
	 * @param line the line to split; must not be {@code null}
	 * @return the non-empty tokens in order of appearance (possibly an empty array)
	 */
	private String[] split(String line)
	{
		String[] rawTokens = line.split("[\t\\s\"]");
		ArrayList<String> tokens = new ArrayList<String>();
		//consecutive delimiters yield empty strings - drop them
		for (String token : rawTokens)
		{
			if (token.length() > 0)
			{
				tokens.add(token);
			}
		}
		return tokens.toArray(new String[tokens.size()]);
	}

	/**
	 * Builds the exception reported for a malformed data line, keeping the
	 * underlying parse failure as its cause.
	 */
	private static IOException invalidLine(String line, Exception cause)
	{
		return new IOException("Invalid file format in line starting with \"" + line + "\"", cause);
	}

	/**
	 * Reads the next line and converts it into a {@link Position}.
	 *
	 * <p>Column usage: 1 = chromosome (upper-cased), 2 = start, 3 = end,
	 * 4 = name (optional), 6 = strand (optional; anything other than {@code -}
	 * leaves the strand positive). With at most six columns a simple position is
	 * returned. Otherwise twelve columns are required: 7 and 8 are the coding
	 * start and end, 10 is the number of exons, 11 the comma-separated exon
	 * lengths and 12 the comma-separated exon offsets relative to the start.
	 * In that case the start and the derived exon coordinates are shifted by one
	 * so that both ends of every element are inclusive.
	 *
	 * @return the parsed position, or {@code null} when the end of the input or an
	 *         empty line is reached
	 * @throws IOException if reading fails, a required column is missing, a numeric
	 *                     column cannot be parsed, or a line has between seven and
	 *                     eleven columns
	 */
	public Position GetPosition() throws IOException
	{
		String line = m_File.readLine();
		if (line == null || line.equals(""))
		{
			return null;
		}

		try
		{
			m_currentOffset += line.length();

			String[] elements = this.split(line);
			int columnCount = elements.length;
			String chrom = elements[0].toUpperCase();
			int start = Integer.parseInt(elements[1]);
			int end = Integer.parseInt(elements[2]);
			String name = (columnCount > 3) ? elements[3] : "";

			//we expect the strand to be + or - any other symbol is ignored
			Strand strand = Strand.Strand_Positive;
			if (columnCount > 5 && elements[5].equals("-"))
			{
				strand = Strand.Strand_Negative;
			}

			//if we don't have the information about the gene's elements
			//we stop here by building simple Position object.
			if (columnCount <= 6)
			{
				return new Position(start,end,name,null,chrom,strand,"protein_coding",line);
			}

			if (columnCount < 12)
			{
				throw new IOException ("Invalid BED file format - missing columns: 12 columns expected");
			}

			//will get to these lines only when a bed file describe a ucsc gene
			boolean positive = strand.equals(Strand.Strand_Positive);
			start = start + 1;

			//in BED file, if the gene is non-coding, both cdsStart and cdsEnd will be 0
			int cdsStart = Integer.parseInt(elements[6]) + 1;
			UTR beginUTR = null;
			if (cdsStart != start && cdsStart > 1)
			{
				cdsStart--;
				//region before the coding start: UTR5 on the positive strand, UTR3 otherwise
				beginUTR = new UTR(start, cdsStart, positive ? 5 : 3);
			}

			int cdsEnd = Integer.parseInt(elements[7]);
			UTR endUTR = null;
			if (cdsEnd != end && cdsEnd != 0)
			{
				cdsEnd++;
				//region after the coding end: UTR3 on the positive strand, UTR5 otherwise
				endUTR = new UTR(cdsEnd, end, positive ? 3 : 5);
			}

			int numOfExons = Integer.parseInt(elements[9]);
			String[] exonLengths = elements[10].split(",");
			String[] exonOffsets = elements[11].split(",");

			//ignore rest columns
			ArrayList<GeneElement> geneElementsVec = new ArrayList<GeneElement>();
			int lastElementEnd = start;
			for (int i = 0; i < numOfExons; i++)
			{
				int exonStart = Integer.parseInt(exonOffsets[i]) + start;
				int exonEnd = exonStart + Integer.parseInt(exonLengths[i]) - 1;

				//elements are numbered in transcription order, so the numbering
				//runs backwards on the negative strand
				int index = positive ? i + 1 : numOfExons - i;
				int intronIndex = positive ? i : numOfExons - i;

				if (index == numOfExons)
				{
					//Last exon is marked with -1
					index = -1;
				}
				if (intronIndex == (numOfExons - 1))
				{
					//Last intron is marked with -1
					intronIndex = -1;
				}

				//An intron exists only when at least one base separates this exon
				//from the previous element; for directly adjacent exons
				//(exonStart == lastElementEnd + 1) the interval would be empty.
				if ((exonStart - lastElementEnd) > 1)
				{
					geneElementsVec.add(new Intron(lastElementEnd + 1, exonStart - 1, intronIndex));
				}
				lastElementEnd = exonEnd;

				geneElementsVec.add(new Exon(exonStart, exonEnd, index));
			}

			return new Position(start,end,name,null,geneElementsVec,chrom,strand,beginUTR,endUTR,"protein_coding");
		}
		catch (NumberFormatException e)
		{
			//a numeric column did not contain an integer
			throw invalidLine(line, e);
		}
		catch (ArrayIndexOutOfBoundsException e)
		{
			//a required column or exon entry is missing
			throw invalidLine(line, e);
		}
	}
}

