package org.peakAnnotator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import org.peakAnnotator.Position.Strand;

/**
 * Reads an annotation file in GTF format and groups consecutive lines that
 * share the same transcript id token into a single {@link Position}.
 *
 * Lines are tokenized by {@link #split(String)}; the index constants below
 * refer to positions in the resulting token array, not to raw GTF columns.
 */
public class GTFParser extends Parser {

	/* Indices into the token array produced by split(). */
	private static final int CHROM_INDEX = 0;
	private static final int SOURCE_INDEX = 1;
	private static final int FEATURE_INDEX = 2;
	private static final int START_INDEX = 3;
	private static final int END_INDEX = 4;
	private static final int STRAND_INDEX = 6;
	/* First attribute key; must be "gene_id" on the first line of the file. */
	private static final int FIRST_ATTRIBUTE_INDEX = 8;
	private static final int TRANSCRIPT_ID_INDEX = 11;
	/* Optional token: lines may be too short to contain it. */
	private static final int SYMBOL_INDEX = 15;

	/* Compiled once; split() is called for every line of the file. */
	private static final Pattern TOKEN_SEPARATOR = Pattern.compile("[;\t\\s\"]");

	/**
	 * Tokens of a line that was already read from the file but belongs to the
	 * next transcript; null when there is no such pending line.
	 */
	private String[] lastLineTokens;

	/**
	 * Opens the file through the parent class and checks that its first line
	 * looks like GTF, then rewinds so that parsing starts from the first line.
	 *
	 * @param filename name of the annotation file, passed to the parent class
	 * @throws IOException if the file is empty, if the first line has no
	 *         "gene_id" token at the expected place, or on a read error
	 */
	public GTFParser(String filename) throws IOException {
		super(filename);
		lastLineTokens = null;
		// Peek at the first line to check the gtf format, then rewind.
		// NOTE(review): the read-ahead limit is 1000 characters; whether reset()
		// still succeeds for a longer first line depends on the reader type
		// declared in Parser - confirm.
		m_File.mark(1000);
		String line = m_File.readLine();
		if (line == null)
		{
			throw new IOException("The annotation file is empty");
		}
		String[] fields = split(line);
		if (fields.length <= FIRST_ATTRIBUTE_INDEX || !fields[FIRST_ATTRIBUTE_INDEX].equals("gene_id"))
		{
			throw new IOException("The annotation file doesn't seems to be in GTF format, \"gene_id\" definition is missing");
		}
		m_File.reset();
	}

	/**
	 * Splits a line on semicolons, whitespace and double quotes and drops the
	 * empty tokens that result from adjacent separators.
	 *
	 * @param line the line to tokenize, must not be null
	 * @return the non-empty tokens in their original order
	 */
	private String[] split(String line) {
		String[] rawTokens = TOKEN_SEPARATOR.split(line);
		ArrayList<String> tokens = new ArrayList<String>(rawTokens.length);
		for (int i = 0; i < rawTokens.length; ++i)
		{
			if (rawTokens[i].length() > 0)
			{
				tokens.add(rawTokens[i]);
			}
		}
		return tokens.toArray(new String[tokens.size()]);
	}

	/**
	 * Tokenizes a line and verifies it is long enough to hold every mandatory
	 * token (everything up to and including the transcript id).
	 *
	 * @throws IOException if the line has too few tokens
	 */
	private String[] parseLine(String line) throws IOException {
		String[] tokens = split(line);
		if (tokens.length <= TRANSCRIPT_ID_INDEX)
		{
			throw new IOException("Malformed GTF line, expected at least "
					+ (TRANSCRIPT_ID_INDEX + 1) + " fields: " + line);
		}
		return tokens;
	}

	/** A null or empty line is treated as the end of the annotation data. */
	private static boolean isEndOfInput(String line) {
		return line == null || line.equals("");
	}

	/** Anything other than "+" in the strand token is taken as the negative strand. */
	private static Strand parseStrand(String[] tokens) {
		return tokens[STRAND_INDEX].equals("+") ? Strand.Strand_Positive : Strand.Strand_Negative;
	}

	/** Returns the symbol token, or null when the line is too short to have one. */
	private static String symbolOf(String[] tokens) {
		return tokens.length > SYMBOL_INDEX ? tokens[SYMBOL_INDEX] : null;
	}

	/** Adds a line to the transcript's lines and, if it is an exon line, to its exon lines. */
	private static void collectLine(String[] tokens, List<String[]> transcriptLines, List<String[]> exonLines) {
		transcriptLines.add(tokens);
		if (tokens[FEATURE_INDEX].equals("exon"))
		{
			exonLines.add(tokens);
		}
	}

	/**
	 * Builds the position for a transcript that consists of one single line,
	 * used when that line is the last one in the file. The line is turned
	 * into one exon whatever its feature type is, and no UTRs are set.
	 */
	private Position buildSingleLinePosition(String[] tokens) {
		int start = Integer.parseInt(tokens[START_INDEX]);
		int end = Integer.parseInt(tokens[END_INDEX]);
		ArrayList<GeneElement> geneElements = new ArrayList<GeneElement>();
		// NOTE(review): a single-exon transcript built by GetPosition's main
		// path gets exon index -1, whereas this path uses 1 - confirm which
		// one is intended.
		geneElements.add(new Exon(start, end, 1));
		// The chromosome is upper-cased exactly as in the main path so that
		// every position of the file uses the same chromosome naming.
		return new Position(start, end, tokens[TRANSCRIPT_ID_INDEX], symbolOf(tokens), geneElements,
				tokens[CHROM_INDEX].toUpperCase(), parseStrand(tokens), null, null, tokens[SOURCE_INDEX]);
	}

	/**
	 * Creates the exons of a transcript and the introns between them.
	 *
	 * @param exonLines exon lines in ascending coordinate order, not empty
	 * @param strand strand of the transcript; decides the numbering direction
	 * @return exons and introns interleaved, in the order of exonLines
	 */
	private ArrayList<GeneElement> buildGeneElements(List<String[]> exonLines, Strand strand) {
		ArrayList<GeneElement> geneElements = new ArrayList<GeneElement>();
		int exonCount = exonLines.size();
		for (int i = 0; i < exonCount; i++)
		{
			String[] exonTokens = exonLines.get(i);
			int exonIndex;
			int intronIndex;
			if (strand == Strand.Strand_Positive)
			{
				// numbered 1..n along the lines; last exon is marked with -1
				boolean lastExon = (i == exonCount - 1);
				exonIndex = lastExon ? -1 : i + 1;
				intronIndex = exonIndex;
			}
			else
			{
				// numbered from the end of the list; the first line holds the
				// last exon of the transcript, which is marked with -1
				boolean lastExon = (i == 0);
				exonIndex = lastExon ? -1 : exonCount - i;
				intronIndex = lastExon ? -1 : exonCount - i - 1;
			}
			int exonEnd = Integer.parseInt(exonTokens[END_INDEX]);
			geneElements.add(new Exon(Integer.parseInt(exonTokens[START_INDEX]), exonEnd, exonIndex));
			if (i != exonCount - 1)
			{
				// the intron fills the gap up to the next exon
				int nextExonStart = Integer.parseInt(exonLines.get(i + 1)[START_INDEX]);
				geneElements.add(new Intron(exonEnd + 1, nextExonStart - 1, intronIndex));
			}
		}
		return geneElements;
	}

	/**
	 * Reads all consecutive lines that share one transcript id and turns them
	 * into a position holding the transcript's exons, introns and UTRs.
	 *
	 * Finding the end of a transcript requires reading the first line of the
	 * next one; that line is kept in lastLineTokens for the following call.
	 *
	 * @return the next position, or null when the input is exhausted (a null
	 *         or empty line with no pending look-ahead line)
	 * @throws IOException on a read error, when a line has too few fields, or
	 *         when a transcript has no exon line
	 * @throws NumberFormatException when a start or end token is not an integer
	 */
	public Position GetPosition() throws IOException {
		String line = m_File.readLine();
		if (isEndOfInput(line))
		{
			if (lastLineTokens == null)
			{
				return null;
			}
			// the pending look-ahead line is a transcript of its own
			Position lastPosition = buildSingleLinePosition(lastLineTokens);
			lastLineTokens = null;
			return lastPosition;
		}
		m_currentOffset += line.length() + 1;

		List<String[]> transcriptLines = new ArrayList<String[]>();
		List<String[]> exonLines = new ArrayList<String[]>();
		if (lastLineTokens != null)
		{
			collectLine(lastLineTokens, transcriptLines, exonLines);
		}
		String[] tokens = parseLine(line);
		while (lastLineTokens == null
				|| lastLineTokens[TRANSCRIPT_ID_INDEX].equals(tokens[TRANSCRIPT_ID_INDEX]))
		{
			collectLine(tokens, transcriptLines, exonLines);
			lastLineTokens = tokens;

			line = m_File.readLine();
			if (isEndOfInput(line))
			{
				// Every line read so far belongs to this transcript. A blank
				// line ends the input here just as it does at the top of the
				// method, instead of failing as a malformed line.
				lastLineTokens = null;
				break;
			}
			m_currentOffset += line.length() + 1;
			tokens = parseLine(line);
		}
		if (lastLineTokens != null)
		{
			// the loop stopped on a different transcript id: keep that line
			// for the next call
			lastLineTokens = tokens;
		}

		Strand strand = parseStrand(transcriptLines.get(0));
		// for the negative strand the lines may be listed in descending order
		if (strand == Strand.Strand_Negative && exonLines.size() > 1)
		{
			int firstExonStart = Integer.parseInt(exonLines.get(0)[START_INDEX]);
			int secondExonStart = Integer.parseInt(exonLines.get(1)[START_INDEX]);
			if (secondExonStart < firstExonStart)
			{
				// bring the lines into ascending order
				Collections.reverse(transcriptLines);
				Collections.reverse(exonLines);
			}
		}

		String[] firstLine = transcriptLines.get(0);
		String id = firstLine[TRANSCRIPT_ID_INDEX];
		if (exonLines.isEmpty())
		{
			// start and end are taken from the exon lines, so a transcript
			// without any cannot be turned into a position
			throw new IOException("No exon line found for transcript " + id);
		}

		String chrom = firstLine[CHROM_INDEX].toUpperCase();
		int start = Integer.parseInt(exonLines.get(0)[START_INDEX]);
		int end = Integer.parseInt(exonLines.get(exonLines.size() - 1)[END_INDEX]);
		String source = firstLine[SOURCE_INDEX];
		String symbol = symbolOf(firstLine);

		ArrayList<GeneElement> geneElements = buildGeneElements(exonLines, strand);

		UTR beginUTR = null;
		UTR endUTR = null;
		for (String[] lineTokens : transcriptLines)
		{
			String feature = lineTokens[FEATURE_INDEX];
			boolean startCodon = feature.equals("start_codon");
			if (!startCodon && !feature.equals("stop_codon"))
			{
				// exon lines are already handled; CDS and other features are ignored
				continue;
			}
			// the start codon delimits UTR 5, the stop codon UTR 3
			int utrIndex = startCodon ? 5 : 3;
			boolean positive = (strand == Strand.Strand_Positive);
			if (startCodon == positive)
			{
				// start codon on "+" or stop codon on "-": the UTR runs from
				// the transcript start up to the codon
				beginUTR = new UTR(start, Integer.parseInt(lineTokens[START_INDEX]) - 1, utrIndex);
			}
			else
			{
				// stop codon on "+" or start codon on "-": the UTR runs from
				// just after the codon to the transcript end
				endUTR = new UTR(Integer.parseInt(lineTokens[END_INDEX]) + 1, end, utrIndex);
			}
		}

		return new Position(start, end, id, symbol, geneElements, chrom, strand, beginUTR, endUTR, source);
	}
}
