/*
 *
 * This file is part of Genome Artist.
 *
 * Genome Artist is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Genome Artist is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Genome Artist.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package ro.genomeartist.gui.utils;

import java.util.StringTokenizer;

/**
 *
 * @author iulian
 */
public class DNAUtils {
    /** GenBank keyword that introduces the sequence section of a record. */
    private static final String GENBANK_ORIGIN = "ORIGIN";

    /** Marker that terminates a GenBank record. */
    private static final String GENBANK_END = "//";

    /**
     * Computes the reverse complement of a sequence.
     * Every character is mapped through {@link #complementNucleotide(char)}
     * (which yields upper-case nucleotides and leaves unknown characters
     * untouched) and the resulting string is reversed.
     *
     * @param sequence the sequence to process; must not be {@code null}
     * @return the reverse complement of {@code sequence}
     */
    public static String reverseComplementSequence(String sequence) {
        StringBuilder complemented = new StringBuilder(sequence.length());

        for (int i = 0; i < sequence.length(); i++) {
            complemented.append(complementNucleotide(sequence.charAt(i)));
        }

        return complemented.reverse().toString();
    }

    /**
     * Extracts the bare nucleotide sequence from user supplied text.
     * The text may be a raw sequence, a FASTA record (first character is
     * {@code >} or {@code ;}) or a GenBank record (contains the
     * {@code ORIGIN} keyword, in any letter case, and ends with {@code //}).
     * Whatever the format, the result only contains characters accepted by
     * {@link #isNucleotide(char)}, normalized by {@link #toNucleotide(char)}.
     *
     * @param sequence the text to sanitize
     * @return the nucleotides found in {@code sequence}, possibly empty
     */
    public static String sanitizeSequence(String sequence) {
        // The squeezed text is used for format detection and as the raw
        // fallback. NOTE(review): squeezeString is defined in StringUtils
        // (not visible here); presumably it strips whitespace - confirm.
        String squeezedString = StringUtils.squeezeString(sequence);

        String result;
        if (squeezedString.startsWith(">") || squeezedString.startsWith(";")) {
            // FASTA: begins with a header (">") or comment (";") line
            result = getSequenceFromFasta(sequence);
        } else if (containsIgnoreCase(squeezedString, GENBANK_ORIGIN)
                && squeezedString.endsWith(GENBANK_END)) {
            // GENBANK: has the ORIGIN keyword and ends with "//". The keyword
            // is matched case-insensitively, like in getSequenceFromGenbank,
            // so a lower-case record is not mistaken for a raw sequence.
            result = getSequenceFromGenbank(sequence);
        } else {
            result = squeezedString;
        }

        // Final pass: keep nothing but nucleotides
        return cleanString(result);
    }

    /**
     * Extracts the sequence from FASTA formatted text.
     * The text is split on {@code \n}; lines that start with {@code >} or
     * {@code ;} after squeezing are skipped and the remaining squeezed lines
     * are concatenated.
     *
     * @param sequence the FASTA text
     * @return the concatenated sequence lines
     */
    private static String getSequenceFromFasta(String sequence) {
        StringBuilder result = new StringBuilder(sequence.length());
        StringTokenizer st = new StringTokenizer(sequence, "\n", false);

        while (st.hasMoreTokens()) {
            String line = StringUtils.squeezeString(st.nextToken());
            boolean isHeaderOrComment = line.startsWith(">") || line.startsWith(";");
            if (!isHeaderOrComment) {
                result.append(line);
            }
        }

        return result.toString();
    }

    /**
     * Extracts the sequence from GenBank formatted text.
     * Everything before the first line that starts with the {@code ORIGIN}
     * keyword (any letter case) is dropped. The keyword itself is removed
     * and the remainder of that line plus all following squeezed lines are
     * concatenated. Position numbers and the closing {@code //} are not
     * removed here; the caller filters them out.
     *
     * @param sequence the GenBank text
     * @return the text following the {@code ORIGIN} keyword, or an empty
     *         string when no line starts with the keyword
     */
    private static String getSequenceFromGenbank(String sequence) {
        StringBuilder result = new StringBuilder(sequence.length());
        StringTokenizer st = new StringTokenizer(sequence, "\n", false);

        boolean foundOrigin = false;
        while (st.hasMoreTokens()) {
            String line = StringUtils.squeezeString(st.nextToken());
            if (startsWithIgnoreCase(line, GENBANK_ORIGIN)) {
                foundOrigin = true;
                line = line.substring(GENBANK_ORIGIN.length());
            }

            if (foundOrigin) {
                result.append(line);
            }
        }

        return result.toString();
    }

    /**
     * Keeps only the nucleotides of a string.
     * Characters rejected by {@link #isNucleotide(char)} are dropped, the
     * accepted ones are normalized with {@link #toNucleotide(char)}.
     *
     * @param sequence the text to filter
     * @return the filtered sequence
     */
    private static String cleanString(String sequence) {
        StringBuilder result = new StringBuilder(sequence.length());

        for (int i = 0; i < sequence.length(); i++) {
            char c = sequence.charAt(i);
            if (isNucleotide(c)) {
                result.append(toNucleotide(c));
            }
        }

        return result.toString();
    }

    /**
     * Tells whether {@code text} starts with {@code keyword}, ignoring
     * letter case. The comparison does not depend on the default locale.
     */
    private static boolean startsWithIgnoreCase(String text, String keyword) {
        return text.regionMatches(true, 0, keyword, 0, keyword.length());
    }

    /**
     * Tells whether {@code text} contains {@code keyword}, ignoring letter
     * case. The comparison does not depend on the default locale.
     */
    private static boolean containsIgnoreCase(String text, String keyword) {
        int lastStart = text.length() - keyword.length();
        for (int start = 0; start <= lastStart; start++) {
            if (text.regionMatches(true, start, keyword, 0, keyword.length())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether a character is accepted as a nucleotide.
     * Accepted are {@code a, c, g, t, n} in either letter case and
     * {@code '.'}.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is accepted as a nucleotide
     */
    public static boolean isNucleotide(char c) {
        switch (c) {
            case 'a':case 'A':
            case 'c':case 'C':
            case 'g':case 'G':
            case 't':case 'T':
            case 'n':case 'N':
            case '.':
                return true;
            default:
                return false;
        }
    }

    /**
     * Normalizes a character to a nucleotide.
     * {@code a, c, g, t} (either letter case) are returned unchanged, any
     * other character is replaced by {@code 'N'}.
     *
     * @param c the character to normalize
     * @return {@code c} itself for a, c, g, t; {@code 'N'} otherwise
     */
    public static char toNucleotide(char c) {
        switch (c) {
            case 'a':case 'A':
            case 'c':case 'C':
            case 'g':case 'G':
            case 't':case 'T':
                return c;
            default:
                return 'N';
        }
    }

    /**
     * Returns the complement of a nucleotide, in upper case.
     * A pairs with T and C pairs with G; {@code n}/{@code N} gives
     * {@code 'N'}. Any other character is returned unchanged.
     *
     * @param c the nucleotide to complement
     * @return the upper-case complement, or {@code c} itself when it is not
     *         one of a, c, g, t, n
     */
    public static char complementNucleotide(char c) {
        switch (c) {
            case 'a':case 'A':
                return 'T';
            case 'c':case 'C':
                return 'G';
            case 'g':case 'G':
                return 'C';
            case 't':case 'T':
                return 'A';
            case 'n':case 'N':
                return 'N';
            default:
                return c;
        }
    }
}
