#!/usr/bin/perl -w
# embl2fasta - perl script that extracts the nucleotide sequence from an .embl file.
#
# embl2faafna - perl script that converts embl files into faa and fna files.
#
# Written by: Siomar C. Soares, Federal University of Minas Gerais (UFMG), 
#   Laboratory of Cellular and Molecular Genetics, Brazil
#
# Date Written: June 1, 2011
#
#
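# Usage for fna file = ./parseembl.pl myinput.fasta myinput.list
# Usage for faa file = ./parseembl.pl -p myinput.fasta myinput.list
#
# myinput.fasta: nucleotide sequence; its first line is treated as the header
#   and discarded.
# myinput.list: one region per line, fields separated by spaces; field 2 is the
#   start position and field 3 the end position (1-based, inclusive), and a
#   field 4 containing "R" requests the reverse complement of the region.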


use strict;
use warnings;
use Getopt::Std;

# Extracts regions from a nucleotide sequence file and prints them in
# FASTA-like form on STDOUT.
#
# Arguments:
#   -p         translate each region to protein instead of printing DNA
#   $ARGV[0]   sequence file; the first line is discarded as a header and
#              the remaining lines are concatenated into one sequence
#   $ARGV[1]   list file; one region per line, fields separated by spaces:
#              field 2 = start, field 3 = end (1-based, inclusive), and a
#              field 4 containing "R" selects the reverse complement
#
# For every usable list line the line itself is printed as the record
# header, followed by the sequence wrapped at $LINE_WIDTH characters.

my $LINE_WIDTH   = 60;    # residues printed per output line
my $CODON_LENGTH = 3;

my $USAGE = "Usage: $0 [-p] sequence.fasta regions.list\n";

# Standard genetic code, keyed by lower-case DNA codon.
my %AMINO_ACID_FOR = (
    ttt => 'F', ttc => 'F', tta => 'L', ttg => 'L',
    tct => 'S', tcc => 'S', tca => 'S', tcg => 'S',
    tat => 'Y', tac => 'Y', taa => '*', tag => '*',
    tgt => 'C', tgc => 'C', tga => '*', tgg => 'W',
    ctt => 'L', ctc => 'L', cta => 'L', ctg => 'L',
    cct => 'P', ccc => 'P', cca => 'P', ccg => 'P',
    cat => 'H', cac => 'H', caa => 'Q', cag => 'Q',
    cgt => 'R', cgc => 'R', cga => 'R', cgg => 'R',
    att => 'I', atc => 'I', ata => 'I', atg => 'M',
    act => 'T', acc => 'T', aca => 'T', acg => 'T',
    aat => 'N', aac => 'N', aaa => 'K', aag => 'K',
    agt => 'S', agc => 'S', aga => 'R', agg => 'R',
    gtt => 'V', gtc => 'V', gta => 'V', gtg => 'V',
    gct => 'A', gcc => 'A', gca => 'A', gcg => 'A',
    gat => 'D', gac => 'D', gaa => 'E', gag => 'E',
    ggt => 'G', ggc => 'G', gga => 'G', ggg => 'G',
);

# Reads the sequence file and returns its residues as one lower-case string.
# The first line is dropped unconditionally (it is taken to be the header).
sub read_genome {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "Cannot open sequence file '$path': $!\n";

    my $header = <$fh>;    # discarded
    my $genome = '';
    while (my $line = <$fh>) {
        # Strip line endings and stray blanks so that every remaining
        # character occupies exactly one sequence position.
        $line =~ s/\s+//g;
        $genome .= lc $line;
    }
    close $fh;

    return $genome;
}

# Returns the digits of a coordinate field, or undef if the field is missing
# or is not a plain non-negative integer (trailing whitespace is tolerated).
sub parse_position {
    my ($field) = @_;

    return unless defined $field && $field =~ /\A(\d+)\s*\z/;
    return $1;
}

# Returns positions $beg..$end (1-based, inclusive) of $genome. Coordinates
# outside the sequence are clamped; an empty string is returned when nothing
# remains (for example when $end < $beg).
sub extract_region {
    my ($genome, $beg, $end) = @_;

    my $first = $beg < 1 ? 1 : $beg;
    my $last  = $end > length($genome) ? length($genome) : $end;
    return '' if $last < $first;

    return substr $genome, $first - 1, $last - $first + 1;
}

# Returns the reverse complement of a lower-case DNA string. IUPAC ambiguity
# codes are complemented as well; n, s and w are their own complement.
sub reverse_complement {
    my ($dna) = @_;

    my $revcomp = reverse $dna;
    $revcomp =~ tr/acgtrymkbdhv/tgcayrkmvhdb/;

    return $revcomp;
}

# Translates a lower-case DNA string codon by codon from offset 0.
# Codons that are not in the table (ambiguous bases) become 'X'.
sub translate {
    my ($dna) = @_;

    # The bound stops one codon early when the length is a multiple of
    # three, so the last codon is never translated.
    # NOTE(review): presumably this drops the terminal stop codon - confirm.
    my $limit   = length($dna) - $CODON_LENGTH;
    my $protein = '';
    for (my $offset = 0; $offset < $limit; $offset += $CODON_LENGTH) {
        my $codon = substr $dna, $offset, $CODON_LENGTH;
        $protein .= exists $AMINO_ACID_FOR{$codon}
                  ? $AMINO_ACID_FOR{$codon}
                  : 'X';
    }

    return $protein;
}

# Prints $text on STDOUT in lines of at most $LINE_WIDTH characters.
# Prints nothing for an empty string.
sub print_wrapped {
    my ($text) = @_;

    for (my $offset = 0; $offset < length $text; $offset += $LINE_WIDTH) {
        print substr($text, $offset, $LINE_WIDTH), "\n";
    }

    return;
}

my %opt;
getopts('p', \%opt) or die $USAGE;
die $USAGE if @ARGV < 2;

my ($sequence_path, $list_path) = @ARGV;
my $genome = read_genome($sequence_path);

open my $list_fh, '<', $list_path
    or die "Cannot open list file '$list_path': $!\n";

RECORD:
while (my $record = <$list_fh>) {
    chomp $record;
    next RECORD if $record !~ /\S/;    # ignore blank lines

    # Fields are separated by runs of spaces; field 1 is not interpreted.
    my @fields = split /[ ]+/, $record;
    my $beg    = parse_position($fields[1]);
    my $end    = parse_position($fields[2]);
    my $strand = $fields[3];

    if (!defined $beg || !defined $end) {
        warn "$list_path line $.: no numeric start/end in fields 2 and 3; "
           . "skipped\n";
        next RECORD;
    }
    if ($beg < 1 || $end > length $genome) {
        warn "$list_path line $.: region $beg..$end exceeds the sequence "
           . "(length " . length($genome) . "); truncated\n";
    }

    # The list line itself serves as the header of the output record.
    print $record, "\n";

    my $seq = extract_region($genome, $beg, $end);
    if (defined $strand && $strand =~ m/R/) {
        $seq = reverse_complement($seq);
    }

    print_wrapped($opt{p} ? translate($seq) : $seq);
}

close $list_fh;

#print $file2; 
#while (<>) {
#}
