ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/yamap/orphan_size.pl
Revision: 1.1
Committed: Wed Dec 13 10:42:46 2006 UTC (9 years, 11 months ago) by gawi79
Branch: MAIN
CVS Tags: HEAD
Log Message:
A QuickMine script

Line File contents
1 #!/usr/bin/perl -w
2
3 ###################################
4 #SCRIPT NAME: orphan_size.pl
5 #FUNCTION: Calculates the size of each orphan and creates relevant fasta files (short, long)
6 ###################################
7
8
9 use strict;
10
11 use Config::Simple;
12 use Bio::SeqIO;
13
14 my ($tag, @tags, @list, @seq, $orphan, $total, $large_count, $short_count, $genome, @genomes, $total_length, $mean_size, $genome_count, $non_orphan, @old_para, $old, $good_orphan, @good_orphans);
15
16
17 unless (@ARGV ==2) {
18 die "\n\nUsage:\n ./orphan_size.pl para_check configfile\nPlease try again.\n\n\n";}
19
20 # para_check determines whether the file should remove non-orphan paralogues from the list of orphans, 1 = remove. #Requires old_para file to remove.
21 my $para_check = shift;
22 my $config_file = shift;
23
24 my $cfg = new Config::Simple($config_file);
25 my $record_separator = $cfg->param('PARAMS.record_separator');
26 my $path2output = $cfg -> param('PATHS.path2output');
27 my $path2blast = $cfg -> param('PATHS.path2blast');
28 my $ext = $cfg -> param('PARAMS.ext');
29
30 if ($record_separator =~ "tab") {$record_separator = "\t"}
31
32 print "RECORD SEP $record_separator\n";
33
34 open (TAGS, "$path2output"."/abbr.list") or die "can't open abbr.list file";
35 open (OUT, ">$path2output"."/orphan_size.txt");
36 open (OUTH, ">$path2output"."/orphan_size.html");
37 print OUT "<PRE>Genome Summary Table\n";
38 print OUT "genome$record_separator total_orphans$record_separator long_orphans$record_separator short_orphans$record_separator Mean_orphan_length\n";
39 print OUTH "<PRE>Genome Summary Table\n";
40 print OUTH "genome$record_separator total_orphans$record_separator long_orphans$record_separator short_orphans$record_separator Mean_orphan_length\n";
41
42 while ($tag = <TAGS>)
43 {
44 chomp($tag);
45 push (@tags, $tag);
46 }
47
48 foreach $tag (@tags)
49 {
50 open (SEQOUT, ">$path2output/$tag"."_orphan.faa.complete");
51 print "$tag\n";
52 if ($tag=~m/,/)
53 {
54 $large_count = 0;
55 $short_count = 0;
56 $total = 0;
57 $total_length = 0;
58 @genomes = split /,/, $tag;
59 $genome_count = 0;
60 foreach $genome (@genomes)
61 {
62 &orphan_size($tag,$genome,$ext,$path2output,$path2blast,$genome_count,$para_check);
63 print "$tag,$genome,$ext,$path2output,$genome_count\n";
64 $genome_count++;
65 }
66
67 }
68 else
69 {
70 $large_count = 0;
71 $short_count = 0;
72 $total = 0;
73 $total_length = 0;
74 $genome_count = 1;
75 &orphan_size($tag,$tag,$ext,$path2output,$path2blast,$genome_count,$para_check);
76 }
77 close SEQOUT;
78
79 }
80 close OUT;
81 close OUTH;
82
83 sub orphan_size
84 {
85 my $tag = $_[0];
86 my $genome = $_[1];
87 my $ext = $_[2];
88 my $path2output = $_[3];
89 my $path2blast = $_[4];
90 my $genome_count = $_[5];
91 my $para_check = $_[6];
92 my $no_file = 0;
93 my @good_orphans = ();
94 open (LIST, "$path2output/$tag"."_orphan_list.html") or $no_file = 1;
95 @list = <LIST>;
96 close LIST;
97 if ($#list < 1)
98 {
99 $no_file = 1;
100 }
101 if ($para_check == 1)
102 {
103 open (PARA, "$path2output/old_para.txt") or die "can't open old_para.txt";;
104 @old_para = <PARA>;
105 close PARA;
106 }
107 print "no file = $no_file\n";
108 my $inseq = Bio::SeqIO ->new('-file' => "<$path2blast/$genome$ext".".complete", '-format' => 'fasta');
109 foreach $orphan (@list)
110 {
111 #if ($orphan =~m/\<a href=".*(.*orf.{4}).*"/)
112 if ($orphan =~m/>(.*orf.{4})/)
113 {
114 $orphan = $1;
115 if ($para_check == 1)
116 {
117 $old = 0;
118 foreach $non_orphan (@old_para)
119 {
120 chomp $non_orphan;
121 if ($orphan =~m/$non_orphan/)
122 {
123 $old = 1;
124 }
125 }
126 if ($old == 0)
127 {
128 push (@good_orphans, $orphan);
129 }
130 }
131 else
132 {
133 if ($orphan =~m/$genome/)
134 {
135 push (@good_orphans, $orphan);
136 }
137 }
138 }
139 }
140 print "good orphans = $#good_orphans\n";
141 foreach $good_orphan (@good_orphans)
142 {
143 if ($good_orphan =~m/$genome/)
144 {
145 while (my $seq = $inseq->next_seq)
146 {
147 my $orf_id = $seq->display_id;
148 #print "orf_id = $orf_id\n";
149 $orf_id =~m/(.+)\.fasta/;
150 my $orf = $1;
151 print "orf = $orf\n";
152 if ($orf =~m/$good_orphan/)
153 {
154 $total++;
155 my $length = $seq->length();
156 my $sequence = $seq->seq();
157 my $description = $seq->description;
158 $total_length = $length + $total_length;
159 print SEQOUT ">$orf_id $description\n$sequence\n";
160 if ($length >149)
161 {
162 $large_count++;
163 }
164 else
165 {
166 $short_count++;
167 }
168 last;
169 }
170 }
171 }
172 }
173
174 if ($no_file == 0)
175 {
176 if ($genome_count == 1)
177 {
178 $mean_size = $total_length/$total;
179 print OUT "$tag$record_separator$total$record_separator$large_count$record_separator$short_count$record_separator$mean_size\n";
180 print OUTH "$tag$record_separator$total$record_separator$large_count$record_separator$short_count$record_separator$mean_size\n";
181 }
182 }
183 else
184 {
185 print OUT "$tag$record_separator"."0$record_separator"."0$record_separator"."0$record_separator"."-\n";
186 print OUTH "$tag$record_separator"."0$record_separator"."0$record_separator"."0$record_separator"."-\n";
187 }
188 }