ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/yamap/orphan_count.pl
Revision: 1.1
Committed: Wed Dec 13 10:42:46 2006 UTC (9 years, 6 months ago) by gawi79
Branch: MAIN
CVS Tags: HEAD
Log Message:
A QuickMine script

Line File contents
#!/usr/bin/perl -w


###################################
#SCRIPT NAME: orphan_count.pl
#FUNCTION: Counts the number of orphans present in _SELF_blastp_overview.html files
#
#USAGE: ./orphan_count.pl self_hit count_end configfile
#   self_hit   - 1: an ORF is reported as an orphan when its hit count is <= 1
#                0: an ORF is reported as an orphan when its hit count is == 0
#                (presumably 1 means the hit count includes the ORF's own
#                self hit - confirm against the overview generator)
#   count_end  - suffix appended to every tag read from abbr.list to build
#                the name of the overview file for that tag
#   configfile - Config::Simple file providing PARAMS.record_separator
#
#INPUT:  abbr.list (one tag per line) and one "<tag><count_end>" file per tag
#OUTPUT: orphan_count.html (summary table, one row per tag plus grand total)
#        <tag>_orphan_list.html (first field of every orphan line), per tag
###################################


use strict;
use warnings;

use Config::Simple;

unless (@ARGV == 3) {
    die "\n\nUsage:\n ./orphan_count.pl self_hit count_end configfile\nPlease try again.\n\n\n";
}

my $self_hit    = shift;
my $count_end   = shift;
my $config_file = shift;

# Config::Simple->new returns undef on failure; report it here instead of
# failing later with "Can't call method param on an undefined value".
my $cfg = Config::Simple->new($config_file)
    or die "can't read config file $config_file: "
         . (Config::Simple->error() || 'unknown error') . "\n";

# get the record separator from the config file
my $record_separator = $cfg->param('PARAMS.record_separator');
unless (defined $record_separator && length $record_separator) {
    die "PARAMS.record_separator is missing or empty in $config_file\n";
}

# convert since use of \t in config file results in literal \t being printed
if ($record_separator =~ /tab/) { $record_separator = "\t" }

print "RECORD SEP $record_separator\n";

# The separator is a literal string, not a pattern: quote it so that
# separators such as "|" or "." are not interpreted as regex metacharacters.
my $separator_re = qr/\Q$record_separator\E/;

# Read the list of tags; their order defines which column belongs to which tag.
open(my $tags_fh, '<', 'abbr.list') or die "can't open abbr.list file: $!";
my @tags;
while (my $tag = <$tags_fh>) {
    chomp $tag;
    next if $tag eq '';    # ignore blank lines (e.g. a trailing empty line)
    push @tags, $tag;
}
close $tags_fh;

open(my $out_fh, '>', 'orphan_count.html')
    or die "can't write orphan_count.html: $!";
print {$out_fh} "<PRE>Genome Summary Table\n";
print {$out_fh} "genome$record_separator total_orfs$record_separator total_orfans$record_separator percentage_orfans$record_separator Total families\n";

my $grand_total_orfs   = 0;
my $grand_total_orfans = 0;

# read each overview file in order of tags (NC numbers)
my $tag_count = 0;
foreach my $tag (@tags) {
    $tag_count++;

    # Field index read as the "family" value for this tag: the N-th tag
    # (1-based) uses field N+1 of every line in its own overview file.
    my $family_column = $tag_count + 1;

    my $file = $tag . $count_end;
    print "TAG: $tag - Counting orphans $file\n";
    open(my $in_fh, '<', $file) or die "can't open file: $file: $!";

    my $orphan_file = $tag . '_orphan_list.html';
    open(my $orphan_fh, '>', $orphan_file)
        or die "can't write $orphan_file: $!";
    print {$orphan_fh} "<PRE>Proposed orphan genes in $tag\n";

    my $total_orfs     = 0;
    my $total_orfans   = 0;
    my $total_families = 0;

    my $line_index = 0;
    while (my $orf = <$in_fh>) {
        # The first two lines of every overview file are never counted
        # (presumably header rows - confirm against the file format).
        next if $line_index++ < 2;

        chomp $orf;
        # Blank lines are not ORFs; counting them would also inflate the
        # orphan total because an absent hit count compares as zero.
        next if $orf eq '';

        my @fields = split $separator_re, $orf;
        next unless @fields;    # line consisted of separators only

        $total_orfs++;

        my $query  = $fields[0];
        my $hits   = $fields[-1];               # hit count is the last field
        my $family = $fields[$family_column];   # undef if the line is short

        if (defined $family && $family > 1) {
            $total_families++;
        }

        # Any self_hit value other than 0 or 1 reports no orphans at all.
        my $is_orphan = ($self_hit == 1 && $hits <= 1)
                     || ($self_hit == 0 && $hits == 0);
        if ($is_orphan) {
            print {$orphan_fh} "$query\n";
            $total_orfans++;
        }
    }
    close $in_fh;
    close $orphan_fh or die "can't finish writing $orphan_file: $!";

    if ($total_orfs == 0) {
        print "$tag - no entries!!!\n";
        next;
    }

    print qq{Total ORFS in $tag = $total_orfs\n};
    print qq{Total ORFans in $tag = $total_orfans\n};
    my $percentage_orfans = sprintf("%.2f", ($total_orfans / $total_orfs) * 100);
    print qq{Percentage of ORFans in $tag = $percentage_orfans%\n};

    print {$out_fh} qq{$tag$record_separator$total_orfs$record_separator$total_orfans$record_separator$percentage_orfans$record_separator$total_families\n};

    $grand_total_orfs   += $total_orfs;
    $grand_total_orfans += $total_orfans;
} # next tag and file

# Guard the division: with no tags, or only empty overview files, there are
# no ORFs at all and the grand percentage is reported as 0.00.
my $grand_percentage = $grand_total_orfs
    ? ($grand_total_orfans / $grand_total_orfs) * 100
    : 0;
$grand_percentage = sprintf("%.2f", $grand_percentage);
print {$out_fh} qq{\nGrand Total$record_separator$grand_total_orfs$record_separator$grand_total_orfans$record_separator$grand_percentage};
close $out_fh or die "can't finish writing orphan_count.html: $!";