ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/yamap/hitsparser.pl
Revision: 1.1
Committed: Wed Dec 13 10:45:01 2006 UTC (9 years, 7 months ago) by gawi79
Branch: MAIN
CVS Tags: HEAD
Log Message:
A QuickMine script

Line File contents
#!/usr/bin/perl

##############################################
##############################################
# SCRIPT NAME: hit_summarizer
# FUNCTION: summarize *overview.html files created by get_orphans
# AUTHOR: Cared for by Gareth Wilson (gawi@ceh.ac.uk)
##############################################
#
# USAGE: hitsparser.pl <file ending in overview.html> <configfile>
#
# The config file is read with Config::Simple and must provide:
#   PATHS.path2output        - only echoed in the report header
#   PARAMS.record_separator  - field separator of the overview file; a value
#                              containing the word "tab" (or a literal \t)
#                              selects a real tab character
#
# INPUT LAYOUT (as consumed by this script):
#   line 1   - summary line, discarded
#   line 2   - header line, split on the record separator
#   line 3.. - one record per sequence; field 0 is not summarised, the
#              fields up to the second-to-last are per-data-set values
#              compared with "> 0", and the last field is used as the number
#              of data sets matched by that sequence.
#
# OUTPUT: a plain-text report on STDOUT, opened with a "<PRE>" tag.

use strict;
use warnings;

# Takes an input file ending in .overview.html as an input

use Config::Simple;

###########################################################
# Parse the command line and die if it doesn't look right #
###########################################################

unless (@ARGV == 2) {
    die "\n\nProper Command Line Usage: hitsparser.pl *overview.html configfile \nPlease try again.\n\n\n";
}

my ($file, $config_file) = @ARGV;

unless ($file =~ /overview\.html$/) {
    die "\n\n Your input file must end with overview.html\nPlease try again.\n\n\n";
}

###########################################################
# Read the configuration                                  #
###########################################################

# Config::Simple->new returns undef on failure; report why instead of
# failing later with "Can't call method on undefined value".
my $cfg = Config::Simple->new($config_file)
    or die "can't read config file $config_file: " . Config::Simple->error() . "\n";

my $path2output = $cfg->param('PATHS.path2output');
$path2output = '' unless defined $path2output;

# get the record separator from the config file
my $record_separator = $cfg->param('PARAMS.record_separator');
unless (defined $record_separator && length $record_separator) {
    die "PARAMS.record_separator is missing or empty in $config_file\n";
}

# Convert the word "tab" to a real tab, since writing \t in the config file
# yields the two literal characters backslash + t. That literal form is
# accepted as well, so it both splits on and prints a real tab.
if ($record_separator =~ /tab/ || $record_separator eq '\t') {
    $record_separator = "\t";
}

# Quote the separator so characters such as | . + are matched literally
# instead of being interpreted as regex syntax.
my $separator_re = qr/\Q$record_separator\E/;

###########################################################
# Read the overview file                                  #
###########################################################

open(my $in, '<', $file) or die "can't open file: $file: $!";
my @lines = <$in>;
close($in);
chomp(@lines);

shift @lines;                   # get rid of summary line
my $header = shift @lines;
$header = '' unless defined $header;

# Everything left after the two leading lines counts as one sequence.
my $seqs = @lines;

my @header = split($separator_re, $header);

# Split every record once; both summaries below reuse the parsed rows.
my @rows = map { [ split($separator_re, $_) ] } @lines;

# Number of header fields minus one. The per-data-set columns summarised
# below are 1 .. $total - 1, i.e. everything except the first and the last
# header field.
my $total = @header - 1;

# NOTE(review): the printed label says "including SELF" while the variable
# name says "minus self"; the value is kept as the original computed it
# (header fields - 3). Confirm which wording is intended.
my $total_minus_self = $total - 2;

# print some details of file
print "<PRE>";
print "path2 = $path2output, file = $file\n";
print "Total sequences in file: $seqs\n";
print "Unique Data Sets compared including SELF: $total_minus_self\n";

###########################################################
# SUMMARY 1: per data set, how many sequences have a hit  #
###########################################################

print "Genome$record_separator genes with hit$record_separator percentage hits$record_separator total hits\n";
for my $column (1 .. $total - 1) {
    my ($hits, $total_hits) = (0, 0);

    for my $row (@rows) {
        my $value = field_value($row->[$column]);
        $hits++ if $value > 0;
        $total_hits += $value;
    }

    # A file with a header but no records would otherwise divide by zero.
    my $per_hits = $seqs ? ($hits / $seqs) * 100 : 0;

    print "$column$record_separator$header[$column]$record_separator$hits$record_separator$per_hits$record_separator$total_hits\n";
}

###########################################################
# SUMMARY 2: histogram of the last field of every record  #
###########################################################

print "\nData sets matched (not including self), number of genes that match this number of data sets\n";

my @count;
for my $row (@rows) {
    my $total_species = $row->[-1];

    # Only non-negative integers can be used as a bucket; surrounding
    # whitespace (e.g. a stray carriage return) is tolerated.
    next unless defined $total_species
        && $total_species =~ /\A\s*(\d+)\s*\z/;
    $count[$1]++;
}

# Buckets 1 .. $total - 1; bucket 0 (no data set matched) is not printed.
for my $matched (1 .. $total - 1) {
    my $genes = defined $count[$matched] ? $count[$matched] : 0;
    print "$matched$record_separator$genes\n";
}

# Return a field's value, treating a missing or empty field as 0 so that
# short or ragged records do not produce "uninitialized" warnings.
sub field_value {
    my ($value) = @_;
    return (defined $value && length $value) ? $value : 0;
}