ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/yamap/2qmfasta.pl
Revision: 1.1
Committed: Wed Dec 13 10:31:29 2006 UTC (9 years, 5 months ago) by gawi79
Branch: MAIN
CVS Tags: HEAD
Log Message:
A QuickMine Script

Line File contents
#!/usr/local/bin/perl

# 2qmfasta.pl v 0.02
#
# Cared for by Gareth Wilson (gawi@ceh.ac.uk)
#
# Reads every multi-FASTA protein file matching "*<ext>" in the configured
# protein directory and, for each record, rewrites the header so that it
# starts with a generated name of the form "<abbr>orf<NNNN>.fasta", where
# <abbr> is the input file name with the extension removed and <NNNN> is a
# zero-padded per-file record counter.
#
# Output written to the configured output directory:
#   SELF_blast_database   - all renamed records from all input files
#   <file>.complete       - the renamed records of one input file
#   <abbr>orfNNNN.fasta   - one file per record (only if write_fasta_files)
#   abbr.list             - one abbreviation per processed input file
#
# Usage: ./2qmfasta.pl file.cfg

use strict;
use warnings;
use Config::Simple;

unless (@ARGV == 1) {
    die "\n\nUsage:\n ./2qmfasta.pl file.cfg\nPlease try again.\n\n\n";
}

# pick up the values in the config file
my $config_file = shift;

# create a new object containing the variables in the cfg file;
# Config::Simple->new returns undef when the file cannot be read or parsed
my $cfg = Config::Simple->new($config_file)
    or die "Can't read config file $config_file: " . Config::Simple->error() . "\n";

# initialize the variables shared with the config file
my $path2proteins     = $cfg->param('PATHS.path2proteins');
my $ext               = $cfg->param('LEAVE_ALONE.ext');
my $path2output       = $cfg->param('PATHS.path2output');
my $write_fasta_files = $cfg->param('LEAVE_ALONE.write_fasta_files');

# in the quickmine file - so leave as our
our @cmd = "";

# NOTE(review): this value is read but never used below; the generated file
# names always end in ".fasta". Confirm whether it was meant to replace that.
my $fasta_file_ending = $cfg->param('LEAVE_ALONE.fasta_file_ending');

# These three settings are interpolated into paths and patterns below, so a
# missing one would silently produce wrong paths rather than a clear error.
my %required = (
    'PATHS.path2proteins' => $path2proteins,
    'LEAVE_ALONE.ext'     => $ext,
    'PATHS.path2output'   => $path2output,
);
foreach my $key (sort keys %required) {
    die "Missing '$key' in config file $config_file\n"
        unless defined $required{$key} && length $required{$key};
}

# NOTE(review): a relative path2output is resolved AFTER this chdir, i.e.
# relative to the protein directory - same as the original behaviour.
chdir $path2proteins
    or die "Can't change directory to $path2proteins: $!\n";

open(my $abbr_fh, '>', "$path2output/abbr.list")
    or die "Can't open $path2output/abbr.list for writing\n";

my $blast_database = "SELF_blast_database";    # fasta file of all proteins

# list of initial protein files
my @files = glob("*$ext");

my $debug = 1;

if ($debug) { print "FILES: @files\n"; }

warn "No files matching *$ext found in $path2proteins\n" unless @files;

open(my $summary_fh, '>', "$path2output/$blast_database")
    or die "can't open blast database $path2output/$blast_database for writing: $!";

foreach my $file (@files) {

    # Three-arg open: a globbed file name must never be parsed as an open mode.
    open(my $in_fh, '<', $file) or die "can't read file $file: $!\n";
    open(my $complete_fh, '>', "$path2output/$file.complete")
        or die "can't open $path2output/$file.complete for writing: $!\n";

    # Abbreviation of the genome = file name minus the extension. The
    # extension is quoted and anchored so that characters such as "." are
    # matched literally and only at the end of the name.
    my $abbr = $file;
    if ($file =~ /\A(.+)\Q$ext\E\z/s) {
        $abbr = $1;
    }
    else {
        warn "$file does not end in '$ext'; using the full file name as abbreviation\n";
    }

    my $header = <$in_fh>;    # header line of the current fasta record
    if (!defined $header || $header !~ /^>/) {
        die "$0: file $file doesn't begin with header line.\n";
    }

    my $count = 0;            # orf count within this file

    while (defined $header) {

        if ($debug) { print "$header\n"; }

        # read the sequence: everything up to the next header line or EOF
        my @seq_lines;
        my $next_header;
        while (defined(my $line = <$in_fh>)) {
            if ($line =~ /^>/) {
                $next_header = $line;
                last;
            }
            push @seq_lines, $line;
        }
        my $seq = join('', @seq_lines);

        # process the sequence and write to file
        $count++;

        # Zero-padded to four digits (0001, 0002, ...); counts above 9999
        # simply grow wider.
        my $newfilename = sprintf('%sorf%04d.fasta', $abbr, $count);

        (my $new_header = $header) =~ s/^>/>$newfilename /;

        # Guarantee the record ends with a newline so that the next record
        # (possibly from another input file) starts on its own line.
        my $record = $new_header . $seq;
        $record .= "\n" unless $record =~ /\n\z/;

        # this writes the SELF_blast_database
        print {$summary_fh} $record;
        print {$complete_fh} $record;

        if ($write_fasta_files) {
            open(my $out_fh, '>', "$path2output/$newfilename")
                or die "can't open $path2output/$newfilename for writing: $!";
            print {$out_fh} $record;
            close($out_fh)
                or die "can't close $path2output/$newfilename: $!";
        }
        else {
            print "Did not write individual fasta files\n";
        }

        $header = $next_header;    # undef at end of file ends the loop

    }    # end while $header

    print {$abbr_fh} "$abbr\n";

    close($in_fh);
    close($complete_fh)
        or die "can't close $path2output/$file.complete: $!\n";
}    # end foreach $file

# Buffered write errors only surface at close, so check the shared outputs.
close($summary_fh)
    or die "can't close blast database $path2output/$blast_database: $!\n";
close($abbr_fh)
    or die "Can't close $path2output/abbr.list: $!\n";

print "Done\n";