ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/msatfinder/concat
Revision: 1.1.1.1 (vendor branch)
Committed: Mon Mar 7 15:34:43 2005 UTC (11 years, 2 months ago) by knirirr
Branch: MAIN
CVS Tags: HEAD, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
First import

Line File contents
#!/usr/bin/perl

# After finding repeats in multiple genomes (sequences) with msatfinder,
# run this script to concatenate all repeats belonging to the same genome
# into a single file for further processing.
#
# Input : every file matching *fasta in $path2files.  The genome a file
#         belongs to is the leading run of word characters in its name
#         (everything before the first character that is not a letter,
#         digit or underscore -- normally the first dot).
# Output: one file per genome, $path2output/<genome>.con.fasta, holding
#         the contents of that genome's repeat files in glob (sorted)
#         order.  An existing output file is overwritten.
# Screen: the number of genomes found, followed by their names.
# Dies if the input directory cannot be entered or any file cannot be
# opened, written or closed.

use strict;
use warnings;

# where are the /Fasta repeat files?
my $path2files = "/home/dfield/msatminer/baculomsats/Fasta/";

# where would you like to store the resulting concatenated genome files?
# no trailing slash
my $path2output = "/home/dfield/msatminer/baculomsats/Fasta";

# set to a true value to trace file handling on screen
my $debug = 0;

# change to the location of the repeat files; stop if that fails, because
# otherwise the glob below would silently pick up whatever *fasta files
# happen to be in the current directory
chdir $path2files
    or die "Can't change directory to $path2files: $!\n";

my @files_trunc;    # unique genome names, in the order first seen
my %files_for;      # genome name => array ref of its full file names

# group the repeat files by the genome they belong to
foreach my $file ( glob '*fasta' )
{
    # Results of an earlier run also match *fasta when the output directory
    # is the input directory.  Reading them back would duplicate data, and
    # a genome's own output file would be read while it is being rewritten.
    next if $file =~ /\.con\.fasta\z/;

    # truncate to the first part of the name
    my ($genome) = $file =~ /\A(\w+)/;
    if ( !defined $genome )
    {
        warn "Skipping $file: name does not start with a genome name\n";
        next;
    }
    if ($debug) { print "File: $file -> $genome\n"; }

    # Exact grouping by prefix: a regex match of the prefix against the
    # file name would also accept files of any genome whose name merely
    # contains this one (e.g. NC_0016 matching NC_00162.1.fasta).
    push @files_trunc, $genome unless exists $files_for{$genome};
    push @{ $files_for{$genome} }, $file;
}

# print message to screen
print "Total genomes found: " . @files_trunc . "\n@files_trunc\n";

# concatenate all repeats into a file for the genome to which they belong
foreach my $file_trunc (@files_trunc)
{
    my $out_name = "$path2output/$file_trunc.con.fasta";
    open( my $out, '>', $out_name )
        or die "Can't open $out_name for writing: $!\n";

    foreach my $file ( @{ $files_for{$file_trunc} } )
    {
        if ($debug) { print "Matches: $file\n"; }
        open( my $in, '<', $file )
            or die "Can't open $file for reading: $!\n";
        while ( my $line = <$in> )
        {
            print {$out} $line
                or die "Can't write to $out_name: $!\n";
        }
        close $in
            or die "Can't close $file: $!\n";
    }

    # buffered write errors only surface when the handle is closed
    close $out
        or die "Can't close $out_name: $!\n";
}