#!/usr/bin/env perl
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
#             Mapping.pm odyssey
#
#     Copyright (C) 2001 Keio University
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# 
#   $Id: Mapping.pm,v 1.3 2001/09/05 11:14:31 s98982km Exp $

package G::Tools::Mapping;

use SubOpt;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;

@ISA = qw(Exporter AutoLoader);
# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.
@EXPORT = qw(
	     _mask_repeat_for_mapping
	     _foreach_mask_repeat_for_mapping
	     _cutquery_for_mapping
	     _blast_db_for_mapping
	     _formatdb_for_mapping
	     _blast_for_mapping
	     _blastpointer_for_mapping
	     _foreach_blastpointer_for_mapping
	     _file_list_for_mapping
);
$VERSION = '0.01';

#::::::::::::::::::::::::::::::
#        Methods Start
#::::::::::::::::::::::::::::::
sub new{
    my $pkg = shift;
    my $filename = shift;
    my $option = shift;
    my $this;

    return $this;
}


###################################################
#  mapping tools                                  #
###################################################
#mask_repeat
#cdna=$ARGV[2]
sub _mask_repeat_for_mapping{
    my $cdna=shift;
    my $query=shift;
    my $lib=shift;
    my @file;

    chdir($query);
    if($lib){
	system('RepeatMasker','-lib',"$lib",'-cutoff','250','-a',"$cdna");
    }
    else{
	system('RepeatMasker','-cutoff','250','-a',"$cdna");
    }

    rename $cdna,$cdna.'.original';
    rename $cdna.'.masked',$cdna;
#    system('cp',$cdna,$cdna.'.masked');
}


sub _foreach_mask_repeat_for_mapping{
    my $lib=shift;
    my $query=shift;
    my @file;

    opendir(DIR,$query);
    @file=readdir(DIR);
    
    foreach(@file){
	if(/\.fst/){
	    &mask_repeat_for_mapping($_,$query,$lib);
	}
    }
}


#cutquery
#$query_first=$ARGV[0]
sub _cutquery_for_mapping{
    my $filenumber;
    my $cdna=shift;
    my $query=shift;
    open(IN, $cdna);

    $filenumber = 0;
    while(<IN>){
	
        if($_ =~ /^>/){
	    $filenumber++;
	    open(OUT, ">$query$filenumber.fst");
	    printf OUT "$_";
        }

        else{
	    printf OUT "$_";
        }
    }
    
    close(OUT);
    close(IN);
}


###################################################
## $database = $ARGV[0]  |  $query = $ARGV[1]
## usage:perl blast_db.pl seq-file where_to_cut
sub _blast_db_for_mapping{
    my $database=shift;
    my $limit=shift;
    my $len;
    my $i=1;
    my $extra;
    my @lab;
    my $lens;
    my @file;
    my $data;
    my @filequery;
    my $k;
    
    opendir(DIRD, $database);
    @file=readdir(DIRD);
    closedir(DIRD);
    
    chdir $database;
    
    foreach $k (@file){
	
	open(INFILE, $k);
	open(OUTFILE,'>' . $k . "_blast");
	
	while(<INFILE>){
	    if(/\>/){
		@lab=split(/\s/,$_);
		print OUTFILE $lab[0],"_$i\n";
	    }
	    unless(/\>/){
		tr/\n//d;
		$len=$len+length($_);
		if($len>=$limit){
		    $i++;
		    $extra=$len-$limit;
		    print OUTFILE substr($_,0,length($_)-$extra);
		    print OUTFILE "\n",$lab[0],"_$i\n";
		    print OUTFILE substr($_,length($_)-$extra),"\n";
		    $len=$extra;
		}
		else{
		    print OUTFILE $_,"\n";
		}
	    }
	}
	close(OUTFILE);
	close(INFILE);
    }
}


###################################################
## $database=$ARGV[0]
## usage:perl formatdb.pl databasedirectory 
sub _formatdb_for_mapping{
    my @file;
    my $database=shift;
    
    opendir(DIRD, $database);
    @file=readdir(DIRD);
    
    closedir(DIRD);
    chdir($database);
    foreach(@file){
	
	system('formatdb','-i',"$_",'-p','F','-o','T') if(/\.fa$/);
    }
}


##################################################
## $database = $ARGV[0] | $query = $ARGV[1]
## usage:perl blast.pl databasedirectory querylist

sub _blast_for_mapping{
    
    my $database=shift;
    my $query=shift;
    
    my @filedata;
    my @filequery;
    my $data;
    
    
    
    opendir(DIRD, $database);
    @filedata=readdir(DIRD);
    closedir(DIRD);
 
##
    
    opendir(DIRQ, $query);
    @filequery=readdir(DIRQ);
    closedir(DIRQ);
    
##
    
    foreach(@filedata){
	if(/\.fa$/){
	    $data.=$_."\\ ";
	}
    }
    
    $data='"'.$data.'"';

    chdir($database);

    foreach(@filequery){
	if(/\.fst$/){
	    system('qr','blastall','-p','blastn','-d',"$data",'-i',"$query$_",'-o',"$query$_.rst",'-v','20','-b','20');
	}
    }
}


sub _blastpointer_for_mapping{
    my $filename=shift;
    my $limit=shift;
    my $switch;
    my $switch2;
    my $switch3;
    my $switch4;
    my @ID;
    my @aln;
    my @CHR;
    my @line;
    my $seq;
    my $len;
    my $hit;
    my @Evalue;
    my @sbjct_line;
    my @query_line;
    my %qpos;
    my %spos;
    my $q;
    my $tmp;
    my @tmp2;
    my $name;
    my $cond;
    my $start;
    my $stop;
    my %hash;
    my $ind;
    my $rind;
    my $Nnum;
    my $t;

    open(OUTFILE,'>>gene_list.txt');
    open(INFILE,$filename);
    while(<INFILE>){
	tr/\n//d;
	$cond=0;
	if($switch4==1){
	    $len=$_;
	    $len=~tr/\(\) letters\n//d;
	    $switch4=0;
	}
	if(/Query=/){
	    $name=$_;
	    $name=~s/Query= //;
	    $name=~tr/\n//d;
	    $switch4=1;
	}

	if(/Sequences producing significant alignments\:/){
	    $switch=1;
	}
	elsif($switch==1 && $_ ne "" && $_ !~ /\>/){
	    if(/(\S+)\s+\d+\s(.+)/){
		$line[0]=$1;
		$line[2]=$2;
	    }
	    if($line[2]=~m/e-/){
		@Evalue=split(/-/,$line[2]);
		if($Evalue[1]>100){
		    push(@ID,$line[0]);
		}
	    }
	    elsif($line[2]=="0.0"){
		push(@ID,$line[0]);
	    }    
	}
	if(/^\>/){
	    if(%qpos){
		@tmp2=sort{$a <=> $b}keys(%qpos);
		$tmp=substr($seq,$tmp2[0]-1,abs($tmp2[-1]-$tmp2[0])+1);
		$Nnum=$tmp=~tr/N/N/;
		if($Nnum<50){
		    @tmp2=sort{$a <=> $b}keys(%qpos);
		    for(my $i=$tmp2[0]-1;$i<=$tmp2[-1]-1;$i++){
			substr($seq,$i,1)="N";
		    }
		    foreach(keys(%spos)){
			$hash{$q}{pos}{$_}=1;
		    }
		}
	    }
	    %spos=();
	    %qpos=();
	    $hit=$seq=~tr/N/N/;
	    foreach $tmp (sort{$a <=> $b} keys(%hash)){
		@tmp2=sort{$a <=> $b} keys(%{$hash{$tmp}{pos}});
		$ind=index($seq,"N")+1;
		$rind=rindex($seq,"N")+1;
		print OUTFILE $name,"\t",$hash{$tmp}{chromosome},"\t",$tmp2[0],"\t",$tmp2[-1],"\t",$tmp2[-1]-$tmp2[0]+1,"\t",$hash{$tmp}{strand},"\t",$hash{$tmp}{evalue},"\t",$ind,"\t",$rind,"\t",sprintf("%.2f",$hit/$len),"\($hit/$len\)","\t",sprintf("%.2f",$hit/($tmp2[-1]-$tmp2[0]+1)),"\($hit/",$tmp2[-1]-$tmp2[0]+1,"\)","\t",((split(/\//,$filename))[-1]),"\n" if($hash{$tmp}{evalue});
	    }
	    %hash=();
	    $seq="";
	    for(my $i=0;$i<$len;$i++){
		$seq.="Y";
	    }
	    $switch=0;
	        
	    $q= $_;
	    $q=~ tr/\>\n //d;
	        
	    @CHR=split(/_/,$_);     
	    $CHR[1]=~tr/\n //d;
	        
	    foreach $t (@ID){
		if($q eq $t){
		    $switch=2;
		           
		    $hash{$q}{chromosome}=$_;
		    $hash{$q}{chromosome}=~s/\>//;
		    $hash{$q}{chromosome}=~s/\_\w+//;
		    $hash{$q}{chromosome}=~tr/\n//d;
		}
	    }
	}
	elsif($switch==2){
	    if(/Expect \= /){
		unless($hash{$q}{evalue}){
		    $hash{$q}{evalue}=(split(/ \= /,$_))[2];
		}
		if(%qpos){
		    @tmp2=sort{$a <=> $b}keys(%qpos);
		    $tmp=substr($seq,$tmp2[0]-1,abs($tmp2[-1]-$tmp2[0])+1);
		    $Nnum=$tmp=~tr/N/N/;
		    if($Nnum<50){
			@tmp2=sort{$a <=> $b}keys(%qpos);
			for(my $i=$tmp2[0]-1;$i<=$tmp2[-1]-1;$i++){
			    substr($seq,$i,1)="N";
			}
			foreach(keys(%spos)){
			    $hash{$q}{pos}{$_}=1;
			}
		    }
		}
		%spos=();
		%qpos=();
	    }
	    if(/Strand \= /){
		tr/\n//d;
		unless($hash{$q}{strand}){
		    $hash{$q}{strand}=(split(/ \/ /,$_))[1];
		}
		if($hash{$q}{strand} eq (split(/ \/ /,$_))[1]){
		    $switch3=0;
		}
		else{
		    $switch3=1;
		}
	    }
	    if($switch3==0){
		if(/Query:/ || $switch2==1){
		    $switch2=1;
		        
		    push(@aln,$_);
		     
		    if(/Sbjct:/){
			@query_line=split(/\s+/,$aln[0]);
			$qpos{$query_line[1]}=1;
			$qpos{$query_line[3]}=1;
			@sbjct_line=split(/\s+/,$aln[2]);
			$spos{$sbjct_line[1]+$limit*($CHR[1]-1)}=1;
			$spos{$sbjct_line[3]+$limit*($CHR[1]-1)}=1;
			@aln=();
			$switch2=0;
		    }  
		}
	    }
	}
    }    
    close(INFILE);
    close(OUTFILE);    
}


sub _foreach_blastpointer_for_mapping{
    my $dir=shift;
    my $limit=shift;
    my @filedata;
    my %hash;
    
    opendir(DIRD, $dir);
    
    @filedata=readdir(DIRD);
    closedir(DIRD);

    foreach(sort{$a <=> $b}@filedata){
	if(/\.rst$/){
	    &blastpointer_for_mapping($dir.$_,$limit);
	}
    }
}


sub _file_list_for_mapping{
    my $query=shift;
    my @file;
    my $line;

    opendir(DIR,$query);
    @file=readdir(DIR);
    
    open(OUT,'>ID_list.txt');
    chdir($query);
    foreach(@file){
	if(/\.fst$/){
	    open(FILE,$_);
	    $line=<FILE>;
	    $line=~tr/>\n//d;
	    close(FILE);
	    print OUT "$line\t$_\.rst\n";
	}
    }
    close(OUT);
}


sub DESTROY {
    my $self = shift;
}

1;
__END__
# Below is the stub of documentation for your module. You better edit it!

=head1 NAME

G::Tools::Mapping - Perl extension for blah blah blah

=head1 SYNOPSIS

  use G::Tools::Mapping;
  blah blah blah

=head1 DESCRIPTION

Stub documentation for G::Tools::Mapping was created by h2xs. It looks like the
author of the extension was negligent enough to leave the stub
unedited.

Blah blah blah.

=head1 AUTHOR

A. U. Thor, a.u.thor@a.galaxy.far.far.away

=head1 SEE ALSO

perl(1).

=cut
