#!/usr/bin/perl
#
# Copyright 2003 Sashidhar Gadiraju, Peter K. Rogan
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#
#
# program : wm2ribl
# version : 0.1
# Description : This program converts an input generic weight matrix into a 
# valid ribl file using the ridi program.

use strict;
use DGTools;
# Fixed file names, both relative to the current directory: the weight
# matrix is read from $ifwm and the temporary ribl of STEP 2 goes to $ofribl.
my $ifwm   = "wmatrix";
my $ofribl = "ribl";
my @acgt;	# one tab-joined "a c g t" string per matrix row, filled in STEP 1

# STEP 0 : CLEANUP INTERMEDIATE FILES
# Delete whatever an earlier run left behind; files that do not exist are
# silently skipped because the result of unlink is not inspected.
unlink($_) foreach qw/ dalvecp wave values rsdata symvec sequ riinst inst book encodep 
				cmp compp changes data encseq rip ribl ridip/;

# STEP 1 : READ WEIGHT MATRIX
# Layout expected in $ifwm:
#   line 1       title
#   line 2       first field = number of rows (length of the site)
#   line 3       first field = number of sequences
#   next lines   one matrix row each; the first four fields are kept as the
#                a, c, g, t values (any further fields are ignored)
# Dies when the file cannot be opened, ends before the three header lines,
# declares a non-positive count, contains a blank or '*' line inside the
# matrix, has a row with fewer than four values, or holds fewer rows than
# announced.  Sets $title, $nrows, $nseqs, @acgt, $from and $to.
open(my $wm_fh, "<", $ifwm) || die "ERROR: Cannot open $ifwm\n";
my @header;
foreach my $what ("title", "number of rows", "number of sequences")
{
	my $hline = <$wm_fh>;
	# A truncated file must be reported as such, not passed on as undef.
	die "ERROR: $ifwm ends before the $what line\n" if(! defined($hline));
	$hline = trim($hline);
	push(@header, $hline);
}
my $title = $header[0];
my $nrows = (split( /\s+/, $header[1] ))[0];	# num of rows, length of site
die "ERROR: nrows should be greater than zero\n" if(! defined($nrows) || $nrows <= 0 );
my $nseqs = (split( /\s+/, $header[2] ))[0];		# num of seqs
die "ERROR: nseqs should be greater than zero\n" if(! defined($nseqs) || $nseqs <= 0 );
my $cnt=0;
while(my $row = <$wm_fh>)
{
	die "ERROR: Line $. is empty or starts with ",'*',"\n" if($row =~ /^\s*\*|^\s*$/);
	my @fields = split(/\s+/, trim($row));
	# A short row would otherwise be joined with undefined values and end up
	# in the ribl as empty columns without any diagnostic.
	die "ERROR: Line $. of $ifwm has fewer than 4 values (a,c,g,t)\n" if(@fields < 4);
	$acgt[$cnt++] = join("\t", @fields[0..3]);	# get a.c.g.t values
	last if($cnt == $nrows);
}
die "ERROR: Could not read $nrows rows of a,c,g,t frequencies from $ifwm\n" if($cnt != $nrows);
close($wm_fh);
my $from = 0;
my $to = $nrows -1;

# STEP 2 - Write temporary ribl file
# The file written to $ofribl carries the user's a/c/g/t values with every
# Ri column and every statistic set to zero; it is only an input for the
# ridi run of STEP 3.
open( my $ribl_fh, ">$ofribl" ) || die "ERROR: Cannot open outputfile $ofribl\n";

# Header: title, warning, column legend and the base range of the matrix.
print {$ribl_fh} <<"RIBL_HEAD";
ribl "$title"
*WARNING: This is a GENERIC WEIGHT MATRIX . NOT A VALID RIBL FILE
*	i(a,l)\tRi(c,l)\tRi(g,l)\tRi(t,l)\tl\ta\tc\tg\tt
\t${from}\t${to} frombase, tobase
RIBL_HEAD

# One line per position: four zero Ri values, the position, then a c g t.
for($cnt = 0; $cnt < @acgt; $cnt++)
{	print {$ribl_fh} "\t0\t0\t0\t0\t$cnt\t$acgt[$cnt]\n";	}

# Trailer: zeroed statistics, the sequence count and the default bounds.
print {$ribl_fh} <<"RIBL_TAIL";
*
   0.0 bits = mean (Rsequence of selected region)
    0.0 bits = standard deviation
*
   0 bits = Ri of consensus sequence from $from to $to
   0 bits = Ri of anticonsensus sequence from $from to $to
*
  0 bits = average Ri for random sequence from $from to $to
*
         $nseqs n, number of sequences used to create the matrix
*
asymmetric symmetry of the matrix
*
-5000.000000 Ri bound: lower bound on Ri
100.000000 Z bound: lower bound on Z
1.000000 P bound: upper probability
*
.
RIBL_TAIL

close($ribl_fh);

# STEP 3 - Run ridi to get sequ
# ridip is written first and then ridi is started; success means a zero
# exit status AND a "sequ" file in the current directory.
# NOTE(review): 1.09 / -1 / -5000 are passed through verbatim; their meaning
# is defined by ridi's parameter file format - confirm against ridi's docs.
open(my $ridip_fh, ">ridip") || die "ERROR: Cannot open output file ridip";
print {$ridip_fh} join("\n", "1.09", ${nseqs}, "-1", "-5000"), "\n";
close($ridip_fh);
my $res = system("ridi");
unless(!$res && -f "sequ")
{	die("ERROR: in execution of ridi");	}
unlink("ribl");	# remove the temp. ribl file

# STEP 4 - Generate a book
# makebk receives on its standard input exactly the text the former
# `echo -e "..." | makebk` pipeline produced: an empty line, "a", the
# title, and a final empty line.
# NOTE(review): what each answer means is defined by makebk - confirm.
# The text is written straight into a pipe rather than being pasted into a
# shell command line, so quotes, '$' or backticks in the title (which comes
# from the input file) can no longer break or alter the command, and the
# step no longer depends on /bin/sh's echo understanding -e.  Backslash
# sequences in the title are now passed literally.
{
	# Keep an early exit of makebk from killing this script with SIGPIPE;
	# it is reported through the status/book check below instead.
	local $SIG{PIPE} = 'IGNORE';
	if(open(my $makebk, '|-', 'makebk'))
	{
		print {$makebk} "\na\n${title}\n\n";
		close($makebk);		# waits for makebk and sets $?
		$res = $?;
	}
	else
	{	$res = -1;	}		# makebk could not be started
}
die("ERROR: in execution of makebk") if($res || (! -f "book") );

# Steps 5-8 each start one external program and insist on a zero exit
# status plus the presence of that program's output file.
# NOTE(review): dtouch comes from DGTools; presumably it creates an empty
# parameter file so the program runs with defaults - confirm.

# STEP 5 - Run comp
dtouch("compp");
$res = system("comp");
unless(!$res && -f "cmp")
{	die("ERROR: in execution of comp");	}

# STEP 6 - Encode
dtouch("inst");
open(my $encodep_fh, ">encodep") || die "ERROR: Cannot open output file encodep";
# Parameter lines for encode, one list element per line (trailing blanks
# are part of the original text and are kept).
print {$encodep_fh} join("\n",
	"f",
	"0 $to ",
	"1 ",
	"1",
	" 1",
	" 1",
	"");
close($encodep_fh);
$res = system("encode");
unless(!$res && -f "encseq")
{	die("ERROR: in execution of encode");	}

# STEP 7 - Rseq - get the rsdatafile
$res = system("rseq");
unless(!$res && -f "rsdata")
{	die("ERROR: in execution of rseq");	}

# STEP 8 - Run dalvec
dtouch("dalvecp");
$res = system("dalvec");
unless(!$res && -f "symvec")
{	die("ERROR: in execution of dalvec");	}

# STEP 9 - Run ri - get the final ribl file
# Writes the parameter file "rip" and runs ri, which is expected to leave a
# new "ribl" in the current directory.
# NOTE(review): the individual rip lines are passed through verbatim; their
# meaning is defined by ri's parameter file format - confirm.
dtouch("values"); dtouch("wave");
open(RIP,">rip") || die "ERROR: Cannot open output file rip";
print RIP "2.54 \n\"${title}\" \n0 $to \n1 \na \na \nn \nn \n- \ns 2 \nf \n-5000 \n100 \n1.00 \n";
close RIP;
$res = system("ri");
# The temporary ribl was removed after STEP 3, so a missing ribl here means
# ri produced nothing and STEP 10 has no input: stop right away.
die("ERROR: in execution of ri: no ribl file was produced\n") if(! -f "ribl");
# A non-zero status with a ribl present stays a non-fatal report, as before;
# the message is now newline-terminated so later output starts on its own line.
print STDERR ("ERROR: in execution of ri\n") if($res);

# STEP 10 - Plug back the values from the user weight matrix into the new ribl
# parseRibl (DGTools) supplies the title, the from/to range, the matrix rows
# and the Ri statistics used below; $DR / $DRS are its error flag and error
# text.  For every matrix row the first five fields are kept and the user's
# a/c/g/t values from STEP 1 are appended.  The result is written to
# ribl.tmp and then moved over ribl.
# NOTE(review): parseRibl is assumed to read the "ribl" file of STEP 9 and
# to return one matrix row per position - confirm in DGTools.
my %hribl = parseRibl();
die "ERROR: in parseRibl: $DRS\n" if($DR);
open(my $ro_fh, ">ribl.tmp") || die "ERROR: Cannot open output file 'ribl.tmp'";
print {$ro_fh} "ribl \"",$hribl{'title'},"\"\n";
print {$ro_fh} "*WARNING! This ribl has been created from a USER DEFINED WEIGHT MATRIX.\n";
print {$ro_fh} $hribl{'from'}," ",$hribl{'to'}," fromwanted, towanted \n";
$cnt = 0;
foreach my $mrow ( @{$hribl{'matrix'}} )
{
	my @vals = split(/\s+/, trim($mrow) );
	print {$ro_fh} "\t",join("\t",@vals[0..4]),"\t",$acgt[$cnt++],"\n";
}

print {$ro_fh} "*
   $hribl{'RiMean'} bits = mean (Rsequence of selected region)
   $hribl{'RiSD'} bits = standard deviation
*
   $hribl{'RiCon'} bits = Ri of consensus sequence from $from to $to
   $hribl{'RiAntiCon'} bits = Ri of anticonsensus sequence from $from to $to
*
  $hribl{'RiAvg'} bits = average Ri for random sequence from $from to $to
*
         $nseqs n, number of sequences used to create the matrix
*All parameters below this line are DEFAULT params 
*The user should HAND EDIT these according to his needs
asymmetric symmetry of the matrix
*
-5000.000000 Ri bound: lower bound on Ri
  100.000000 Z bound: lower bound on Z
    1.000000 P bound: upper probability
*
l  extreme: char; h or l, the high or low extreme to be defined
 0.0  wavelocation: real; the location in bases of the extreme
 1.0  wavebit: real; the location in bits of the extreme
 0.5  waveamplitude: real; the amplitude of the wave in bits
10.6  wavelength: real; the wave length of the wave in bases
 0.4  dash: real; the size of dashes in cm.  dash <= 0 means no dashes
 0.0  thickness: real; the thickness of the cosine wave.  <=0 means default
.
";
# Buffered write errors (e.g. a full disk) only show up at close; without
# this check a truncated ribl.tmp would replace ribl unnoticed.
close($ro_fh) || die "ERROR: Cannot finish writing 'ribl.tmp': $!\n";
unlink("ribl");
# ribl has just been removed, so a failed rename must not go unreported.
rename("ribl.tmp", "ribl") || die "ERROR: Cannot rename 'ribl.tmp' to 'ribl': $!\n";

# STEP back to square 0 : CLEANUP INTERMEDIATE FILES
# Same list as in STEP 0 except that "ribl", the final result, is kept.
my @intermediate_files = qw/ dalvecp wave values symvec rsdata sequ riinst inst book encodep 
				cmp compp changes data encseq rip ridip/;
unlink(@intermediate_files);
