#!/usr/bin/perl

# MFold Submission/Result Retrieval Script
# Created by Wade Schulz (schu1321@umn.edu)
# University of Minnesota, Department of Medicine
# Division of Rheumatic and Autoimmune Disease

# MFold Created by Dr. Michael Zuker
# "Mfold web server for nucleic acid folding and hybridization prediction. Nucleic Acids Res. 31 (13), 3406-15, (2003)"

use strict;
use Bio::Perl;
use IO::Socket;
use Cwd;

# Print out program info
print_version();

# Get Accession Number from User
print "Enter Accession: ";
my $accession = <STDIN>;
# <STDIN> yields undef once input is exhausted (EOF / closed pipe); stop here
# rather than handing an undefined accession to the GenBank lookup.
die "No accession entered.\n" unless defined $accession;
# Drop the trailing newline together with any stray surrounding whitespace
$accession =~ s/^\s+|\s+$//g;
die "No accession entered.\n" if $accession eq '';

# Get Sequence, Fold Sequence, Download Data
get_seqs($accession);

exit(0);

# Main program function - gets sequence and drives other functions to fold/download data
#
# Looks the accession up in GenBank, asks the user for the folding range and a
# sequence name, submits the job through fold(), then downloads each result
# file into "<current directory>/<seqname>_<start>-<end>/".
#
# Parameters:
#   $accession - GenBank accession to look up
# Returns: nothing meaningful.
# Dies when no sequence is returned for the accession, the range entered is not
# a pair of whole numbers with start < end <= sequence length, or the results
# directory cannot be created.
sub get_seqs{
    my ($accession) = @_;

    # Obtain Seq Object from GenBank
    my $db_obj = Bio::DB::GenBank->new;
    my $seq = $db_obj->get_Seq_by_acc($accession);
    die "Could not retrieve a sequence for accession \"$accession\"\n"
        unless defined $seq;

    # Determine sequence length, start and end nucleotides for folding
    my $length = length($seq->seq);
    print "Sequence is $length nucleotides long.\n";
    print "Enter start nucleotide for folding: ";
    my $start = <STDIN>;
    print "Enter end nucleotide for folding: ";
    my $end = <STDIN>;

    # The loop variable aliases $start/$end, so the clean-up changes them in place
    foreach my $value ($start, $end){
        $value = '' unless defined $value;      # EOF on STDIN
        $value =~ s/^\s+|\s+$//g;
        die "Start and end positions must be whole numbers.\n"
            unless $value =~ /\A\d+\z/;
    }
    die "Invalid range $start-$end for a sequence of $length nucleotides.\n"
        unless $start < $end && $end <= $length;

    # Get Sequence Name/Desc from User
    # NOTE(review): the name is used unescaped both as a directory name and in
    # the form data sent by fold() - confirm which characters are acceptable.
    print "Enter sequence name: ";
    my $seqname = <STDIN>;
    $seqname = '' unless defined $seqname;
    $seqname =~ s/[\r\n]//g;
    $seqname .= "_$start-$end";

    # Print Status
    print "Folding \"", $seq->desc, "\" from nucleotide $start to $end of total $length\n";
    # Fold sequence, obtain address for results
    my $address = fold($seqname, $start, $end, $seq->seq);
    $address = '' unless defined $address;

    # Job directory part of the address; only needed for the compress requests
    my ($filename) = $address =~ /^.*old\/mfold\/(\d.*\/.*)\/.*/;

    # Create directory for results
    my $cwd = getcwd();
    my $outdir = "$cwd/$seqname";
    unless(-d $outdir){
        mkdir($outdir) or die "Could not create directory $outdir: $!\n";
    }

    ########################
    # Download plain-text files -- add ending to $address, pass info to sub socket_download
    # Each entry: file ending, type label, label used in the wait message
    ########################
    my @text_files = (
        ['.ss-count', 'SS Count', 'SS Count'],
        ['.h-num',    'H-Num',    'H-num'],
        ['.ann',      'P-num',    'P-num'],
    );
    foreach my $file (@text_files){
        my ($ending, $type, $label) = @$file;
        # The leading ">" is the write-mode prefix socket_download expects
        _download_with_retry(0, $address . $ending, ">$outdir/$seqname$ending", $type,
                             "$label Not Complete. Pausing for 20 seconds.\n");
    }

    ########################
    # Compress, download CT / JPG / Vienna files
    # Each entry: STYPE sent to the compress script, file ending, type label
    ########################
    my @zip_files = (
        ['ct',     '.ct.zip',  'CT Zip'],
        ['jpg',    '.jpg.zip', 'JPG Zip'],
        ['Vienna', '.b.zip',   'Vienna Zip'],
    );
    if(defined $filename){
        foreach my $file (@zip_files){
            my ($stype, $ending, $type) = @$file;
            my $param = "FILE_NAME=$filename/1&MODE=zip&STYPE=$stype&SUB_DIR=$filename";
            $param =~ s/\//\%2F/ig;
            sock_compress($param);
            _download_with_retry(1, $address . $ending, ">$outdir/$seqname$ending", $type,
                                 "$type file not complete. Pausing for 20 seconds.\n");
        }
    }else{
        # Without the job directory the compress request cannot be built
        warn "Could not determine the job directory from \"$address\"; " .
             "skipping the zipped CT/JPG/Vienna downloads.\n";
    }

    print "Finished! Files can be found at: $cwd/$seqname\n";
}

# Download one result file, pausing and retrying while socket_download reports an error.
#
# Parameters:
#   $mode, $download, $path, $type - passed unchanged to socket_download
#   $wait_message                  - text printed before each 20 second pause
# Returns: 1 if an attempt succeeded, 0 if the first try and all 5 retries failed.
sub _download_with_retry{
    my ($mode, $download, $path, $type, $wait_message) = @_;
    my $max_retries = 5;

    # socket_download returns 0 on success and 1 when the reply contained an error
    my $failed = socket_download($mode, $download, $path, $type);
    my $retries = 0;
    while($failed && $retries < $max_retries){
        print $wait_message;
        {
            # Unbuffer STDOUT so each dot shows up as its second elapses
            local $| = 1;
            foreach my $second (1 .. 20){
                sleep(1);
                print ".";
            }
        }
        print "\n";
        $failed = socket_download($mode, $download, $path, $type);
        $retries++;
    }

    if($failed){
        warn "Giving up on $type download after " . ($retries + 1) . " attempts.\n";
        return 0;
    }
    return 1;
}

# Compress files (ex. CT, jpg, etc)
#
# Sends a GET request to the server's mfold-compress_img.cgi script and reads
# the first few lines of the reply before closing the connection.
#
# Parameters:
#   $param - query string appended to the script URL (already escaped by the caller)
# Returns: nothing meaningful. Dies if the socket cannot be created.
sub sock_compress{
    my ($param) = @_;
    ######################################
    # Open the socket
    ######################################
    my $sock = IO::Socket::INET->new(
               PeerAddr=>'www.bioinfo.rpi.edu',
               PeerPort=>'80',
               Proto=>'tcp'
           );
    die "Could not create socket: $!\n" unless $sock;

    ######################################
    # Send information
    ######################################
    print $sock "GET /applications/mfold/old/cgi-bin/mfold-compress_img.cgi?" . $param . " HTTP/1.1\n" .
                "Host: www.bioinfo.rpi.edu\n" .
                "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.1) Gecko/20060111 Firefox/1.5.0.1\n" .
                "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\n" .
                "Accept-Language: en-us,en;q=0.5\n" .
                "Accept-Encoding: gzip,deflate\n" .
                "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\n" .
                "Keep-Alive: 300\n" .
                "Connection: keep-alive\n" .
                "Referer: http://www.bioinfo.rpi.edu/\n\n";

    print "Compressing files...\n";
    ########################################
    # Receive Information
    ########################################
    # Read at most 5 lines of the reply; the content itself is not used.
    # The limit is tested first so no extra line is read once it is reached,
    # and defined() keeps a line such as "0" from ending the loop early.
    my $temp = 0;
    while($temp < 5 && defined(my $line = <$sock>)){
        $temp++;
    }

    ########################
    # Close Socket
    ########################
    close($sock);
}

# Function to open socket, fold sequence, and obtain page data to search for path
#
# Posts a slice of the sequence to the MFold form and returns the address that
# get_url() extracts from the server's reply.
#
# Parameters:
#   $seqname  - value sent as SEQ_NAME
#   $start    - 0-based offset of the first nucleotide sent
#   $end      - 0-based offset one past the last nucleotide sent
#   @sequence - the full sequence; the caller passes a single string, and
#               several pieces are simply joined together
# Returns: the address string produced by get_url().
# Dies if the socket cannot be created.
# NOTE(review): if users type 1-based positions, the slice starts one
# nucleotide later than they asked for - confirm the intended convention.
sub fold{
    my ($seqname, $start, $end, @sequence) = @_;
    ########################
    # Set folding parameters
    ########################
    my $constraints = "";
    my $LorC = "linear";
    my $percent = "5";
    my $maxfolds = "50";
    my $window = "default";
    my $max_lp = "30";
    my $max_as = "30";
    my $maxbp = "no+limit";
    my $batch = "An+Immediate";
    my $email_addr = "schu1321\@umn.edu";
    my $gifres = "72x72";
    my $mode = "auto";
    my $grid = "ON";
    my $lab_fr = "default";
    my $offset = 0;
    my $rot_ang = "0";
    my $ann = "None";
    my $max_length1 = "800";
    my $max_length2 = "6000";
    my $na = "RNA";
    my $rh = "schu1321\@umn.edu";

    ######################################
    # MFold Page information (If changed)
    ######################################
    my $page = '/applications/mfold/old/cgi-bin/nph-mfold-3.1.cgi';

    ###################################################
    ## DO NOT EDIT THE LINES BELOW
    ###################################################

    # Create sequence from start-end.
    # The sequence comes from the argument list: the caller's $seq object is
    # not visible inside this sub, so it must not be referenced here.
    my $whole = join('', @sequence);
    my $fullseq = '';
    if($start >= 0 && $start < $end && $start < length($whole)){
        # substr stops at the end of the string if $end runs past it
        $fullseq = substr($whole, $start, $end - $start);
    }

    ######################################
    # Build the parameter list
    ######################################
    my $param = 'SEQ_NAME='.$seqname.
               '&SEQUENCE='.$fullseq.
               '&CONSTRAINTS='.$constraints.
               '&LorC='.$LorC.
               '&PERCENT='.$percent.
               '&MAXFOLDS='.$maxfolds.
               '&WINDOW='.$window.
               '&MAX_LP='.$max_lp.
               '&MAX_AS='.$max_as.
               '&MAXBP='.$maxbp.
               '&BATCH='.$batch.
               '&EMAIL_ADDR='.$email_addr.
               '&GIFRES='.$gifres.
               '&MODE='.$mode.
               '&GRID='.$grid.
               '&LAB_FR='.$lab_fr.
               '&OFFSET='.$offset.
               '&ROT_ANG='.$rot_ang.
               '&ANN='.$ann.
               '&MAX_LENGTH1='.$max_length1.
               '&MAX_LENGTH2='.$max_length2.
               '&NA='.$na.
               '&RH='.$rh
               ;
    print "Opening Socket...\n";
    ######################################
    # Open the socket
    ######################################
    my $sock = IO::Socket::INET->new(
               PeerAddr=>'www.bioinfo.rpi.edu',
               PeerPort=>'80',
               Proto=>'tcp'
           );
    die "Could not create socket: $!\n" unless $sock;

    ######################################
    # Send information
    ######################################
    # Content-Length counts $param only; the newline after it is not part of the body
    print $sock "POST " . $page . " HTTP/1.1\n" .
                "Host: www.bioinfo.rpi.edu\n" .
                "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1\n" .
                "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\n" .
                "Accept-Language: en-us,en;q=0.5\n" .
                "Accept-Encoding: gzip,deflate\n" .
                "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\n" .
                "Keep-Alive: 300\n" .
                "Connection: keep-alive\n" .
                "Referer: http://www.bioinfo.rpi.edu/applications/mfold/old/rna/form1.cgi\n" .
                "Content-Type: application/x-www-form-urlencoded\n" .
                "Content-Length: " .  length($param) . "\n\n" .
                $param . "\n";

    print "Receiving Confirmation...\n";
    ########################################
    # Receive Information, store in array
    ########################################
    # Reads until the server closes the connection
    my @store;
    while(defined(my $line = <$sock>)){
        push @store, $line;
    }

    ########################
    # Close Socket
    ########################
    close($sock);

    return get_url(@store);
}

# Search @store for URL to download info
#
# Joins the reply lines, takes the text that follows the first
# "url=http://www.bioinfo.rpi.edu/applications/mfold/old/mfold/" marker up to
# the closing '/"></', and appends the second path component of that text
# once more to form the download address.
#
# Parameters:
#   @store - lines of the server reply
# Returns: '/applications/mfold/old/mfold/<dir>/<job>/<job>'
# Dies if the marker is missing or the path after it has no second component,
# since every later download would otherwise request a meaningless address.
sub get_url{
    my @store = @_;
    my $base = '/applications/mfold/old/mfold/';

    ########################
    # Search for link, set up address for download
    ########################
    print "Searching for link...\n";
    my $reply = join('', @store);

    # Element 1 is the text between the first marker and the next one (or the end)
    my @pieces = split(/url=http:\/\/www\.bioinfo\.rpi\.edu\/applications\/mfold\/old\/mfold\//, $reply);
    my $after_marker = $pieces[1];
    die "Could not find the MFold results link in the server reply\n"
        unless defined $after_marker && length $after_marker;

    # Keep everything before the '/"></' that closes the link
    my @link_parts = split(/\/\"><\//, $after_marker);
    my $job_path = $link_parts[0];
    die "Could not find the MFold results link in the server reply\n"
        unless defined $job_path && length $job_path;

    # The second path component is repeated at the end of the address
    my @components = split(/\//, $job_path);
    my $job_name = $components[1];
    die "Unexpected MFold results link \"$job_path\"\n"
        unless defined $job_name && length $job_name;

    return $base . $job_path . '/' . $job_name;
}

# Function to open socket, download info to file
#
# Requests $download from www.bioinfo.rpi.edu and writes the reply body to a file.
#
# Parameters:
#   $mode      - 1 puts the socket and the output file in binary mode (zip
#                files); any other value leaves both in text mode
#   $download  - server path to request; leading slashes are optional
#   $finalpath - output file, optionally prefixed with ">" or ">>" as callers
#                written for two-argument open pass it; no prefix means ">"
#   $type      - label used in the progress messages
# Returns: 0 when the reply was saved, 1 when any reply line contained "Error"
#   or "error" (the file may then be empty or incomplete).
# Dies if the socket or the output file cannot be opened, or the file cannot
# be closed cleanly.
sub socket_download{
    my ($mode, $download, $finalpath, $type) = @_;

    print "Opening Socket for $type Download...\n";
    ######################################
    # Open the socket
    ######################################
    my $sock = IO::Socket::INET->new(
               PeerAddr=>'www.bioinfo.rpi.edu',
               PeerPort=>'80',
               Proto=>'tcp'
           );
    die "Could not create socket: $!\n" unless $sock;

    ######################################
    # Send information
    ######################################
    # Callers pass an absolute path; drop its leading slash(es) so the request
    # line does not start with "//"
    (my $request_path = $download) =~ s{^/+}{};

    # "Connection: close" makes the server end the connection after the reply,
    # so the read loop below finishes without waiting for a keep-alive timeout.
    # No Accept-Encoding is sent because the body is saved exactly as received.
    print $sock "GET /" . $request_path . " HTTP/1.1\n" .
                "Host: www.bioinfo.rpi.edu\n" .
                "User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1\n" .
                "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\n" .
                "Accept-Language: en-us,en;q=0.5\n" .
                "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\n" .
                "Connection: close\n\n";

    print "Receiving $type File...\n";
    ########################################
    # Receive Information, write the body to the output file
    ########################################
    # Split an optional ">" / ">>" prefix off the path so the file name can
    # never be interpreted as an open mode
    my ($open_mode, $file) = ('>', $finalpath);
    if($finalpath =~ /\A\s*(>>?)\s*(.*)\z/s){
        ($open_mode, $file) = ($1, $2);
    }
    open(my $out, $open_mode, $file) or die "Could not open $file for writing: $!\n";

    if($mode == 1){
        binmode $sock;
        binmode $out;
    }

    my $failed = 0;
    my $in_body = 0;
    while(defined(my $line = readline($sock))){
        # NOTE(review): this check also runs over body lines, so binary data
        # that happens to contain "error" aborts the download - confirm whether
        # it should be limited to the headers.
        if($line =~ /Error/ || $line =~ /error/){
            $failed = 1;
            last;
        }
        if($in_body){
            print {$out} $line;
        }elsif($line =~ /\A\r?\n\z/){
            # The first empty line ends the header section
            $in_body = 1;
        }
    }

    ########################
    # Close file and Socket (on the error path as well)
    ########################
    close($sock);
    close($out) or die "Could not finish writing $file: $!\n";

    return $failed;
}

# Function to print out version information
#
# Clears the terminal, then prints the program banner to STDOUT.
# Takes no parameters; the return value is not meaningful.
sub print_version{
    # "cls" only exists on Windows; other systems use "clear"
    system($^O eq 'MSWin32' ? "cls" : "clear");

    my @banner = (
        '############################################',
        '# MFold Submission/Result Retrieval Script #',
        '# Created by Wade Schulz - University of   #',
        '# Minnesota - Department of Medicine       #',
        '# Hosted at www.bioinformatics.org         #',
        '# MFold Created by Dr. Michael Zuker       #',
        '# www.bioinfo.rpi.edu                      #',
        '############################################',
    );
    print $_, "\n" foreach @banner;
    print "\n";
}