#!/usr/bin/perl 
###################################################################################################
# YAPP Eukaryotic Core Promoter Predictor
###################################################################################################
use strict;
use CGI;

use Conf;
my $cfg=Conf->new();

my $cgi = CGI->new;

my $ruler="----~----1----~----2----~----3----~----4----~----5----~----6----~----7----~----8----~----9----~----0";
my $seq;  # DNA sequence
my @nseq; # numericised target seq

&showform;

# Check for user response
#my %form; foreach my $p ($cgi->param()) { $form{$p} = $cgi->param($p); print "$p = $form{$p}<br>\n"; } # DEBUG

if ($cgi->param()) {
	if ($cgi->param('scan')) {
		&scan_seq;
    }
}

print $cgi->end_html . "\n";
exit(0);

###################################################################################################
sub showform {
###################################################################################################

	print $cgi->header . "\n";

	print $cgi->start_html( 
		-title => " YAPP Eukaryotic Core Promoter Predictor ",
		-style =>{-src=> "$cfg->{assetdir}/gaba.css"}
	);

	print "<p><em><span style=\"font-family:'Comic Sans MS'; font-size:36.0pt; color:#339966; \">YAPP</span></em>";
	print "<span style=\"font-family:'Comic Sans MS'; font-size:10.0pt; color:#339966; \">Eukaryotic Core Promoter Predictor</span></p>"; 

	print $cgi->h3("Scan Sequence for Core Promoter Elements");
	print "<p> A tool to scan for core promoter elements - BREs, TATA boxes, INRs and DPEs, and synergistics combinations of";
	print " these elements <a href='yapp_intro.cgi'>(more)</a>. The search results may be restricted to elements which lie within the functional range of a specified TSS.</p>"; 
	
	my $sample="TGAGGTCTGGGTCTCTGTGACCTCACAATGACCAGGACCCTGCCCGGGTCTATATAAGAGGCCGGGAAGTCGGCCCCTGTCACAGCCCACAAATTCCACCTGCTCACAGGTTGGCTGGCTCAACCAAGGCGGTATCCCCTGCTCTGAGCATCCAGGCCGAATCCACCCAGCACCATGGCC";
	print $cgi->start_form;
	print "Sequence <br>";
	print $cgi->textarea(-name => 'sequence', -rows=>5, -cols=>60, value=>"$sample", id=>'sequence') . "<br>";
	print "Position of Transcription Start Site (optional): ";
	print $cgi->textfield(-name => 'tss', -size=>5); 
	print " Cutoff score (default 0.80): ";
	print $cgi->textfield(-name => 'cutoff', -size=>5) . "<br>"; 
	print $cgi->submit(-name => 'scan',	-value  => 'Submit');
	print $cgi->button(-name => 'clear', -value => 'Clear', -onclick=>"clearText(document.getElementById('sequence'))" );

	print $cgi->end_form;

	print '<script type="text/javascript">';
	print 'document.forms[0].sequence.focus();';
	print "function clearText(t) { t.value = ''; }";
	print '</script>';

}

###################################################################################################
sub scan_seq {
###################################################################################################

	use PWMLib;
	my $pwmlib = PWMLib->new(); # motif PWM library and methods

	my %ScoreSet;	# scores above cutoff for each pwm in a HoH {matrix}{seqno} 

	my ($pwml,$cutoff,$offset); # pwm len and cutoff score

	my ($i,$j,$s,$m,$keys,$k,$tss);

	$seq=uc($cgi->param('sequence'));
	$seq =~ s/\s//g;    #remove whitespaces

	if (defined $cgi->param('cutoff')) {
		$cutoff=$cgi->param('cutoff');	# optional cutoff score
	}
	else {
		$cutoff=0; # use default
	}
	$tss=$cgi->param('tss') if ($cgi->param('tss') > "");	# optional TSS 

	&conv_seq;
	print "<pre>";
	print $ruler . "<br>";
	print &chop_seq($seq,100);
	print "<hr>";
	print "Sequence Length = " . length($seq) . "\n";

	# scan the sequence 
	my $ScoreSetRef;	# ref to scores for each pwm in a HoH {matrix}{seqno}
	$ScoreSetRef = $pwmlib->scanall(\@nseq,$cutoff,$tss); 	# tss may be undefined, thats ok

	$keys = $pwmlib->getkeys();
	foreach $m (@{$keys}) {
		$pwml = $pwmlib->pwmlen($m);
		$cutoff = $pwmlib->pwmcutoff($m) if $cutoff==0;
		print "Matches for $m motif (len $pwml), cutoff score $cutoff.\n";
		#foreach $k (keys %{$ScoreSetRef->{$m}}) {   # access in score order	# unordered
		foreach $k (sort { $ScoreSetRef->{$m}{$b} <=> $ScoreSetRef->{$m}{$a} } (keys (%{$ScoreSetRef->{$m}}))) {   # access in score order
			if (defined $tss) {
				$offset = $k+1-$tss; # displayed offset
				$offset++ if ($offset >= 0);	# +0 displayed as +1, etc
				printf("Pos %d (%+d) = %.2f -> %s\n",($k+1), $offset, $ScoreSetRef->{$m}{$k},substr($seq,$k,$pwml));
			}
			else {
				printf("Pos %d = %.2f -> %s\n",($k+1), $ScoreSetRef->{$m}{$k},substr($seq,$k,$pwml));
			}
		}
	}

	# Search for synergistic combinations

	# arrays to hold graph data
	my (@xvals,@scores);
	for ($i=0;$i<length($seq);$i++) {
		$xvals[$i]=$i;
		$scores[$i]=0;
	}

	my $syncom = $pwmlib->searchcomb(\@nseq,$cutoff,$tss);  # tss may be undefined, thats ok

	print "Found " . ($#{$syncom} + 1) . " synergistic core element combinations.\n";
	my $i;
	for ($i=0;$i<=$#{$syncom};$i++) {
		my $e1 = $$syncom[$i]{e1};
		my $p1 = $$syncom[$i]{p1};
		my $len1 = $pwmlib->pwmlen($e1);
		my $e2 = $$syncom[$i]{e2};
		my $p2 = $$syncom[$i]{p2};
		my $len2 = $pwmlib->pwmlen($e2);
		my $tss = $$syncom[$i]{tss};
		print " $e1 at " . ($p1+1) . " -> " . substr($seq,$p1,$len1);
		print " $e2 at " . ($p2+1) . " -> " . substr($seq,$p2,$len2) . " [TSS=" . $tss . "]";
		printf(": Combined Score: %.2f\n",$$syncom[$i]{score});
		$scores[$tss] += $$syncom[$i]{score};
	}

	print "</pre>";
	print $cgi->hr;

## the graph
use CGI qw(:standard);
use GD::Graph::lines;

my ($x,$y);
$x=600;
$y=300;

my $graph = GD::Graph::lines->new($x, $y);

my @data=([@xvals],[@scores]);

#Xskip is exponent of seq len
my $seqlen=length("$seq");
my $exp=length("$seqlen")-1;
my $xls=10**$exp;

 $graph->set(
      x_label           => 'Seqno',
      y_label           => 'Score',
      title             => 'YAPP TSS Prediction',
      x_label_skip      => $xls,
  ) or die $graph->error;

my $gd = $graph->plot(\@data) or die "<p> falied </p>" . $graph->error;

  open(IMG, '>../file.png') or die $!;
  binmode IMG;
  print IMG $gd->png;
  close (IMG);

  print "<p><image src='/yapp/file.png' alt='file.png'/></p>";
  print $cgi->hr;

# primitive hit counter
my $count = `ls ./count/`;
chomp $count;
rename ("./count/$count", "./count/" . ++$count) or print "<p>cannot rename\n</p>";
print "<p align='right'>$count</p>";

}

sub conv_seq {
###################################################################################################
# convert nts to numerics
###################################################################################################

	#convert seq to array of numerics
	my %NT=('A'=>0,'C'=>1,'G'=>2,'T'=>3);
	my $i;
	for ($i=0;$i<length($seq);$i++) {	# loop thro sequence
		if (defined $NT{substr($seq,$i,1)}) {
			$nseq[$i]=$NT{substr($seq,$i,1)};
		}
		else {
			$nseq[$i]=9;	# not [ACGT]
		}
	}
}

sub chop_seq {
###################################################################################################
# Sub to break sequence up with <br>
# params - sequence, chunksize
# Returns - sequence string with BR tags
###################################################################################################

    my ($seq,$len) = @_;
    my ($ret,$i,$c);

    $i = 0;
    $c = 0;

    while ($i < length($seq)) {
        if ($c == $len) {   # line break
            $ret .= "\n";
            $c=0;
        }
		$ret .= substr($seq,$i,1);
		$i++;
		$c++;
    }
    return $ret;
}

# end
