#!/usr/bin/perl -w

# Run Disperse
#
# Michael Zhang, Johan Stenberg
# Copyright Stanford University, 2007
#
# Purpose: Run the selector design pipeline from the candidate gene list to the selector probe sequences
# Usage: perl run_selector_pipeline.pl 
# 	-i [gene names input file] 
# 	-s [design settings file]
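#	-c [config file] (optional, defaults to ../config/default_config.properties)
#	-l [log4perl config file] (optional, defaults to ../config/log4perl.properties)
#	-start [stage #] (optional, defaults to the first stage)
#	-stop [stage #] (optional, defaults to the last stage)
#	-nosnp (optional, skips the SNP stages 4 and 5)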
# Input: 
# 	1. Gene names input file
# 		a. File containing a list of gene names to perform the selector design, one on each line
# 		b. Gene names must match that used in CCDS
# 	2. Design settings file
# 		a. Pipeline settings variables (key=value pairs, one on each line)
# 			i. 	disperse.roiFlank - # bases flanking each side of cds coordinates to form ROI region
# 			ii. disperse.targetFlank - # bases flanking each ROI to form target region
# 	3. Configuration file
# Output: 10 files, each prefixed with the gene names input filename
# 	1. Region of interest file (.roi)
# 	2. Target sequence file (.target)
# 	3. Reference sequence file (.refseq)
# 	4. SNP information file (.snp)
# 	5. SNP target sequence file (.snp_target)
# 	6. PieceMaker report file (.PiM_report)
# 	7. Fragment file, PieceMaker output (.fragment)
#	8. Selection file, selection of fragments output by selectfrags (.selection)
# 	9. Amplicon file (.amplicon)
# 	10. Probe file, ProbeMaker output (.probe)



use strict;

use FileHandle;
use File::Spec;
use Getopt::Long;

# add custom library directory to the path
use FindBin;

use lib $FindBin::Bin."/../lib";


use Bio::Disperse::Utils;
use Log::Log4perl qw/:easy/;


# Directory containing this script; the Perl stage scripts are invoked from here.
my $SCRIPTS_PATH = $FindBin::Bin;

# Configuration files used when none is given on the command line, located
# in the config directory next to the script directory.
my $DEFAULT_CONFIG_FILE = "$SCRIPTS_PATH/../config/default_config.properties";
my $DEFAULT_LOG4PERL_FILE = "$SCRIPTS_PATH/../config/log4perl.properties";

# Human-readable stage names in execution order; a name's 1-based position
# in this list is its stage number.
my @STAGE_NAMES = (
	'Exon coordinate generator',
	'ROI coordinate generator',
	'Target sequence generator',
	'SNP info generator',
	'SNP addition to target',
	'PieceMaker',
	'Fragment selection',
	'Amplicon generator',
	'ProbeMaker',
	'Output file consolidator',
);

# Stage number => { name }. Only the names are filled in at this point so
# that usage() can list the stages before the command line has been parsed.
my $PIPELINE_STAGES = {};
for my $stage_number (1 .. scalar(@STAGE_NAMES))
{
	$PIPELINE_STAGES->{$stage_number} = { 'name' => $STAGE_NAMES[$stage_number - 1] };
}


# Command-line option values; each one stays undef unless the corresponding
# switch is supplied.
my ($gene_names_infile, $settings_infile, $config_infile, $log4perl_infile, $start_stage, $stop_stage);
my $nosnp;

# Getopt::Long specification: string-valued file options, integer-valued
# stage bounds and the boolean -nosnp flag.
my @option_spec = (
	"i=s"     => \$gene_names_infile,
	"s=s"     => \$settings_infile,
	"c=s"     => \$config_infile,
	"l=s"     => \$log4perl_infile,
	"start=i" => \$start_stage,
	"stop=i"  => \$stop_stage,
	"nosnp"   => \$nosnp,
);

# An unparsable command line prints the usage text and exits.
GetOptions(@option_spec) or usage();

# The gene names file and the design settings file are mandatory; without
# both of them print the usage text (which exits).
if (!defined($gene_names_infile) || !defined($settings_infile))
{
	usage();
}

# the default config file is used if none is specified
if (!defined($config_infile)) {
	$config_infile = $DEFAULT_CONFIG_FILE;
}
$config_infile = File::Spec->rel2abs($config_infile);

# the default log4perl file is used if none is specified
if (!defined($log4perl_infile)) {
	$log4perl_infile = $DEFAULT_LOG4PERL_FILE;
}
# Make the log4perl path absolute as well. This line previously converted
# $config_infile a second time and left $log4perl_infile relative.
$log4perl_infile = File::Spec->rel2abs($log4perl_infile);

# # Create settings input filehandle
# my $settings_fh = new FileHandle($settings_infile);
# if (!defined($settings_fh))
# {
# 	$logger->error( "Could not open file: $settings_infile!");
# 	exit(1);
# }

# my $settings = {};

# # create hash for settings using input file
# while (<$settings_fh>) {
# 	chomp;
# 	if (/(.*)=(.*)/) {
# 		$settings->{$1} = $2;
# 	}
# }

# $settings_fh->close;

# Configure Log4perl from the selected properties file and fetch the logger
# that is passed on to the config parser.
Log::Log4perl->init($log4perl_infile);
my $logger = get_logger();

# Parse the design settings file and the pipeline configuration file into
# hash references keyed by setting name.
my $settings = Bio::Disperse::Utils::parse_config($settings_infile, $logger);
my $config = Bio::Disperse::Utils::parse_config($config_infile, $logger);

# Look up the tool and resource locations from the configuration and pass
# each one, together with the config file path, through get_abs_config_entry
# (presumably resolves relative entries against the config file location;
# confirm in Bio::Disperse::Utils).
my $PIECEMAKER_PATH = Bio::Disperse::Utils::get_abs_config_entry(
	$config->{'path.javaExecs'}, $config_infile);
my $MASTER_SNP_FILE = Bio::Disperse::Utils::get_abs_config_entry(
	$config->{'resource.masterSnpFile'}, $config_infile);
my $CCDS_FILE = Bio::Disperse::Utils::get_abs_config_entry(
	$config->{'resource.ccdsFile'}, $config_infile);
my $FASTACMD_BLASTDB = Bio::Disperse::Utils::get_abs_config_entry(
	$config->{'resource.blastDb'}, $config_infile);
my $FASTACMD_BIN = Bio::Disperse::Utils::get_abs_config_entry(
	$config->{'path.fastacmd'}, $config_infile);

# Every intermediate and output file is named after the gene names input
# file, with a stage-specific extension appended.
my $cds_coord_file = "${gene_names_infile}.cds";
my $roi_coord_file = "${gene_names_infile}.roi";
my $target_file = "${gene_names_infile}.target";
my $refseq_file = "${gene_names_infile}.refseq";
my $snp_info_file = "${gene_names_infile}.snp";

# With -nosnp no SNP-annotated target is produced, so the later stages read
# the plain target file instead.
my $snp_target_file;
if ($nosnp)
{
	$snp_target_file = $target_file;
}
else
{
	$snp_target_file = "${gene_names_infile}.snp_target";
}

my $PiM_report_file = "${gene_names_infile}.PiM_report";
my $fragment_file = "${gene_names_infile}.fragment";
my $selection_file = "${gene_names_infile}.selection";
my $amplicon_file = "${gene_names_infile}.amplicon";
my $probe_file = "${gene_names_infile}.probe";



# SNP arguments for the consolidator stage; left empty when -nosnp is given.
my $consolidator_snp_args = $nosnp ? "" : " -snp $snp_info_file -snptarget $snp_target_file";

# Full stage table, one row per stage in execution order:
#   [ stage name, command, parameter string ]
# A row's 1-based position is its stage number.
my @stage_table = (
	[	'Exon coordinate generator',
		"perl $SCRIPTS_PATH/exon_coord_generator.pl",
		"-i $gene_names_infile -o $cds_coord_file -ccds $CCDS_FILE"
	],
	[	'ROI coordinate generator',
		"perl $SCRIPTS_PATH/roi_coord_generator.pl",
		"-i $cds_coord_file -o $roi_coord_file -flank $settings->{'disperse.roiFlank'}"
	],
	[	'Target sequence generator',
		"perl $SCRIPTS_PATH/target_seq_generator.pl",
		"-i $roi_coord_file -target $target_file -refseq $refseq_file -flank $settings->{'disperse.targetFlank'} -fastacmd $FASTACMD_BIN -blastdb $FASTACMD_BLASTDB"
	],
	[	'SNP info generator',
		"$PIECEMAKER_PATH/filtervars.sh",
		"-t $target_file -m $MASTER_SNP_FILE -v $snp_info_file"
	],
	[	'SNP addition to target',
		"$PIECEMAKER_PATH/addsubs.sh",
		"-t $target_file -v $snp_info_file -o $snp_target_file"
	],
	[	'PieceMaker',
		"$PIECEMAKER_PATH/piecemaker.sh",
		"-t $snp_target_file -s $settings_infile -o $fragment_file -r $PiM_report_file"
	],
	[	'Fragment selection',
		"$PIECEMAKER_PATH/selectfragments.sh",
		"-t $snp_target_file -f $fragment_file -s $settings_infile -o $selection_file"
	],
	[	'Amplicon generator',
		"perl $SCRIPTS_PATH/amplicon_generator.pl",
		"-i $selection_file -o $amplicon_file"
	],
	[	'ProbeMaker',
		"$PIECEMAKER_PATH/probemaker.sh",
		"-s $settings_infile $selection_file $probe_file"
	],
	[	'Output file consolidator',
		"perl $SCRIPTS_PATH/output_file_consolidator.pl",
		"-cds $cds_coord_file -roi $roi_coord_file -target $target_file " .
			"-frag $fragment_file -amp $amplicon_file -probe $probe_file " .
			$consolidator_snp_args
	],
);

# Replace the names-only table with the complete one, keyed by stage number.
$PIPELINE_STAGES = {};
for my $row_index (0 .. $#stage_table)
{
	my ($name, $cmd, $params) = @{ $stage_table[$row_index] };
	$PIPELINE_STAGES->{$row_index + 1} = {
		'name' => $name,
		'cmd' => $cmd,
		'params' => $params
	};
}

print "\nStarting selector design pipeline on $gene_names_infile...\n";
print "Using config file $config_infile...\n";
print "Using settings file $settings_infile...\n";

# Get the list of stages to be run; an omitted bound defaults to the first
# or last stage of the pipeline.
my $last_stage = scalar(keys(%$PIPELINE_STAGES));
if (!defined($start_stage))
{
	$start_stage = 1;
}
if (!defined($stop_stage))
{
	$stop_stage = $last_stage;
}

# Reject ranges that fall outside the stage table or are reversed. Without
# this check an out-of-range stage would be looked up as an undefined entry
# (empty command, empty name) after the valid stages had already run, and a
# reversed range would run nothing yet still report a complete run.
if ($start_stage < 1 || $stop_stage > $last_stage || $start_stage > $stop_stage)
{
	print "Invalid stage range: -start $start_stage -stop $stop_stage " .
		"(valid stages are 1 through $last_stage, and start must not exceed stop).\n";
	exit(1);
}

# Print info on stages being run
if ($start_stage == $stop_stage)
{
	print "Running stage $start_stage...\n";
}
else
{
	print "Running stages $start_stage through $stop_stage...\n";
}

my @stages = $start_stage..$stop_stage;

print "Starting at ".localtime()."\n";

# Run all the designated stages in order, stopping at the first failure.
for my $stage (@stages)
{
	my $stage_info = $PIPELINE_STAGES->{$stage};
	my $stage_name = $stage_info->{'name'};
	my $stage_cmd = $stage_info->{'cmd'};
	my $stage_params = $stage_info->{'params'};
	
	print "----------------------------------------------------------\n";
	
	# Stages 4 and 5 produce the SNP files, which are not wanted with -nosnp.
	if ($nosnp && ($stage == 4 || $stage == 5)) {
		print "Skipping $stage_name...\n";
	}
	else {
		print "Running $stage_name...\n";
		
		# system() returns non-zero when the command fails or cannot be started.
		if (system("$stage_cmd $stage_params"))
		{
			print "Pipeline failed in $stage_name.\n";
			exit(1);
		}
	}
}
print "Ending at ".localtime()."\n";
print "\nPipeline run complete.\n\n";

# Print the command-line help (the options followed by the numbered list of
# pipeline stages) to standard output and exit with status 1.
# Takes no arguments and does not return.
sub usage
{
	print "Bad input.\n";
	print "\tUsage:\n";
	print "\t\trun_selector_pipeline.pl <options>\n";
	print "\n\tOptions:\n";
	print "\t\t-i [gene names input file] Required\n";
	print "\t\t-s [design settings file] Required\n";
	# The default named here must match $DEFAULT_CONFIG_FILE.
	print "\t\t-c [configuration file] Optional (default: config/default_config.properties)\n";
	print "\t\t-l [log4perl configuration file] Optional (default: config/log4perl.properties)\n";
	print "\t\t-nosnp Optional (default: false)\n";	
	print "\t\t-start [start stage number] Optional (default: first stage)\n";
	print "\t\t-stop [stop stage number] Optional (default: last stage)\n";
	print "\n\tStages:\n";
	# Hash keys are strings, so a plain sort would list stage 10 right after
	# stage 1; compare numerically to print the stages in execution order.
	for my $stage (sort { $a <=> $b } keys(%$PIPELINE_STAGES))
	{
		print "\t\t$stage\t" . $PIPELINE_STAGES->{$stage}->{'name'} . "\n";
	}
	exit(1);
}
