#!/usr/bin/python
############################################################################
## DeltaStat v0.3
##
## Copyright 2004 Jason Dunsmore
##
##    This file is part of DeltaStat.
##
##    DeltaStat is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2.1 of the License, or
##    (at your option) any later version.
##
##    DeltaStat is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with DeltaStat; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##
## This program will query the database to get columns from spots that
## satisfy certain criteria and perform a 2 sample t-test on the control
## and treated groups.
##

#
# DeltaStat backend
#

from rpy import *
import MySQLdb
import sys
import re
import string

class Ttest:
    """Two-sample t-test of control vs. experimental spot volumes.

    Constructing an instance connects to the local ``2d-gels`` MySQL
    database, selects the spots matching the criteria in ``query``, runs
    R's t-test (through rpy) on each spot and keeps the significant ones.

    Attributes set by ``__init__``:
        exp_date       -- table-name prefix taken from query['exp_date'].
        pvalue_number  -- p-value cutoff taken from query['pvalue_number'].
        conditions     -- dict {1: ..., 2: ...} of SQL WHERE clauses.
        mysql_query    -- SQL template joining the two tables.
        fields         -- statistic columns selected after the replicates.
        ctrl_replicates, exp_replicates -- replicate column lists keyed
                          by replicate count (1..4).
        raw_stats      -- sorted list of rows
                          [p-value, ctrl1 ID, avg ctrl, rsd ctrl,
                           avg exp, rsd exp, ratio exp/ctrl].
        cropped_stats  -- rows of raw_stats that pass the filter.
        count          -- len(cropped_stats) + 1 (see n_max()).
    """

    def __init__(self, query, n):
        """Query the database and compute the filtered t-test results.

        query -- mapping of strings; the keys read are 'exp_date',
                 'pvalue_number', 'freq_gel', 'freq_operator',
                 'freq_number', 'rstd_grp', 'rstd_operator', 'rstd_number',
                 'foldchange_operator1/2', 'foldchange_number1/2',
                 'normvol_pos_grp/operator/number' and
                 'normvol_neg_grp/operator/number'.
        n     -- key into self.conditions (1 or 2) selecting which set of
                 fold-change / normalized-volume conditions to apply.

        Exits the process (sys.exit) when fewer than two control or
        experimental replicate columns can be queried.
        """
        self.exp_date = query['exp_date']
        self.pvalue_number = query['pvalue_number']

        # NOTE(review): every value below is pasted into the SQL text
        # unescaped (column names and operators cannot be bound as query
        # parameters).  If `query` can carry untrusted input, the caller
        # must validate it against a whitelist first.
        general_conditions = (
            'WHERE ' + query['freq_gel'] + ' ' + query['freq_operator']
            + ' ' + query['freq_number']
            + '\nAND ' + query['rstd_grp'] + ' ' + query['rstd_operator']
            + ' ' + query['rstd_number'])
        ratio_clause = (
            general_conditions
            + '\nAND `ratio mean norm Volume experimental'
            + ' / mean norm Volume control` ')

        # Conditions of database query for analyses.
        self.conditions = {
            1: (ratio_clause
                + query['foldchange_operator1'] + ' '
                + query['foldchange_number1']
                + '\nAND ' + query['normvol_pos_grp'] + ' '
                + query['normvol_pos_operator'] + ' '
                + query['normvol_pos_number']),
            2: (ratio_clause
                + query['foldchange_operator2'] + ' '
                + query['foldchange_number2']
                + '\nAND ' + query['normvol_neg_grp'] + ' '
                + query['normvol_neg_operator'] + ' '
                + query['normvol_neg_number'])}

        # Join *_allGels and *_statistics tables.
        # NOTE(review): the GROUP BY column is always `ctrl4 ID`, even when
        # the query falls back to fewer control replicates -- confirm that
        # this column exists for such experiments.
        self.mysql_query = '''
        SELECT %s, %s FROM `%s_allGels`
        LEFT JOIN `%s_statistics`
        USING ( NUMBER ) %s
        GROUP BY `ctrl4 ID`
        '''

        # Fields to query from the database.
        self.fields = '''
        `ctrl1 ID`, `average norm Volume of group control`,
        `relative standard deviation of group control`,
        `average norm Volume of group experimental`,
        `relative standard deviation of group experimental`,
        `ratio mean norm Volume experimental / mean norm Volume control`,
        MAX(`number of spots in row of group control`)
        '''

        self.ctrl_replicates = {
            1: '`norm volume ctrl1`',
            2: '`norm volume ctrl1`, `norm volume ctrl2`',
            3: '''`norm volume ctrl1`, `norm volume ctrl2`,
            `norm volume ctrl3`''',
            4: '''`norm volume ctrl1`, `norm volume ctrl2`,
            `norm volume ctrl3`, `norm volume ctrl4`'''}

        self.exp_replicates = {
            1: '`norm volume exp1`',
            2: '`norm volume exp1`, `norm volume exp2`',
            3: '`norm volume exp1`, `norm volume exp2`, `norm volume exp3`',
            4: '''`norm volume exp1`, `norm volume exp2`, `norm volume exp3`,
            `norm volume exp4`'''}

        # Connect to the database; the cursor and the connection are always
        # released, including when a query error or sys.exit unwinds.
        db = MySQLdb.connect(host="localhost", user="user",
                             passwd="pass", db="2d-gels")
        try:
            cursor = db.cursor()
            try:
                # Query normalized volumes with support for variable
                # numbers of replicates (control first, then experimental).
                len_ctrl, ctrl_rows = self._fetch_replicates(
                    cursor, self.ctrl_replicates, n, 'ctrl')
                len_exp, exp_rows = self._fetch_replicates(
                    cursor, self.exp_replicates, n, 'exp')
            finally:
                cursor.close()
        finally:
            db.close()

        # Perform the t-test using R via rpy.  Each result row starts with
        # the p-value, followed by the six statistic columns that come
        # right after the replicate columns of the experimental row.
        self.raw_stats = []
        for ctrl_row, exp_row in zip(ctrl_rows, exp_rows):
            fullstats = r.t_test(ctrl_row[0:len_ctrl], exp_row[0:len_exp])
            stat_columns = [exp_row[len_exp + offset] for offset in range(6)]
            self.raw_stats.append([fullstats['p.value']] + stat_columns)
        self.raw_stats.sort()

        # Filter out spots that have no matchings (row[2], the average
        # control volume, is zero) or high p-values.
        self.cropped_stats = [
            row[0:8] for row in self.raw_stats
            if float(row[0]) < float(self.pvalue_number) and row[2] != 0]
        # count is kept one above the number of results; n_max() undoes it.
        self.count = len(self.cropped_stats) + 1

    def _fetch_replicates(self, cursor, replicates, n, group):
        """Run the query with 4, then 3, then 2 replicate columns.

        cursor     -- open MySQLdb cursor.
        replicates -- dict mapping replicate count to a column list.
        n          -- key into self.conditions.
        group      -- 'ctrl' or 'exp'; only used in the exit message.

        Returns (replicate_count, rows) for the first query MySQL accepts.
        Exits the process when none of the three queries succeeds.
        """
        for n_replicates in (4, 3, 2):
            try:
                cursor.execute(self.mysql_query % (
                    replicates[n_replicates], self.fields, self.exp_date,
                    self.exp_date, self.conditions[n]))
            except MySQLdb.Error:
                # The query was rejected; retry with one replicate fewer.
                continue
            return n_replicates, cursor.fetchall()
        sys.exit("More than one %s trial is needed for a t-test." % group)

    # Get number of results
    def n_max(self):
        """Return the number of rows in cropped_stats."""
        return self.count - 1

class HeaderParse:
    """Generate MySQL table-creation scripts from quantitation headers.

    Each public method reads the first line of a header file in the
    current directory (column names quoted with '"' and separated by
    ';'), assigns every column an SQL type based on substrings of its
    name, and returns a script that creates the table and loads the
    matching quantitation file with LOAD DATA INFILE.
    """

    # Name fragments that mark a column as FLOAT in each kind of table.
    _ALL_GELS_FLOAT_MARKERS = ('norm ', 'ratio ', 'volume ', 'area ',
                               'grey ', 'background ', 'coordinate ')
    _STATS_FLOAT_MARKERS = ('norm ', 'ratio ', 'volume ', 'area ',
                            'average ', 'background ', 'spots in row ')
    # Name fragments that mark a column as an unsigned INT.
    _INT_MARKERS = (' ID`', 'frequency ')
    # Name fragments that mark a column as VARCHAR(50) (all-gels only).
    _LABEL_MARKERS = ('label ',)

    def all_data(self, exp_date):
        """Return the SQL script for the `<exp_date>_allGels` table.

        exp_date -- string used as the table-name prefix.

        Column names are read from the first line of 'all-columns.txt'.
        """
        sql_header = ('use 2d-gels \nCREATE TABLE `' + exp_date
                      + '_allGels` (\n')
        # No comma after the last column definition: MySQL rejects a
        # trailing comma before the closing parenthesis.
        sql_footer = (
            "`number` INT NOT NULL AUTO_INCREMENT PRIMARY KEY\n"
            "        );\n"
            "\n"
            "        LOAD DATA INFILE"
            " '/path/to/quantitationTable_allGels.txt'\n"
            "        INTO TABLE `" + exp_date
            + "_allGels` FIELDS TERMINATED BY ';';")
        columns = self._column_definitions(
            'all-columns.txt', self._ALL_GELS_FLOAT_MARKERS,
            self._LABEL_MARKERS)
        return sql_header + columns + sql_footer

    def stats(self, exp_date):
        """Return the SQL script for the `<exp_date>_statistics` table.

        exp_date -- string used as the table-name prefix.

        Column names are read from the first line of 'statistics.txt'.
        """
        sql_header = ('use 2d-gels \nCREATE TABLE `' + exp_date
                      + '_statistics` (\n')
        # No comma after the last column definition: MySQL rejects a
        # trailing comma before the closing parenthesis.
        sql_footer = (
            "`number` INT NOT NULL AUTO_INCREMENT PRIMARY KEY\n"
            "        );\n"
            "        \n"
            "        LOAD DATA INFILE"
            " '/path/to/quantitationTable_statistics.txt'\n"
            "        INTO TABLE `" + exp_date
            + "_statistics` FIELDS TERMINATED BY ';';")
        columns = self._column_definitions(
            'statistics.txt', self._STATS_FLOAT_MARKERS, ())
        return sql_header + columns + sql_footer

    def _column_definitions(self, header_path, float_markers, label_markers):
        """Return the typed column definitions for one header file.

        header_path   -- file whose first line holds the column names.
        float_markers -- name fragments that select FLOAT UNSIGNED.
        label_markers -- name fragments that select VARCHAR(50).

        The result has one "`name` TYPE,\\n" entry per column.
        """
        header_file = open(header_path)
        try:
            line = header_file.readline()
        finally:
            header_file.close()

        # Normalise the line terminator so that a header ending in "\r\n"
        # or lacking a final newline still gets its last column typed.
        line = line.rstrip('\r\n') + '\n'

        # Turn '"a";"b"\n' into '`a`,\n`b`,\n' (one quoted name per line).
        line = line.replace('";"', '`,\n`')
        line = line.replace('"\n', '`,\n')
        line = line.replace('"', '`')
        # '%' in a column name is spelled out as 'norm'.
        line = line.replace('%', 'norm')

        definitions = []
        for field in line.split('\n'):
            # The order of the checks matters: the first match wins.
            if self._mentions(field, float_markers):
                sql_type = "FLOAT UNSIGNED NOT NULL"
            elif self._mentions(field, self._INT_MARKERS):
                sql_type = "INT UNSIGNED DEFAULT '0' NOT NULL"
            elif self._mentions(field, label_markers):
                sql_type = "VARCHAR(50) NOT NULL"
            else:
                sql_type = "VARCHAR(10) NOT NULL"
            definitions.append(field.replace("`,", "` " + sql_type + ",\n"))
        return ''.join(definitions)

    def _mentions(self, field, markers):
        """Return True when any of the marker substrings occurs in field."""
        for marker in markers:
            if marker in field:
                return True
        return False
