<?php

class UniGene {
	
	// UniGene DLST: Dynamic Local Storage Tool
	// R. Hart (c) 2005-6 Rutgers, The State University
	// rhart@rci.rutgers.edu
	
	// unigene.class.inc 
	
	// DLST is free software; you can redistribute it and/or
	// modify it under the terms of the GNU General Public License
	// as published by the Free Software Foundation; either version 2
	// of the License, or (at your option) any later version.
	//
	// DLST is distributed in the hope that it will be useful,
	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	// GNU General Public License for more details.
	//
	// http://www.gnu.org/copyleft/gpl.html
	//
	// You should have received a copy of the GNU General Public License
	// along with this program; if not, write to the Free Software
	// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
	
	// Database info:
	// table unigene - create by running "unigene.sql" in main folder
	// holds records of genomes available at ncbi and which are stored locally
	
	// table XXXdata: (XXX=genome code)
	// holds records of clusters, one cluster per row
	
	// table XXXsequence: (XXX=genome code)
	// holds records of genbank accession numbers clustered into a specific unigene cluster
	// many rows per cluster
	
	// table XXXexpress: (XXX=genome code)
	// holds tissue expression data for each cluster
	// many rows per cluster

	// Class Members
	
	// FTP NCBI data -- Set $ftp_pw to your email address
	// (it is sent as the password of the anonymous FTP login).
	var $ftp_server="ftp.ncbi.nih.gov";
	var $ftp_directory="repository/UniGene";
	var $ftp_uname="anonymous";
	var $ftp_pw="a@b.com";  // insert your email address
	var $ftp_info="*.info"; // file pattern used when listing the genome info files
	var $genome_code; // genome codes taken from info file names (text before the first ".")
	var $genome_dir=array(); // to hold names of genome directories
	var $genome_dir_assoc=array(); // the same directory names, keyed by genome code

	// Database info -- Change these to match your MySQL database server
	var $db_server = "localhost";  // "localhost" is default
	var $db_uname = "root"; // "root" is default, but you may wish to create a dedicated username for this purpose
	var $db_pw = ""; // defaults to a blank for no password
	var $db_name = "chips"; // defaults to "chips" but you can use any database name you like
	var $db; // leave unassigned; receives the MySQL link when the connection is opened
	
	// Temporary local (web server) folder to hold files.  MUST have write access for the user running Apache.
	// The constructor prefixes $local_file_dir with $home_dir to build the full path.
	var $home_dir = "/Library/WebServer/Documents/unigene/";
	//var $home_dir = "C:/";  //TEMP FOR DEBUGGING XXX
	var $local_file_dir = "files/";
	
	// Admin - variable to switch on administrative access.
	// This will be replaced with a proper authentication system in future versions,
	// but for now you can turn admin access on or off here.
	// NOTE(review): the shipped default is true -- confirm this is intended for deployed copies.
	var $admin = true;
	
	// Miscellaneous re-used variables
	var $graphic = "img/unigene_banner_dlst.gif";  // image used in header
	var $search_page = "index.php?go=Search"; // link to a local search page using database
	var $crlf = "\r\n"; // line ending appended to generated output
	var $nightly_limit = 1; // set to number of genome downloads desired per nightly update
	var $error_limit = 2; // diagnostic() prints a message only when its level is below this value; set to zero to silence all diagnostics
	var $error_format = false; // false wraps diagnostics in HTML comments, true prints them as plain text
	var $error_length = 255; // diagnostics are cut to this many characters

	
	// Constructor
	// PHP 4 style constructor (named after the class).
	// Builds the full path of the temporary file folder, opens the MySQL
	// connection, selects the database and loads the genome directory names.
	// The true/false return statements are kept from the original code; PHP
	// discards the value a constructor returns when it is invoked through "new".
	function UniGene(){
		// assemble full path info for file dir
		$this->local_file_dir=$this->home_dir.$this->local_file_dir;
		
		// establish connection to db; stop when the server cannot be reached
		// rather than handing an invalid link to mysql_select_db()
		$this->db = mysql_connect($this->db_server, $this->db_uname, $this->db_pw);
		if(!$this->db){
			$this->diagnostic("Unable to connect to database: ".mysql_error(),1);
			return false;
		}
		
		if(!mysql_select_db($this->db_name,$this->db)){
			$this->diagnostic("Unable to connect to database: ".mysql_error($this->db),1);
			return false;
		}
		$this->diagnostic("Connected to database",1);
		
		//populate the genome dir array
		$this->get_genome_dirs();
		
		return(true);
	}
	
	// Debug output
	// Print a diagnostic message when its level is below $this->error_limit.
	// $txt   - message text; cut to $this->error_length characters
	// $level - severity level of the message (default 0)
	// Output format depends on $this->error_format: plain text when true,
	// otherwise the message is wrapped in an HTML comment.
	function diagnostic($txt, $level=0){
		
		// only outputs line if passed error level is less than global limit
		if(!($level < $this->error_limit)){
			return;
		}
		
		$msg = substr($txt,0,$this->error_length);
		
		if($this->error_format){ //text
			echo $msg.$this->crlf;
		} else { //html
			// a "-->" inside the message would end the comment early and put
			// the remainder of the message on the visible page
			$msg = str_replace("-->","--&gt;",$msg);
			echo "<!--".$msg."-->".$this->crlf;
		}
	}
	
	// Functions
	// Output the page header: the <head> section, the banner graphic and the
	// navigation links.
	// $the_heading - text placed in the <title> element (written as given, not escaped)
	// $the_page    - "search" adds the ratChoice() script and hides the Search link;
	//                "status" hides the Status link
	// $admin       - not used at present (the admin-only refresh link is commented out)
	// Always returns true.
	function page_head($the_heading="", $the_page="", $admin=false){
		
		print "<html>".$this->crlf;
		print "<head>".$this->crlf;
		print "<title>$the_heading</title>".$this->crlf;
		print "<meta name='pragma' content='no-cache'>".$this->crlf;
		print "<link rel='stylesheet' href='include/style.css' type='text/css'>".$this->crlf;
		if($the_page=="search"){
			// The script below is emitted verbatim.  PHP is re-entered with the
			// full "<?php" tag so the file does not depend on short_open_tag.
		?>
		<SCRIPT LANGUAGE="JavaScript">

		function ratChoice(obj) {

			box1 = obj.form.rn5k.checked;
			box2 = obj.form.rn8k.checked;

			count = (box1 ? 1 : 0) + (box2 ? 1 : 0)

			if (count > 1) {
				alert("Sorry... you may only choose one rat chip at a time.");
				obj.checked = false;
			}
		}
		//  End -->
</script>
<?php
		}

		print "</head>".$this->crlf;
		print "<body>".$this->crlf;
		print "<table border='0'><tr>";
		print "<td><a href='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=unigene'>";
		print "<img border='0' src='$this->graphic'></a></td></tr>".$this->crlf;
		print "<tr>";
		print "<td class='center'>";
		// the link to the page currently shown is left out of the navigation bar
		print ($the_page != "search" ? "| <a href='index.php?go=Search'>Search Page</a> ": "");
		print ($the_page != "status" ? "| <a href='index.php?go=status'>Status</a> " : "");
		//print ($admin ? "| <a href='index.php?go=refresh'>Refresh Table From NCBI</a> " : "");
		print "|</td></tr></table>".$this->crlf;
		return(true);
	}
	
	
	// Emit the page footer: the copyright paragraph followed by the closing
	// body and html tags.  Always returns true.
	function page_tail(){
		$eol = $this->crlf;
		echo "<p class='small'>Copyright &copy; 2005-6, R. Hart, Rutgers, The State University</p>"
			."</body>".$eol
			."</html>".$eol;
		return true;
	}
	
	// Read the unigene table and print it as an HTML table inside a form.
	// $admin - when true, a "queue for update" checkbox column and a submit
	//          button are added.
	// Returns true when the table was read, false when the query failed (the
	// surrounding markup is still closed properly in that case).
	function genome_table($admin){
		if($admin){
			$this->diagnostic("Admin permissions ON",1);  //debug html comment
		}

		$this->db_connect();  //make sure the MySQL connection is open
		
		echo "<form name='getug' method='get' action='index.php'>".$this->crlf;
		echo "<input type='hidden' name='go' value='update'>".$this->crlf;
		
		echo "<h2>Current status of UniGene tables</h2>".$this->crlf;
		echo "<table border='1'>".$this->crlf;
		echo "<tr>"
			.($admin?"<!--<th>Update<br>Now</th>--><th>Queue<br>for<br>Update</th>":"")
			."<th>Genome<br>Code</th><th>Genome</th><th>Build<br>Available</th>"
			."<th>Date<br>Last<br>Build</th>"
			."<th>Build<br>Stored</th><th>Date<br>Stored</th>"
			."</tr>"
			.$this->crlf;
		
		$sql = "select * from unigene order by genomecode asc";
		$result = mysql_query($sql);
		if(!$result){
			$this->diagnostic("Error running sql: ".$sql."<br>".mysql_error(),1);
		}
		
		while($result && ($row=mysql_fetch_assoc($result))){
			$code = $row['genomecode'];
			
			echo "<tr>";
			if($admin) {
				echo "<td class='center'><input type='checkbox' name='queue[]' value='".$code."'";
				if($row['queue']==1){
					echo " checked ";
				}
				echo "></td>";
			}
			echo "<td class='center'>".$code."</td>";
			
			// directory name on the NCBI server; blank when the code is unknown
			$dir = isset($this->genome_dir_assoc[$code]) ? $this->genome_dir_assoc[$code] : "";
			// the tooltip shows the start of the stored info file; it is free text,
			// so quotes and angle brackets must not break the title attribute
			$tooltip = htmlspecialchars(substr(stripslashes($row['infofilencbi']),0,200), ENT_QUOTES);
			echo "<td><a target='_blank' href='ftp://ftp.ncbi.nih.gov/repository/UniGene/".$dir."/".ucfirst($code).".info' title='".$tooltip."'>".$row['title']."</a></td>";
			
			echo "<td class='center'>".$row['buildncbi']."</td>";
			echo "<td class='center'>".($row['infodatencbi'])."</td>";
			echo "<td class='center'>".($row['buildlocal']?$row['buildlocal']:"&nbsp;")."</td>";
			echo "<td class='center'>".(($row['infodatelocal'] != "0000-00-00")?$row['infodatelocal']:"&nbsp;")."</td>";

			echo "</tr>".$this->crlf;
		}
		echo "</table>".$this->crlf;
		if($admin) {
			echo "<input type='submit' name='button' value='Go'>".$this->crlf;
		}
		// the form is opened for every visitor, so it is closed for every visitor
		echo "</form>".$this->crlf;
		return($result ? true : false);
	}
	

	
	// Open an FTP session on $this->ftp_server, log in with $this->ftp_uname /
	// $this->ftp_pw, change into $this->ftp_directory and switch to passive mode.
	// Returns the FTP connection on success, false on any failure (a connection
	// that was already opened is closed again before returning false).
	function ncbi_connect(){
		// establish connection to ncbi ftp server
		$conn_id = ftp_connect($this->ftp_server);

		// login with uname and pw -- only when there is a connection to log in on
		$login_result = $conn_id ? ftp_login($conn_id, $this->ftp_uname, $this->ftp_pw) : false;

		// check connection
		if ((!$conn_id) || (!$login_result)) { //error condition
			echo "<p>FTP connection has failed!<br>";
			echo "Attempted to connect to $this->ftp_server for user $this->ftp_uname </p>";
			if ($conn_id) {
				ftp_close($conn_id);
			}
			return(false);
		}
		//success, hide in html comment
		$this->diagnostic("Connected to $this->ftp_server, for user $this->ftp_uname ",1);

		// change the directory
		if (!ftp_chdir($conn_id, $this->ftp_directory)) { //error
			$this->diagnostic("Couldn't change directory",1);
			ftp_close($conn_id);
			return(false);
		}
		//success, hide in html comment
		$this->diagnostic("Current directory is now: " . ftp_pwd($conn_id),1);
		
		//set passive mode = true, this seems to help on some servers
		ftp_pasv($conn_id, true);

		return($conn_id);
	}
	
	// List the files matching $this->ftp_info in the current FTP directory and
	// store, for each one, the part of the name before the first "." in
	// $this->genome_code (same index as in the listing).
	// $conn_id - FTP connection as returned by ncbi_connect()
	// Returns true on success, false when the listing could not be obtained.
	function ncbi_get_titles($conn_id){
		
		$info_tables = ftp_nlist($conn_id, $this->ftp_info);
		if(!is_array($info_tables)){
			// ftp_nlist() reports failure with false, which must not be indexed
			$this->diagnostic("Failed to list info files: ".$this->ftp_info,1);
			return(false);
		}

		for ($i=0;$i<sizeof($info_tables);$i++) {
			$this->genome_code[$i] = substr($info_tables[$i],0,strpos($info_tables[$i],"."));
		}

		return(true);
	}


	
	// Close the FTP connection $conn_id.
	// Returns true when ftp_close() reports success, false otherwise.
	function ncbi_close($conn_id){
		return ftp_close($conn_id) ? true : false;
	}
	
	// Make sure a MySQL connection is available.
	// When a link is already open nothing is done; otherwise a connection is
	// opened with the configured credentials and $this->db_name is selected.
	// Returns true when a usable connection exists, false otherwise.
	function db_connect(){
		
		// are we currently connected? errors are suppressed because the probe
		// is expected to fail when no link has been opened yet
		if(@mysql_get_host_info()){
			//we're connected, return
			$this->diagnostic("Already connected to database",1);
			return true;
		}
		
		// connect to your mysql server
		$this->db = mysql_connect($this->db_server, $this->db_uname, $this->db_pw);
		if(!$this->db){
			// do not pass an invalid link on to mysql_select_db()
			$this->diagnostic("Unable to connect to database: ".mysql_error(),1);
			return false;
		}
		
		if(mysql_select_db($this->db_name,$this->db)){
			$this->diagnostic("Connected to database",1);
			return true;
		}
		
		$this->diagnostic("Unable to connect to database: ".mysql_error($this->db),1);
		return false;
	}
	
	// Build the genome directory names from the titles in the unigene table
	// (blanks in the title replaced by underscores).
	// Fills $this->genome_dir (ordered by title) and $this->genome_dir_assoc
	// (keyed by genome code).  Both members are replaced as a whole, so calling
	// the function again does not append duplicates; they are left untouched
	// when the database cannot be read.
	// Returns true on success, false on failure.
	function get_genome_dirs(){
		if(!$this->db_connect()){
			return(false);
		}
		
		$sql = "SELECT  genomecode, REPLACE(title,' ','_') AS dir FROM  unigene ORDER BY title ASC";
		if(!($result=mysql_query($sql))) {
			$this->diagnostic("Error running sql: ".$sql."<br>".mysql_error(),1);
			return(false);
		}

		$dirs = array();
		$dirs_by_code = array();
		while($buf=mysql_fetch_assoc($result)){
			// rows without a title yield no directory name and are skipped
			if(strlen((string)$buf['dir'])){
				$dirs[] = $buf['dir'];
				$dirs_by_code[$buf['genomecode']] = $buf['dir'];
			}
		}
		
		$this->genome_dir = $dirs;
		$this->genome_dir_assoc = $dirs_by_code;
		return true;
	}
	
	// Refresh the unigene table from the NCBI FTP server.
	// For every directory in $this->genome_dir the first "*.info" listing line
	// is fetched, the info file is downloaded and parsed, and build number,
	// file date and info text are written to the matching unigene row.  Genomes
	// found in chiplist whose stored build differs from the NCBI build get
	// their queue flag set.
	// $admin   - only used to emit a diagnostic
	// $offline - true for command-line use: plain progress lines instead of an
	//            HTML table
	// Returns true when the refresh ran, false when the FTP or database
	// connection could not be established.
	function refresh_genome_table($admin, $offline=false){
		
		if($offline) print "-----Refresh_genome_table begin ".date("Y-m-d H:i:s").$this->crlf;
		
		if($admin){
			$this->diagnostic("Admin permissions ON",1);
		}
		
		if(!($offline)) echo "<h2>Updating Local UniGene Table</h2>".$this->crlf;
		
		$conn_id = $this->ncbi_connect();
		if(!$conn_id){
			$this->diagnostic("refresh_genome_table: no FTP connection, nothing updated",1);
			return(false);
		}
		if(!$this->db_connect()){
			$this->ncbi_close($conn_id);
			return(false);
		}
		$main_directory=ftp_pwd($conn_id);
		
		// $buff holds the first listing line per reachable genome directory and
		// $buff_dirs the directory it came from.  The two are filled together so
		// the pairing stays correct when a directory cannot be entered.
		$buff = array();
		$buff_dirs = array();
		
		foreach ($this->genome_dir as $genome) {
			if(ftp_chdir($conn_id,$genome)){
				$this->diagnostic("Genome is ".$genome." info is ".$this->ftp_info,4);
				$rawlist = ftp_rawlist($conn_id,$this->ftp_info);
				// ftp_rawlist() gives false on failure; treat that like an empty listing
				$first_line = (is_array($rawlist) && isset($rawlist[0])) ? $rawlist[0] : "";
				$this->diagnostic("rawlist retrieved is: ".$first_line,4);
				array_push($buff, $first_line);
				array_push($buff_dirs, $genome);
				ftp_chdir($conn_id,$main_directory);
			}
		}
		
		//create table output
		if(!($offline)){
			echo "<table border='1'>".$this->crlf;
			echo "<tr><th>Code</th><th>Genome</th><th>Build</th><th>Date</th></tr>".$this->crlf;
		}
		
		//loop through each info file directory entry
		for ($i=0;$i<sizeof($buff);$i++) {
			if(empty($buff[$i])){
				$this->diagnostic("info dir line empty or missing for genome number: ".$i,4);
				continue;
			}
			
			//parse i-th buff entry
			$dir = $this->read_dir_line($buff[$i]);
			$this->genome_code[$i] = substr($dir['name'],0,strpos($dir['name'],"."));
			$genome_info_date = date("Y-m-d",$dir['date']);
			// the code comes from a remote file name, so it is escaped before use in SQL
			$code_sql = mysql_real_escape_string(strtolower($this->genome_code[$i]));
			
			//parse info file
			$info = $this->read_info($buff_dirs[$i]."/".$dir['name'],$conn_id);
			if(!is_array($info)){
				// without the info file there is nothing reliable to store
				$this->diagnostic("Skipping genome ".$this->genome_code[$i].": info file could not be read",1);
				continue;
			}
			
			//recover data about previously loaded genomes
			$sql = "select queue, buildlocal, infodatelocal from unigene where genomecode='".$code_sql."' limit 1";
			$return = mysql_query($sql);
			$localdata = $return ? mysql_fetch_assoc($return) : false;
			if(!is_array($localdata)){
				// query failed or no row for this genome yet
				$localdata = array('buildlocal'=>"", 'infodatelocal'=>"", 'queue'=>"");
			}
			
			//adjust queue to automatically update genomes in gendex
			$sql = "select count(*) as nrows from chiplist left join gendex on chiplist.id=gendex.table_id where chiplist.species_code='"
				.$code_sql."'";
			$result = mysql_query($sql);
			$nrows = $result ? mysql_fetch_assoc($result) : false;
			
			if(is_array($nrows) && $nrows['nrows']>0) {
				//this genome is in gendex
				
				if(!($localdata['queue']>0)){
					//not previously selected for update (could be 0 or null)
					
					if($localdata['buildlocal'] != $info['build']){
						//build has been updated, change queue flag for file updating
						$this->diagnostic("build different, local is ".$localdata['buildlocal'].", info is ".$info['build']
							.", so queue set to 1",2);
						$localdata['queue']=1;
					}
				}
			}
			
			//construct sql string containing new ncbi info and update without changing title
			// ($info['infofile'] arrives already slash-escaped from read_info())
			$sql = "update unigene "
						."set buildlocal='".mysql_real_escape_string($localdata['buildlocal'])."', "
						."buildncbi='".$info['build']."', "
						."infodatelocal='".mysql_real_escape_string($localdata['infodatelocal'])."', "
						."infodatencbi='".$genome_info_date."', "
						."queue='".mysql_real_escape_string($localdata['queue'])."', "
						."infofilencbi='".$info['infofile']."' "
						."where genomecode='".$code_sql."'";
			
			$this->diagnostic("unigene update string: ".$sql,4);
			
			//execute and check results
			$update_result = mysql_query($sql);
			if($update_result === false){
				$this->diagnostic("Failed to update genome ".$this->genome_code[$i]."\r\naffected_rows returns: "
					.mysql_affected_rows($this->db)."\r\nerror: ".mysql_error($this->db),1);
			} else if(!($offline)){ //success--output table row
				echo "<tr><td class='center'>".$this->genome_code[$i]."</td><td>"
					.$info['title']."</td><td class='center'>"
					.$info['build']."</td><td class='center'>"
					.$genome_info_date."</td></tr>".$this->crlf;
			}
		}//end of for loop
		
		if($offline) {
			$this->diagnostic("-----End of refresh_genome_table ".date("Y-m-d H:i:s"),1);
		} else {
			echo "</table>".$this->crlf;
		}
		$this->ncbi_close($conn_id);
		return(true);
	}
	
	// Download one info file over FTP and parse it.
	// $gc     - remote path of the info file, relative to the current FTP directory
	// $ftp_id - open FTP connection
	// Returns false when the temp file cannot be created or the download fails,
	// otherwise an array with:
	//   'build'    - digits following "UniGene Build #" on the first line ("" if absent)
	//   'title'    - remainder of the first line, trimmed ("" if absent)
	//   'infofile' - the complete file text, passed through addslashes()
	function read_info($gc,$ftp_id) {
		$tmp_path = $this->local_file_dir."tmp.txt";
		
		// open a local temp file to hold data ("x+" fails when the file exists)
		$tfile = fopen($tmp_path,"x+");
		if(!$tfile){
			$this->diagnostic("Error creating temp file",1);
			// a file left behind by an interrupted run would block every later
			// call; remove it so the next attempt can succeed
			if(file_exists($tmp_path)){
				unlink($tmp_path);
			}
			return(false);
		}
		
		// get the info file
		if(!(ftp_fget($ftp_id,$tfile,$gc,FTP_ASCII,0))){
			$this->diagnostic("Failed to get info file via ftp: ".$gc,1);
			fclose($tfile);
			unlink($tmp_path);
			return(false);
		}
		
		$tmp = array('build'=>"", 'title'=>"", 'infofile'=>"");
		
		// first line carries build number and title
		rewind($tfile);
		$line = fgets($tfile);
		if(preg_match('/^UniGene Build #([0-9]*)[[:space:]](.*)/s',(string)$line,$reg)){
			$tmp['build'] = $reg[1];
			$tmp['title'] = trim($reg[2]);
		} else {
			$this->diagnostic("First line of info file not recognised: ".$gc,2);
		}
		
		// keep the whole file, escaped for later use inside an SQL string
		rewind($tfile);
		while(!feof($tfile)){
			$chunk = fgets($tfile);
			if($chunk === false){
				break; // nothing more to read
			}
			$tmp['infofile'] .= addslashes($chunk);
		}
		
		fclose($tfile);
		unlink($tmp_path);
		return($tmp);
	}
	
	// Parse one long-format directory line from the NCBI FTP server.
	// The line is split on runs of whitespace; fields 5 to 9 are taken as size,
	// month, day, time-or-year and file name.
	// Returns an array with keys 'size', 'mon', 'day', 'timeyear', 'name' (""
	// for a field the line does not have) and 'date', a Unix timestamp for
	// midnight of the file date (false when the date cannot be interpreted).
	function read_dir_line($line){
		$parsed_array = preg_split('/\s+/', trim($line));
		
		$tmp_array = array();
		$field_at = array('size'=>4, 'mon'=>5, 'day'=>6, 'timeyear'=>7, 'name'=>8);
		foreach ($field_at as $key => $pos) {
			$tmp_array[$key] = isset($parsed_array[$pos]) ? $parsed_array[$pos] : "";
		}
		
		$month_day = $tmp_array['mon']." ".$tmp_array['day'].", ";
		
		if(strpos($tmp_array['timeyear'],":") !== false) {
			//timeyear contains a time, so the listing gives no year
			$this_year = strtotime($month_day.date("Y"));
			if($this_year>time()){
				//date is from previous year since it can't be in the future
				$tmp_array['date']=strtotime($month_day.(date("Y")-1));
			} else {
				//date is from earlier this year
				$tmp_array['date']=$this_year;
			}
		} else {
			//timeyear contains year
			$tmp_array['date']=strtotime($month_day.$tmp_array['timeyear']);
		}
		return $tmp_array;

	}	
	
	// Return the next token of a data-file line.
	// $ls  - the line
	// $li  - scan position (by reference); blanks at this position are skipped
	//        and on return it rests on the character that ended the token
	// $eol - (by reference) set to 'Y' when the scan reached the end of the
	//        line, otherwise 'N'
	// A token ends at a blank, ';' or '='.
	// NOTE(review): the earlier version also compared each single character
	// with a string of several blanks, which can never match; those tests may
	// once have been tab characters.  Tabs are therefore not separators here --
	// confirm against the data files.
	function gettoken($ls, &$li, &$eol) {
		// step over leading blanks
		$li += strspn($ls, ' ', $li);
		$start = $li;

		// advance to the first delimiter or the end of the line
		$li += strcspn($ls, ' ;=', $li);

		$eol = ($li <= strlen($ls) - 1) ? 'N' : 'Y';

		return substr($ls,$start,$li - $start);

	}


// Return the next ';'-terminated entry of a data-file line.
// $ls  - the line
// $li  - scan position (by reference); blanks at this position are skipped and
//        on return it rests on the ';' that ended the entry, or past the end
// $eol - (by reference) 'Y' when the end of the line was reached, otherwise 'N'
// NOTE(review): the earlier version also compared each single character with a
// string of several blanks, which can never match; possibly a tab was meant.
function gettissue($ls, &$li, &$eol) {
	// step over leading blanks
	$li += strspn($ls, ' ', $li);
	$start = $li;

	// everything up to the next ';' belongs to the entry
	$li += strcspn($ls, ';', $li);

	$eol = ($li <= strlen($ls) - 1) ? 'N' : 'Y';

	return substr($ls,$start,$li - $start);

}

// Download the gzipped data file of one genome from NCBI into the local file
// folder as "<genome code>.data.gz".
// $gc - genome code; must be a key of $this->genome_dir_assoc
// Returns true when the file was downloaded, false when the genome is unknown,
// the local file cannot be opened, the FTP connection fails or the transfer
// fails.
function get_data_file($gc) {
	if(!isset($this->genome_dir_assoc[$gc])){
		$this->diagnostic("No genome directory known for: $gc ",1);
		return(false);
	}
	
	$local_file_name = $this->local_file_dir.$gc.".data.gz";
	
	if(file_exists($local_file_name)) {
		$this->diagnostic("File exists: $local_file_name ",1);
	} else {
		$this->diagnostic("File being created: $local_file_name ",1);
	}
	
	// "wb" creates the file or empties an existing one, so bytes of an older,
	// longer download cannot remain behind the new data
	$fp=fopen($local_file_name,"wb");
	if(!$fp) {
		$this->diagnostic("Failed to open file: $local_file_name ",1);
		return(false);
	}
	
	$conn_id = $this->ncbi_connect();
	if(!$conn_id){
		fclose($fp);
		return(false);
	}
	
	$ncbiFn = $this->genome_dir_assoc[$gc]."/".ucfirst($gc.".data.gz");
	$fetched = ftp_fget($conn_id, $fp, $ncbiFn, FTP_BINARY, 0);
	fclose($fp);
	$this->ncbi_close($conn_id);
	
	if(!$fetched){
		$this->diagnostic("Failed to download data file: $ncbiFn ",1);
		return(false);
	}
	return(true);
	
}

// Parse the downloaded "<genome code>.data.gz" file into the genome-specific
// tables <code>data, <code>express and <code>sequence, then record the build
// number and today's date in the unigene table.
// The three tables are dropped and re-created before parsing, and the data
// file is deleted afterwards.
// $gc      - genome code (used as table-name prefix and file-name stem)
// $offline - true suppresses the HTML progress output
// Returns true on success, false when the data file cannot be opened, a table
// cannot be re-created or the unigene row cannot be updated.
function parser($gc, $offline=false) {
	// data file has been downloaded and stored, open file (data is gzipped)
	$fp = gzopen($this->local_file_dir.$gc.".data.gz","r");
	if(!$fp){
		$this->diagnostic("Failed to open data file for genome: ".$gc,1);
		return(false);
	}

	// establish db connection
	$this->db_connect();

	// drop and re-create the genome-specific tables, in this order:
	// data, express, sequence
	$statements = array();

	$statements[] = "DROP TABLE IF EXISTS `".$gc."data`";
$sqlmaker=<<<EOL
CREATE TABLE IF NOT EXISTS `%sdata` (
  `id` int(11) NOT NULL default '0',
  `title` varchar(255) default NULL,
  `gene` varchar(20) default NULL,
  `cytoband` varchar(20) default NULL,
  `mgi` varchar(20) default NULL,
  `locuslink` varchar(20) default NULL,
  `chromosome` varchar(20) default NULL,
  PRIMARY KEY  (`id`),
  KEY `%sDataLocuslink` (`locuslink`)
)
EOL;
	$statements[] = sprintf($sqlmaker,$gc,$gc);

	$statements[] = "DROP TABLE IF EXISTS `".$gc."express`";
$sqlmaker=<<<EOL
CREATE TABLE IF NOT EXISTS `%sexpress` (
  `id` int(11) NOT NULL default '0',
  `seq_no` int(11) NOT NULL default '0',
  `tissue` varchar(255) default NULL,
  PRIMARY KEY  (`id`,`seq_no`)
)
EOL;
	$statements[] = sprintf($sqlmaker,$gc);

	$statements[] = "DROP TABLE IF EXISTS `".$gc."sequence`";
$sqlmaker=<<<EOL
CREATE TABLE IF NOT EXISTS `%ssequence` (
  `id` int(11) NOT NULL default '0',
  `seq_no` int(11) NOT NULL default '0',
  `acc` varchar(20) default NULL,
  PRIMARY KEY  (`id`,`seq_no`),
  KEY `seq_acc` (`acc`(10)),
  KEY `acc_index` (`acc`(10))
)
EOL;
	$statements[] = sprintf($sqlmaker,$gc);

	foreach($statements as $sql){
		if(!(mysql_query($sql))) {
			$this->diagnostic("Error running sql: $sql <br>".mysql_error(),1);
			gzclose($fp);
			return(false);
		}
	}

	// NOTE(review): the first line of the file is read and immediately
	// replaced by the second, so it is never parsed; the first cluster is
	// stored under the initial id 1.  Confirm this matches the file layout.
	$ls = gzgets($fp);
	$ls = gzgets($fp);

	$seq_seq_no=0;
	$id = 1;
	$li = 0;
	$eol = 'N';
	$title1="";
	$gene="";
	$cytoband="";
	$mgi="";
	$locus="";
	$chromo="";
	$last_line=""; // most recent non-blank line, needed to flush the final record

	if(!($offline)) echo "<p>Parsing downloaded data (one dot for each cluster stored) ";
	while (!feof ($fp)) {
		$li = 0;
		if(trim($ls) != ""){
			$last_line = $ls;
		}
		$token = $this->gettoken($ls, $li, $eol);
		
		// tags without a case below (STS, PROTSIM, SCOUNT, the "//" record
		// separator, blank lines, ...) are ignored
		switch ($token) {
			case "ID":
				// an ID line starts a new cluster: store the one collected so far
				$seq_seq_no=0;
				$c = "insert into ".$gc."data values ($id, '$title1', '$gene', '$cytoband', '$mgi', '$locus', '$chromo')";
			
				if(mysql_query($c)) {
					if(!($offline)) echo ".<!-- $id $title1 -->".$this->crlf;
				} else {
					$this->diagnostic("Error $c ".$this->crlf.mysql_error(),1);
				}
				
				// a cluster must not inherit fields from its predecessor
				$title1="";
				$gene="";
				$cytoband="";
				$mgi="";
				$locus="";
				$chromo="";
				
				// the numeric id is the text after the first "." of the value
				$ls = trim(substr($ls,$li));
				$id = (int)(substr($ls,strpos($ls,".")+1));
				break;
			
			case "TITLE":
				$title1 = addslashes(trim(substr($ls,$li)));
				break;
			
			case "GENE":
				$gene = addslashes(trim(substr($ls,$li)));
				break;
				
			case "CYTOBAND":
				$cytoband = addslashes(trim(substr($ls,$li)));
				break;
				
			case "MGI":
				$mgi = addslashes(trim(substr($ls,$li)));
				break;
				
			case "LOCUSLINK":
				$locus = addslashes(trim(substr($ls,$li)));
				break;
				
			case "CHROMOSOME":
				$chromo = addslashes(trim(substr($ls,$li)));
				break;
				
			case "SEQUENCE":
				$seq_seq_no = $seq_seq_no + 1;
				// first token after the tag is the key (presumably "ACC"); the
				// scan stops on "=", which is stepped over to reach the value
				$token = $this->gettoken($ls, $li, $eol);
				$li = $li + 1;
				$acc = $this->gettoken($ls, $li, $eol);
				
				//trim any trailing ".n" from acc before inserting
				$dot = strpos($acc,".");
				if ($dot !== false) {
					$acc = substr($acc,0,$dot);
				}

				$c = "insert into ".$gc."sequence values ($id, $seq_seq_no, '".addslashes($acc)."')";
				$get=mysql_query($c);
				break;
				
			case "EXPRESS":
				// NOTE(review): an entry is stored only while more text follows
				// it on the line, so the final segment of the line is not
				// stored -- confirm that this is intended.
				$exp_seq_no = 1;
				$tissue = $this->gettissue($ls, $li, $eol);
				while ($eol == 'N') {
					$c = "insert into ".$gc."express values ($id, $exp_seq_no, '".addslashes($tissue)."')";
					$get=mysql_query($c);
					$exp_seq_no += 1;
					$li += 1;
					$tissue = $this->gettissue($ls, $li, $eol);
				}
				break;
		}

		$ls = gzgets($fp);
	}

	// Clusters are stored when the NEXT ID line is met, so the final cluster is
	// still pending here.  Store it when the file ended with the "//" separator.
	if($ls !== false && trim($ls) != ""){
		$last_line = $ls;
	}
	if(trim($last_line)=="//"){
		$c = "insert into ".$gc."data values ($id, '$title1', '$gene', '$cytoband', '$mgi', '$locus', '$chromo')";
		if(!mysql_query($c)){
			$this->diagnostic("Error $c ".$this->crlf.mysql_error(),1);
		}
	}

	$get=mysql_query('commit');
	$this->diagnostic("Parse complete ".date("Y-m-d H:i:s"),1);
	
	//close and erase temp file
	gzclose($fp);
	unlink($this->local_file_dir.$gc.".data.gz");
	
	//get and store build number
	$build = "";
	$conn_id = $this->ncbi_connect();
	if($conn_id){
		$dir = isset($this->genome_dir_assoc[$gc]) ? $this->genome_dir_assoc[$gc] : "";
		$info = $this->read_info($dir."/".ucfirst($gc).".info",$conn_id);
		$this->ncbi_close($conn_id);
		if(is_array($info)){
			$build = $info['build'];
		}
	}
	if($build == ""){
		$this->diagnostic("parser could not read the build number for genome: ".$gc,1);
	}
	$this->diagnostic("parser gets info file for genome: ".$gc." Build: ".$build,2);

	//set new infodatelocal in unigene to mark download
	$sql = "update unigene set infodatelocal='".date("Y-m-d")."', buildlocal='".$build."' where genomecode='".$gc."'";
	if(!(mysql_query($sql))) {
		$this->diagnostic("Error updating unigene table: $sql".$this->crlf.mysql_error(),1);
		return(false);
	}
	
	$this->diagnostic("Parser updated unigene: ".$sql,2);
	return(true);

}

// Assemble the chip query for a serial number and place it in $qy.
// The sixth character of $s_num selects the leading query: 'H' uses $qh, 'M'
// uses $qm, 'R' uses $qr or $qr8.  The leading query gets a
// "serial_number > $s_num" condition; further queries are attached with
// "union".  Where a rat query is used, $rn5k takes precedence over $rn8k.
// $qy is left untouched for any other character, and for 'R' when neither rat
// flag is set.
function construct_sql($s_num,$mm22k,$rn5k,$rn8k,$qh,$qm,$qr,$qr8,&$qy)
{
	$species = substr($s_num,5,1);
	$after_serial = " and serial_number > '$s_num'";

	switch ($species) {
		case 'H':
		case 'M':
			$qy = ($species == 'H' ? $qh : $qm).$after_serial;
			// the mouse query is only ever attached to a human lead query
			if ($species == 'H' && $mm22k) {
				$qy .= " union $qm";
			}
			if ($rn5k) {
				$qy .= " union $qr";
			} elseif ($rn8k) {
				$qy .= " union $qr8";
			}
			break;

		case 'R':
			if ($rn5k) {
				$qy = $qr.$after_serial;
			} elseif ($rn8k) {
				$qy = $qr8.$after_serial;
			}
			break;
	}
}

// Run a full-text keyword search on gendex and print the hits as an HTML table.
// $keys    - keyword string as typed by the user
// $species - one gendex table_id or an array of them (treated as integers)
// $limit   - maximum number of rows requested (values below 1 fall back to 10)
// Returns 0 when keywords or species are missing, 1 when no genes were found;
// after printing a result table no value is returned.
// All three arguments are user input: they are escaped or cast before use in
// SQL and HTML-escaped before being echoed into the debug comments.
function keyword_search($keys,$species,$limit=10) {
	if(empty($keys) || !is_scalar($keys)) {
		echo "<!--No search keys entered-->".$this->crlf;
		return(0);
	}
	$keys = (string)$keys;
	echo "<!--Keys is ".htmlspecialchars($keys)." -->".$this->crlf;
	
	if(empty($species)){
		echo "<!--No species selected-->".$this->crlf;
		return(0);
	}
	
	// table ids are compared unquoted in the query, so force them to integers
	$table_ids = array();
	foreach((array)$species as $one_species){
		$table_ids[] = (int)$one_species;
	}
	echo "<!--Species is ".implode(",",$table_ids)." -->".$this->crlf;
	echo "<!--Size of species is ".sizeof($table_ids)." -->".$this->crlf;
	
	$limit = (int)$limit;
	if($limit < 1){
		$limit = 10;
	}

$sql_species = <<<EOL
select chiplist.id id, chiplist.chip_table chip, chiplist.species_code code,
chiplist.chip_key chip_key, unigene.title title 
from chiplist left join unigene
on chiplist.species_code=unigene.genomecode
EOL;

$sql_srch = <<<EOL
SELECT id, chip_acc, chip_id, table_id, data_id, data_title, data_gene, 
format((MATCH (keywords) AGAINST ('%s')),3) AS score 
FROM gendex
WHERE table_id %s
ORDER BY score DESC
LIMIT %s
EOL;

	// the connection must exist before mysql_real_escape_string() is called
	$this->db_connect();

	$where = "IN (".implode(",",$table_ids).")";
	$sql = sprintf($sql_srch,mysql_real_escape_string($keys),$where,$limit);

	echo "<!--sql is ".htmlspecialchars($sql)." -->".$this->crlf;

	// lookup tables keyed by chiplist id
	$chipnames = array();
	$genomecode = array();
	$title = array();
	$chip_key = array();
	$result = mysql_query($sql_species);
	while($result && ($srow=mysql_fetch_array($result))){
		$chipnames[$srow['id']]=$srow['chip'];
		$genomecode[$srow['id']]=$srow['code'];
		$title[$srow['id']]=$srow['title'];
		$chip_key[$srow['id']]=$srow['chip_key'];
	}

	$result=mysql_query($sql);
	if(!$result){
		$this->diagnostic("Error running search: ".mysql_error(),1);
	}

	if(!$result || mysql_num_rows($result)==0){
		echo "<p>No genes returned.</p>".$this->crlf;
		return(1);
	}

	echo "<table>";
	echo "<tr><th>Species</th><th>Chip</th><th>ID</th><th>GenBank<br>Accession</th><th>Unigene</th>"
		."<th>Title</th><th>Score</th></tr>".$this->crlf;

	while($row=mysql_fetch_assoc($result)){
		// rows that do not match any keyword have a score of zero and are skipped
		if(!($row['score']>0)){
			continue;
		}
		$tid = $row['table_id'];
		$row_title = isset($title[$tid]) ? $title[$tid] : "";
		$row_chip = isset($chipnames[$tid]) ? $chipnames[$tid] : "";
		$row_key = isset($chip_key[$tid]) ? $chip_key[$tid] : "";
		$row_org = ucfirst(isset($genomecode[$tid]) ? $genomecode[$tid] : "");
		
		//output row
		echo "<tr>";
		echo "<td>".$row_title."</td>";
		echo "<td>".$row_chip."</td>";
		echo "<td><a href='detail.php?chip=".$row_key."&id=".$row['chip_id']."' "
			.">".$row_key.":".$row['chip_id']."</a></td>";
		echo "<td><a href='http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Search&db=Nucleotide&term="
			.$row['chip_acc']."' target='_new'>".$row['chip_acc']."</a></td>";
		echo "<td><a href='http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG="
			.$row_org."&CID="
			.$row['data_id']."' target='_new'>".$row_org.".".$row['data_id']."</a></td>";
		echo "<td>".$row['data_title']."</td>";
		echo "<td>".$row['score']."</td>";
		echo "</tr>".$this->crlf;
	}
	echo "</table>".$this->crlf;	
}

// Print the keyword search form: one checkbox per chip table present in
// gendex, a keyword field and a result-limit field.
// $keys    - keyword text to pre-fill (user input, HTML-escaped here)
// $species - table id, or array of table ids, whose checkboxes are pre-checked
// $limit   - value to pre-fill in the limit field (user input, HTML-escaped here)
function search_form($keys,$species,$limit=10){
	
$sql_species = <<<EOL
select chiplist.id id, chiplist.chip_table chip, chiplist.species_code code,
unigene.title title 
from chiplist left join unigene
on chiplist.species_code=unigene.genomecode
EOL;

$sql_chips_avail = <<<EOL
select distinct table_id from gendex
EOL;

	
	$this->db_connect();

	// genome title per chiplist id, used as checkbox label
	$title = array();
	$result = mysql_query($sql_species);
	while($result && ($srow=mysql_fetch_array($result))){
		$title[$srow['id']]=$srow['title'];
	}
	
	// table ids that actually have rows in gendex
	$chipsavail = array();
	$result = mysql_query($sql_chips_avail);
	while($result && ($srow=mysql_fetch_array($result))){
		$chipsavail[] = $srow['table_id'];
	}
	
	// previously selected ids; a single value is accepted as well as an array
	$selected = empty($species) ? array() : (array)$species;
	
	// both values are echoed into single-quoted attributes
	$keys_attr = is_scalar($keys) ? htmlspecialchars((string)$keys, ENT_QUOTES) : "";
	$limit_attr = is_scalar($limit) ? htmlspecialchars((string)$limit, ENT_QUOTES) : "";
	
	echo "<form action='index.php' method='post' name='kwsearch'>".$this->crlf;
	echo "<table>".$this->crlf;
	echo "<tr><td>Select species: ";
	foreach ($chipsavail as $chipvalue) {
		$check = in_array($chipvalue, $selected);
		echo "<input type='checkbox' name='species[]' value='"
			.$chipvalue."' "
			.($check ? " checked " : "").">";
		echo (isset($title[$chipvalue]) ? $title[$chipvalue] : "")." ";
	}
	echo "</td></tr>".$this->crlf;
	echo "<tr><td>Keywords: <input size='50' name='keywords' value='"
		.$keys_attr."'></td></tr>".$this->crlf;
	echo "<tr><td>Number of genes returned: <input size='10' name='limit' value='"
		.$limit_attr."'></td></tr>".$this->crlf;
	echo "</table>";
	echo "<input type='submit' name='go' value='Search'>";
	echo "</form>".$this->crlf;
	
}

// Synchronise the unigene.queue flag with the set of checked genomecodes.
//
// $queue - array of genomecodes that should be queued (queue='1');
//          null/empty means nothing is checked, a single scalar is
//          treated as one genomecode
//
// Every genomecode in unigene is compared with $queue: codes that are
// checked but not flagged get queue='1', codes that are flagged '1' but no
// longer checked get queue='0'. Progress is echoed as HTML comments and
// failures as <p> blocks.
// Returns true once all rows are processed, false when the unigene table
// could not be read.
function add_to_queue($queue){
	$this->db_connect();

	// With no checkbox ticked the caller has no array to pass; without this,
	// array_search on a non-array does not return false and every genomecode
	// would be treated as checked.
	if(!is_array($queue)){
		if($queue === null || $queue === false || $queue === ''){
			$queue = array();
		} else {
			$queue = array($queue);
		}
	}
	
	// retrieve genomecode list and old values
	$sql_select = "select genomecode, queue from unigene";
	$result = mysql_query($sql_select);
	if(!$result){
		echo "<p>Error reading queue from unigene<br>Sql: ".$sql_select."<br>Error message: ".mysql_error()."</p>".$this->crlf;
		return(false);
	}

	// read the complete list before issuing any update
	$rows = array();
	while($row=mysql_fetch_assoc($result)){
		$rows[] = $row;
	}
	
	$sql_set = "update unigene set queue='1' where genomecode='";
	$sql_clear = "update unigene set queue='0' where genomecode='";
	
	foreach($rows as $row){
		$code = $row['genomecode'];
		$flagged = ($row['queue'] == 1);

		if(!in_array($code, $queue)){

			// genomecode not checked, if queue set in db, must clear
			if($flagged){
				$statement = $sql_clear.mysql_real_escape_string($code)."'";
				$result = mysql_query($statement);
				if(mysql_errno() == 0){
					echo "<!--Cleared queue for $code -->".$this->crlf;
				} else {
					echo "<p>Error clearing queue for $code <br>Sql: ".$statement."<br>Affected rows: ".mysql_affected_rows()."<br>Error message: ".mysql_error()."</p>".$this->crlf;
				}
			}

		} else {
			// found a checked genomecode

			if(!$flagged){
				// queue for checked genomecode must be turned on
				$statement = $sql_set.mysql_real_escape_string($code)."'";
				$result = mysql_query($statement);
				if(mysql_errno() == 0){
					echo "<!--Updated queue for $code -->".$this->crlf;
				} else {
					echo "<p>Error updating queue for $code <br>Sql: ".$statement."<br>Affected rows: ".mysql_affected_rows()."<br>Error message: ".mysql_error()."</p>".$this->crlf;
				}
			} else {
				// queue for checked genomecode is already turned on
				echo "<!--Queue was already turned on for $code -->".$this->crlf;
			}

		}
	}
	return(true);

}

// List the genomecodes whose unigene.queue flag is '1'.
//
// Each selected genomecode is reported through diagnostic() at level 2.
// Returns a zero-based array of genomecodes, or false when no row matches.
function update_requested(){
	$this->db_connect();
	$query = "select genomecode from unigene where queue='1'";

	$rows = mysql_query($query);
	$pending = array();

	while($record = mysql_fetch_assoc($rows)){
		$code = $record['genomecode'];
		$pending[] = $code;
		$this->diagnostic("genomecode selected for update: ".$code,2);
	}

	// callers get false, not an empty array, when nothing is queued
	if(count($pending) == 0){
		return(false);
	}
	return($pending);
}

// Move one genomecode from the download queue to the gendex-update stage.
//
// $gc - genomecode whose unigene row is to be changed
//
// Sets unigene.queue to '2' (a queue of 2 means "update gendex") for the
// given genomecode. Returns true only when exactly one row was changed;
// otherwise logs the statement and the MySQL error and returns false.
function clear_queue($gc){
	$this->db_connect();
	$statement = "update unigene set queue='2' where genomecode='".$gc."'";
	mysql_query($statement);

	// anything other than exactly one changed row is reported as a failure
	if(mysql_affected_rows()!=1){
		$this->diagnostic("Error clearing queue for $gc ",1);
		$this->diagnostic($statement." Error: ".mysql_error(),1);
		return(false);
	}

	$this->diagnostic("clear_queue success: ".$statement,2);
	return(true);
}

// Refresh the gendex rows of ONE genomecode that is flagged queue='2' in
// unigene, then reset that flag to '0'.
//
// Steps for the selected genomecode (only when the species check finds rows):
//   1. copy the unigene cluster id from XXXsequence into gendex.data_id
//   2. copy title and gene from XXXdata into gendex
//   3. rebuild gendex.keywords from accession, title, gene and the chip
//      table's description
//   4. set unigene.queue back to '0'
// When the species check finds nothing, only step 4 is performed so that the
// same genomecode is not selected again on the next call.
//
// Returns true when a genomecode was handled and its queue flag cleared,
// false when nothing is queued or any query failed (details are written
// through diagnostic()).
function update_gendex(){

	// set up format strings for sql commands

	$sql_get_first_gc =<<<EOL
select * from unigene where queue='2'
limit 1
EOL;

	$sql_get_count_gc =<<<EOL
select count(*) as nrows from unigene where queue='2'
EOL;

	// NOTE(review): with a LEFT JOIN, count(*) is at least 1 for every species
	// that has a chiplist row, even without matching gendex rows - confirm
	// whether count(gendex.table_id) was intended.
	$sql_species_in_gendex =<<<EOL
select count(*) as nrows
from chiplist left join gendex on chiplist.id=gendex.table_id
where chiplist.species_code='%s'
EOL;

	$sql_get_chiplist =<<<EOL
select * from chiplist where species_code='%s'
EOL;

	$sql_update_gendex_ug =<<<EOL
update gendex, %ssequence
set gendex.data_id = %ssequence.id
where gendex.chip_acc = %ssequence.acc and gendex.table_id=%d
EOL;

	$sql_update_gendex_title =<<<EOL
update gendex, %sdata
set gendex.data_title = %sdata.title, gendex.data_gene = %sdata.gene 
where gendex.data_id = %sdata.id and gendex.table_id=%d
EOL;

	$sql_update_gendex_keywords =<<<EOL
update gendex, %s
set gendex.keywords=concat_ws(' ', gendex.chip_acc, gendex.data_title, gendex.data_gene, 
%s.description) 
where gendex.chip_id = %s.id and gendex.table_id=%d
EOL;

	$sql_clear_queue =<<<EOL
update unigene set queue='0' where genomecode='%s'
EOL;

	$this->diagnostic("------Update_gendex Begin-------",1);

	// connect to db
	$this->db_connect();
	
	// determine if any genomecodes selected for refreshing
	$result = mysql_query($sql_get_count_gc);
	$any_gc = $result ? mysql_fetch_assoc($result) : false;
	if(!$any_gc || !($any_gc['nrows']>0)){
		$this->diagnostic("No genomecodes selected for refresh.",1);
		return(false);
	}

	// retrieve first genomecode with queue set to 2
	$result = mysql_query($sql_get_first_gc);

	if(mysql_errno()){
		// no genomecode is known yet at this point, so none can be named here
		$error = mysql_error();
		$this->diagnostic("Failed to retrieve a queued genomecode from unigene.",1);
		$this->diagnostic("Sql: ".$sql_get_first_gc,1);
		$this->diagnostic("Error message: ".$error,1);
		return(false);
	}

	$row = mysql_fetch_assoc($result);
	if(!$row){
		// the queued row disappeared between the count and the select
		$this->diagnostic("No genomecodes selected for refresh.",1);
		return(false);
	}
	$gc = $row['genomecode'];
	// escaped form for use inside quoted sql values (table-name positions use $gc as is)
	$gc_value = mysql_real_escape_string($gc);

	$this->diagnostic("Genomecode $gc selected for gendex refresh.",1);

	// retrieve info on selected gc
	$sql = sprintf($sql_species_in_gendex, $gc_value);
	$result = mysql_query($sql);
	if(!$result){
		// a failed check must not be mistaken for "no update required"
		$error = mysql_error();
		$this->diagnostic("Failed to check gendex for genomecode $gc .",1);
		$this->diagnostic("Sql: ".$sql,1);
		$this->diagnostic("Error message: ".$error,1);
		return(false);
	}
	$nrows = mysql_fetch_assoc($result);

	if($nrows && $nrows['nrows']>0){
		//this species code points to rows in gendex so update is required

		$sql = sprintf($sql_get_chiplist, $gc_value);
		$result = mysql_query($sql);

		if(mysql_errno()){
			$error = mysql_error();
			$this->diagnostic("Failed to retrieve genomecode $gc from chiplist.",1);
			$this->diagnostic("Sql: ".$sql,1);
			$this->diagnostic("Error message: ".$error,1);
			return(false);
		}
		$this->diagnostic("Retrieved genomecode $gc using: ".$sql,2);

		// only the first chiplist row for the species is used
		$row = mysql_fetch_assoc($result);
		if(!$row){
			$this->diagnostic("No chiplist entry found for genomecode $gc .",1);
			return(false);
		}
		$table_number = $row['id'];
		$chip_table = $row['chip_table'];

		// step 1 -- update unigene accession number for all rows in gendex for this species
		if(!$this->_gendex_run_step(
			sprintf($sql_update_gendex_ug,$gc,$gc,$gc,$table_number),
			"Failed to update unigene id in gendex for genomecode $gc .",
			"Updated unigene ids for genomecode $gc, ",
			true)){
			return(false);
		}

		// step 2 -- update unigene titles and genes for all rows in gendex for this species
		if(!$this->_gendex_run_step(
			sprintf($sql_update_gendex_title, $gc, $gc, $gc, $gc, $table_number),
			"Failed to update titles and genes in gendex for genomecode $gc .",
			"Updated unigene titles and genes for genomecode $gc, ",
			true)){
			return(false);
		}

		// step 3 -- update keyword fields for all rows in gendex for this species
		if(!$this->_gendex_run_step(
			sprintf($sql_update_gendex_keywords,$chip_table, $chip_table, $chip_table, $table_number),
			"Failed to update keywords in gendex for genomecode $gc .",
			"Updated keywords for genomecode $gc, ",
			true)){
			return(false);
		}
	} else {
		// there are no rows in gendex for species selected, dump with message
		$this->diagnostic("Genomecode $gc not present in gendex.  No update required.",1);
	}

	// step 4 -- clear genomecode queue entry in unigene.
	// Done in both cases: leaving queue='2' on a genomecode that needs no
	// update would make the "limit 1" select return it again on every call.
	return($this->_gendex_run_step(
		sprintf($sql_clear_queue, $gc_value),
		"Failed to clear queue in unigene for genomecode $gc .",
		"Cleared queue flag for genomecode $gc in unigene.",
		false));
}

// Helper for update_gendex(): run one sql statement and log the outcome.
//
// $sql         - complete statement to execute
// $fail_msg    - diagnostic text written (level 1) when MySQL reports an error
// $ok_msg      - diagnostic text written (level 1) on success
// $report_rows - when true, "<affected rows> rows updated." is appended to $ok_msg
//
// Returns true on success, false when MySQL reported an error.
function _gendex_run_step($sql, $fail_msg, $ok_msg, $report_rows=false){
	$result = mysql_query($sql);

	if(mysql_errno()){
		// capture the error text before anything else can touch the connection
		$error = mysql_error();
		$this->diagnostic($fail_msg,1);
		$this->diagnostic("Sql: ".$sql,1);
		$this->diagnostic("Error message: ".$error,1);
		return(false);
	}

	if($report_rows){
		$ok_msg .= mysql_affected_rows()." rows updated.";
	}
	$this->diagnostic($ok_msg,1);
	$this->diagnostic("Using: ".$sql,2);
	return(true);
}

}
?>