// Well, here's the code that's driving me batty...
//
// Regards,
// Serge
// ==============================================

/**
 * Plain data holder for the contents of one PDB (Protein Data Bank) entry.
 *
 * Each property corresponds to one PDB record type ("group"). Properties are
 * filled in by parse_protein_pdb(); any property whose record has not been
 * parsed keeps its default value of NULL.
 */
class Protein // PDB version
{
    // HEADER group
    var $class;
    var $dep_date;
    var $id_code;

    // OBSLTE group
    var $date_rep;      // short for 'date replaced'.
    var $new_id_code;   // array of id codes.

    // TITLES group
    var $titles;        // array of titles.

    // CAVEAT group
    var $caveats;       // array of caveats.

    // COMPND group
    var $compounds;     // array of compound entries, each entry is
                        // a string of this form:
                        // MOL_ID: 1; token1: value1; token2: value2;

    // SOURCE group
    var $sources;       // array of info about biological sources of
                        // the molecules.

    // KEYWDS group
    var $keywords;      // array of keywords (strings).

    // EXPDTA group
    var $expdta;        // array of experimental (technique?) data.

    // AUTHOR group
    var $authors;       // array of authors (strings).

    // REVDAT group
    var $revdat;        // array of REVISION DATA (2D assoc array).

    var $sprsde;        // array of SUPERSEDED ENTRIES.
    var $journal;
    var $remark1;
    var $remark2;
    var $remark3;
    var $remark4;

    // DBREF group
    var $dbrefs;        // array of database references.

    // SEQADV group
    var $seqadv;        // array of seqadv records.

    // SEQRES group
    var $seqres;        // array of SEQUENCE RESIDUE records.

    // MODRES group
    var $modres;        // array of MODIFICATION OF RESIDUE entries.

    // HET group
    var $hets;          // array of HETEROGENOUS ATOMS.

    // HETNAM group
    var $hetnams;       // array of HETEROGENOUS (ATOMS) NAMES.

    // HETSYN group
    var $hetsyns;       // array of SYNONYMS for HETEROGENOUS ATOMS.

    // FORMUL group
    var $het_formulas;  // array of (CHEMICAL) FORMULAS FOR
                        // HETEROGENOUS ATOMS.

    // HELIX group
    var $helix;         // array of HELICES (associative array).

    // SHEET group
    var $sheets;        // array of SHEETS (secondary structures) stored
                        // as assoc array.

    // TURN group
    var $turns;         // array of TURNS (secondary structures).

    // SSBOND group
    var $ssbonds;       // array of disulfide bonds in protein and
                        // polypeptide structures.

    // LINK group
    var $links;         // array of links (between residues).

    // HYDBND group
    var $hydbnds;       // array of hydrogen bonds(?).

    // SLTBRG group
    var $sltbrgs;       // array of salt bridges b/w residues.

    // CISPEP group
    var $cispeps;       // array of Cis peptides (those with
                        // omega angles of 0°±30°).
                        // Deviations larger than 30° are listed in
                        // REMARK 500.

    // SITE group
    var $sites;         // array of significant sites in the
                        // macromolecule.

    // CRYST1 group
    var $cryst1;        // array of CRYST1 unit cell parameters.

    // ORIGX group
    var $origx;         // array of coordinates.

    // SCALE group
    var $scale;         // array of scales.

    // MTRIX group
    var $matrix;        // array of matrices.

    // TVECT group
    var $tvect;         // array of translation vectors.

    // MODEL group
    var $model;         // array of atomic models (skip for now).

    // ATOM group
    var $atoms;         // array of ATOMs.

    // SIGATM group
    var $sigatms;       // array of STANDARD DEVIATIONS OF
                        // ATOMIC PARAMETERS.

    // Some 10 other PDB data fields/sections I haven't assigned
    // a PROTEIN attribute/property name yet...
}

/**
 * Parses the lines of a PDB file into a Protein object.
 *
 * Every line is split into a record label (the first six characters,
 * trimmed) and a data part (everything from character offset 9 onwards,
 * trimmed); the label selects which extraction branch runs. Only the HEADER
 * branch is implemented here; the remaining branches are placeholders.
 *
 * @param array $flines Lines of a PDB file, one record per element.
 *
 * @return Protein The populated object. Properties whose record never
 *                 appeared in $flines are left as NULL.
 */
function parse_protein_pdb($flines)
{
    // The result object must exist before properties are assigned to it;
    // assigning to an undefined variable is a fatal error on PHP 8.
    $oProtein = new Protein();

    $outer = array();

    $in_remark1_flag = FALSE;

    $in_title_flag = FALSE;
    $title_string = "";
    $aTitles = array();

    $in_caveat_flag = FALSE;
    $cav_string = "";
    $aCaveats = array();

    $aHelix = array();
    $aSheets = array();
    $aTurns = array();
    $aSSBonds = array();

    // HEADER fields default to NULL so that input without a HEADER record
    // does not read undefined variables when the result is assembled.
    $class = NULL;
    $dep_date = NULL;
    $id_code = NULL;

    // other code that initializes variables used by the
    // following IF statements.

    // foreach replaces while/list/each: each() was removed in PHP 8.
    foreach ($flines as $no => $linestr) { // opens outermost loop
        // substr() is used because PHP has no built-in left() function.
        $label = trim(substr($linestr, 0, 6));
        $data = trim(substr($linestr, 9));

        // Check for UNCLOSED items by looking at flag variables, etc.
        // UNCLOSED means they have not yet been stored to the proper
        // variables (e.g. class property/attribute or some temporary
        // placeholder). To CLOSE means to store data into them.
        if ($in_remark1_flag) {
            // code to "close" REMARK 1 section.
        }

        if ($in_title_flag) {
            // code to "close" TITLE section.
        }

        // a whole lot of "CLOSING ROUTINES"...

        // ID data field - possibly the simplest of them all!
        if ($label === "HEADER") {
            $class = substr($linestr, 10, 40);
            $dep_date = substr($linestr, 50, 9);
            $id_code = substr($linestr, 62, 4);
        }

        // OBSLTE - OBSOLETE data field
        if ($label === "OBSLTE") {
            // code to extract data from OBSLTE data field/section.
        }

        // Other code to handle 30 plus data fields/sections in PDB.
        // Almost all of them start with: IF ($label == "LABEL")
    }

    $oProtein->class = $class;
    $oProtein->dep_date = $dep_date;
    $oProtein->id_code = $id_code;

    // other code to store extracted data in PROTEIN class.

    // Hand the populated object back; without this the caller gets nothing.
    return $oProtein;
}

// Need a new email address that people can remember
// Check out the new EudoraMail at http://www.eudoramail.com