/**++
 *   
 *   LICENSE
 *   -------
 *   
 *   Copyright (c) 2004 Renato Mancuso
 *   All rights reserved.
 *   
 *   Redistribution and use in source and binary forms, with or without modification, are 
 *   permitted provided that the following conditions are met:
 *   
 *   - Redistributions of source code must retain the above copyright notice, this list 
 *     of conditions and the following disclaimer.
 *   
 *   - Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials 
 *     provided with the distribution.
 *   
 *   - Neither the name of Renato Mancuso nor the names of its contributors may be used to 
 *     endorse or promote products derived from this software without specific prior written 
 *     permission.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS 
 *   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 *   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 *   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
 *   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *   
--**/


#include "XmlConverter.hpp"

#include <cerrno>
#include <cstring>
#include <cstdio>
#include <sstream>
#include <iomanip>
#include <Regex.hpp>

#include "Exception.hpp"

#include <cassert>

using namespace OpenEMBL::Phoenix;


namespace OpenEMBL
{
namespace ff2xml
{


    /* class static: shared empty attribute list, passed for elements rendered without attributes */
    const XmlAttributeList XmlConverter::EMPTY_ATTRIBUTE_LIST;

    /* class static: empty editor list, passed to renderCitation() by citations that have no editors */
    const std::vector<std::string> XmlConverter::EMPTY_EDITOR_LIST;

    /* class static: empty locator, passed to renderCitation() when no <locator> is wanted (see onPatent) */
    const std::vector<std::string> XmlConverter::EMPTY_LOCATOR; 

    /* class static: empty patent applicant list (not referenced in the visible part of this file) */
    const std::vector<std::string> XmlConverter::EMPTY_PATENT_APPLICANT_LIST;

    //
    // Constructs a converter.
    //
    // srcFilename  - forwarded unchanged to the base class constructor; must not be NULL
    // destFilename - name of the XML output file opened later by parse(); must not be NULL
    //
    // The asserts in the body only run after the member initializers, so a NULL
    // destFilename is mapped to an empty name here instead of being handed to the
    // string constructor (undefined behaviour, and the asserts vanish under NDEBUG).
    //
    XmlConverter::XmlConverter(const char * srcFilename, const char* destFilename)
        : base(srcFilename)
        , _processedCount(0)
        , _destFilename(NULL != destFilename ? destFilename : "")
    {
        assert(NULL != srcFilename);
        assert(NULL != destFilename);
    }

    // Destructor: nothing to release explicitly here; declared non-throwing.
    XmlConverter::~XmlConverter() throw()
    {
    }

    //
    // Joins the given lines into one string, separating consecutive lines
    // with a single blank. An empty list yields an empty string.
    //
    std::string XmlConverter::makeString(std::vector<std::string> const & theLines)
    {
        std::ostringstream joined;

        std::vector<std::string>::const_iterator line = theLines.begin();

        // first line goes out without a leading separator
        if (line != theLines.end())
        {
            joined << *line;
            ++line;
        }

        // every following line is preceded by one blank
        for (; line != theLines.end(); ++line)
            joined << " " << *line;

        return joined.str();
    }

    //
    // Maps an upper-case three-letter month abbreviation ("JAN".."DEC") to its
    // month number 1..12. Throws Exception when the name is not recognised.
    //
    int XmlConverter::getMonth(std::string const & month)
    {
        static char const* const kMonthNames[12] = {
            "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
            "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
        };

        for (int idx = 0; idx < 12; ++idx)
        {
            if (month == kMonthNames[idx])
                return idx + 1;     // table is zero-based, months are one-based
        }

        throw Exception("invalid month (" + month + ")");
    }

    std::string XmlConverter::makeDate(std::string const & str)
    {
        //
        // regex is: (\d+)-([A-Z]{3})-(\d+)
        //
        static Regex theRegex("(\\d+)-([A-Z]{3})-(\\d+)");
                                     
        Match theMatches;

        if (!regexSearch(str, theMatches, theRegex))
        {
            std::ostringstream oss;
            oss << "bad date: " << str;
            throw Exception(oss.str());
        }

        std::ostringstream oss;
        
        oss << theMatches.str(3) 
            << "-"
            << std::setfill('0') << std::setw(2)
            << getMonth(theMatches.str(2))
            << "-"
            << std::setfill('0') << std::setw(2)
            << theMatches.str(1)
            ;

        return oss.str();
    }

    //
    // Runs the conversion: opens the destination file, writes the XML
    // declaration and the opening <EMBL> root tag, delegates to the base
    // class parse(), then writes </EMBL> and closes the file.
    //
    // NOTE(review): if base::parse() throws (onError() does), close() is not
    // reached here - confirm the XML writer's destructor closes the file.
    //
    METHODIMP XmlConverter::parse()
    {
        _xml.open(_destFilename.c_str());
        _xml.writeXMLDeclaration("1.0", "UTF-8");

        // root element carries no attributes
        _xml.writeBeginTag(XmlWriter::SINGLELINE, 0, "EMBL", EMPTY_ATTRIBUTE_LIST);
        _xml.newLine();

        base::parse();

        _xml.writeEndTag("EMBL");
        _xml.close();
    }

    //
    // Error handler: forwards the error to the base class implementation and
    // then throws an Exception, so the first reported error ends the conversion.
    //
    METHODIMP XmlConverter::onError(long theLineNumber, int theErrorCode, LPCSTR theMessage)
    {
        base::onError(theLineNumber, theErrorCode, theMessage);        
        throw Exception("aborting...");
    }

    //
    // Start of a new entry: after the base class has been notified, all
    // per-entry accumulators are reset and the entry counter is incremented.
    //
    METHODIMP XmlConverter::onBeginEntry(long theLineNumber)
    {
        base::onBeginEntry(theLineNumber);

        // start from default-constructed accumulators
        _entry        = EntryHeader();
        _publication  = PublicationInfo();
        _taxonomyInfo = TaxonomyInfo();

        // one more entry seen
        ++_processedCount;
    }

    //
    // End of an entry: notifies the base class, then writes the closing
    // </entry> tag at indent level 1 (the opening tag is written by onDT()).
    //
    METHODIMP XmlConverter::onEndEntry()
    {
        base::onEndEntry();

        _xml.writeIndent(1);
        _xml.writeEndTag("entry");
    }

    //
    // ID line: stored in the entry header. When the ID line carries a non-zero
    // sequence version, the sequence-version record is filled from it as well.
    // The entry name is echoed to stdout as a progress indicator.
    //
    METHODIMP XmlConverter::onID(long theLineNumber, Phoenix::IDLine const & theField)
    {
        _entry.id = theField;

        const bool versionOnIdLine = (theField.SequenceVersion != 0);

        if (versionOnIdLine)
        {
            // r87+ files, take sequence version data from ID line
            _entry.sv.AccessionNumber = theField.EntryName;
            _entry.sv.Version         = theField.SequenceVersion;
        }

        // show the user which entry is being processed
        printf("*** %s\n", theField.EntryName.c_str());
    }

    //
    // AC line: stored in the entry header; its accession numbers are rendered
    // later by onDT().
    //
    METHODIMP XmlConverter::onAC(long theLineNumber, Phoenix::ACLine const & theField)
    {
        // accumulate
        _entry.ac = theField;
    }

    //
    // SV line: stored in the entry header, replacing any sequence version
    // data previously taken from the ID line (see onID()).
    //
    METHODIMP XmlConverter::onSV(long theLineNumber, Phoenix::SVLine const & theField)
    {
        // accumulate
        _entry.sv = theField;
    }

    //
    // DT line: stores the dates, then writes the opening <entry> tag from the
    // ID, AC and DT data accumulated so far, followed by one
    // <secondaryAccession> element for every accession number after the first.
    //
    // Throws Exception when no accession number has been accumulated (the
    // primary accession is mandatory for <entry>), or when a date cannot be
    // converted (see makeDate()).
    //
    METHODIMP XmlConverter::onDT(long theLineNumber, Phoenix::DTLine const & theField)
    {
        // accumulate
        _entry.dt = theField;

        // element 0 is read below: fail with a clear error rather than index an empty list
        if (_entry.ac.AccessionNumberList.size() == 0)
            throw Exception("no accession number available for entry (missing or empty AC line)");

        //
        // render <entry>
        //
        XmlAttributeList attributes;

        attributes.push_back(XmlAttribute("accession", _entry.ac.AccessionNumberList[0]));
        attributes.push_back(XmlAttribute("name", _entry.id.EntryName));
        attributes.push_back(XmlAttribute("division", _entry.id.Division));
        attributes.push_back(XmlAttribute("created", makeDate(_entry.dt.FirstCreated)));
        attributes.push_back(XmlAttribute("lastUpdated", makeDate(_entry.dt.LastUpdated)));
        attributes.push_back(XmlAttribute("releaseCreated", _entry.dt.ReleaseCreated));
        attributes.push_back(XmlAttribute("releaseLastUpdated", _entry.dt.ReleaseLastUpdated));
        attributes.push_back(XmlAttribute("version", _entry.dt.ExternalVersion));

        _xml.writeBeginTag(XmlWriter::MULTILINE, 1, "entry", attributes);
        _xml.newLine();

        //
        // render <secondaryAccession> (index 0 is the primary accession, already used above)
        //
        for (size_t i = 1; i < _entry.ac.AccessionNumberList.size(); ++i)
        {
            _xml.writeElement(
                2, 
                "secondaryAccession", 
                EMPTY_ATTRIBUTE_LIST, 
                _entry.ac.AccessionNumberList[i] );
        }
    }

    //
    // DE line: written straight out as a <description> element at indent level 2.
    //
    METHODIMP XmlConverter::onDE(long theLineNumber, Phoenix::DELine const & theField)
    {
        //
        // render <description>
        //
        _xml.writeElement(
            2, 
            "description", 
            EMPTY_ATTRIBUTE_LIST, 
            theField.Text );
    }

    //
    // KW line: writes one <keyword> element per keyword. Nothing is written
    // when the list is empty or its first keyword is the empty string.
    //
    METHODIMP XmlConverter::onKW(long theLineNumber, Phoenix::KWLine const & theField)
    {
        const bool hasKeywords =
            !theField.Keywords.empty() && !(theField.Keywords[0] == "");

        if (hasKeywords)
        {
            for (size_t kw = 0; kw < theField.Keywords.size(); ++kw)
            {
                _xml.writeElement(2, "keyword", EMPTY_ATTRIBUTE_LIST, theField.Keywords[kw]);
            }
        }
    }

    //
    // Start of a taxonomy group: resets the taxonomy accumulator that the
    // OS/OC/OG handlers fill in.
    //
    METHODIMP XmlConverter::onBeginTaxonomyLines(long theLineNumber)
    {
        // start from a default-constructed accumulator
        _taxonomyInfo = TaxonomyInfo();
    }

    //
    // OS line: stored in the current taxonomy accumulator.
    //
    METHODIMP XmlConverter::onOS(long theLineNumber, Phoenix::OSLine const & theField)
    {
        // accumulate
        _taxonomyInfo.os = theField;
    }

    //
    // OC line: stored in the current taxonomy accumulator.
    //
    METHODIMP XmlConverter::onOC(long theLineNumber, Phoenix::OCLine const & theField)
    {
        // accumulate
        _taxonomyInfo.oc = theField;
    }

    //
    // OG line: appended to the current taxonomy accumulator (several OG lines
    // may be collected per taxonomy group).
    //
    METHODIMP XmlConverter::onOG(long theLineNumber, Phoenix::OGLine const & theField)        
    {
        // accumulate
        _taxonomyInfo.og.push_back(theField);
    }

    //
    // End of a taxonomy group: the accumulated group is appended to the entry's
    // taxonomy list, which getTaxonomyInfoForOrganism() searches later.
    //
    METHODIMP XmlConverter::onEndTaxonomyLines()
    {
        // accumulate
        _entry.tx.push_back(_taxonomyInfo);
    }

    //
    // Start of a reference block: resets the publication accumulator and
    // writes the opening <reference> tag at indent level 2 (closed again by
    // onEndPublication()).
    //
    METHODIMP XmlConverter::onBeginPublication(long theLineNumber)
    {
        // reset publication data
        _publication = PublicationInfo();

        //
        // render <reference>
        //
        _xml.writeBeginTag(2, "reference");
        _xml.newLine();
    }

    //
    // RN line: stored in the publication accumulator; its reference number
    // becomes the "id" attribute of the <citation> element.
    //
    METHODIMP XmlConverter::onRN(long theLineNumber, Phoenix::RNLine const & theField)
    {
        // accumulate
        _publication.rn = theField;
    }

    //
    // RC line: stored in the publication accumulator; rendered as <comment>
    // by onEndPublication().
    //
    METHODIMP XmlConverter::onRC(long theLineNumber, Phoenix::RCLine const & theField)
    {
        // accumulate
        _publication.rc = theField;
    }

    //
    // RP line: stored in the publication accumulator; its positions are
    // rendered as <refPosition> by onEndPublication().
    //
    METHODIMP XmlConverter::onRP(long theLineNumber, Phoenix::RPLine const & theField)
    {
        // accumulate
        _publication.rp = theField;
    }

    //
    // RX line: appended to the publication accumulator; each one is rendered
    // as <dbreference> by renderPublicationData().
    //
    METHODIMP XmlConverter::onRX(long theLineNumber, Phoenix::RXLine const & theField)
    {
        // accumulate
        _publication.rx.push_back(theField);
    }

    //
    // RG line: stored in the publication accumulator; rendered as <consortium>
    // by renderPublicationData().
    //
    METHODIMP XmlConverter::onRG(long theLineNumber, Phoenix::RGLine const & theField)
    {
        // accumulate
        _publication.rg = theField;
    }

    //
    // RA line: stored in the publication accumulator; its authors are rendered
    // as <author> elements by renderPublicationData().
    //
    METHODIMP XmlConverter::onRA(long theLineNumber, Phoenix::RALine const & theField)
    {
        // accumulate
        _publication.ra = theField;
    }

    //
    // RT line: stored in the publication accumulator; rendered as <title>
    // by renderPublicationData().
    //
    METHODIMP XmlConverter::onRT(long theLineNumber, Phoenix::RTLine const & theField)
    {
        // accumulate
        _publication.rt = theField;
    }

    //
    // Submission reference: renders a single-line <citation type="submission">
    // carrying the reference number and the converted submission date. The
    // submission info lines are passed on as the locator; there are no editors.
    //
    METHODIMP XmlConverter::onSubmission(long theLineNumber, Phoenix::SubmissionInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "submission"));
        citationAttrs.push_back(XmlAttribute("date", makeDate(theField.SubmissionDate)));

        renderCitation(XmlWriter::SINGLELINE, citationAttrs, EMPTY_EDITOR_LIST, theField.Info);
    }

    //
    // Unpublished reference: renders a single-line <citation type="unpublished">
    // that carries only the reference number.
    //
    METHODIMP XmlConverter::onUnpublished(long theLineNumber, Phoenix::UnpublishedInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "unpublished"));

        renderCitation(XmlWriter::SINGLELINE, citationAttrs);
    }

    //
    // Book reference: renders a multi-line <citation type="book"> with title,
    // page range, publication date and publisher as attributes; the book's
    // editors are passed on to be rendered as <editor> elements.
    //
    METHODIMP XmlConverter::onBook(long theLineNumber, Phoenix::BookInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "book"));
        citationAttrs.push_back(XmlAttribute("name", theField.Title));
        citationAttrs.push_back(XmlAttribute("first", theField.FirstPage));
        citationAttrs.push_back(XmlAttribute("last", theField.LastPage));
        citationAttrs.push_back(XmlAttribute("year", theField.PublicationDate));
        citationAttrs.push_back(XmlAttribute("publisher", theField.Publisher));

        renderCitation(XmlWriter::MULTILINE, citationAttrs, theField.Editors);
    }

    //
    // Thesis reference: renders a multi-line <citation type="thesis"> with the
    // institute (its lines joined by makeString()) and the year as attributes.
    //
    METHODIMP XmlConverter::onThesis(long theLineNumber, Phoenix::ThesisInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "thesis"));
        citationAttrs.push_back(XmlAttribute("institute", makeString(theField.Institute)));
        citationAttrs.push_back(XmlAttribute("year", theField.Year));

        renderCitation(XmlWriter::MULTILINE, citationAttrs);
    }

    //
    // Patent reference: renders a single-line <citation type="patent"> with the
    // converted date and the patent number as attributes. No editors and no
    // locator are passed; the applicants are passed on to be rendered as
    // <patentApplicant> elements.
    //
    METHODIMP XmlConverter::onPatent(long theLineNumber, Phoenix::PatentInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "patent"));
        citationAttrs.push_back(XmlAttribute("date", makeDate(theField.Date)));
        citationAttrs.push_back(XmlAttribute("patentNumber", theField.PatentNumber));

        renderCitation(
            XmlWriter::SINGLELINE,
            citationAttrs,
            EMPTY_EDITOR_LIST,
            EMPTY_LOCATOR,
            theField.Applicants);
    }

    //
    // Online journal reference: renders a single-line
    // <citation type="online journal article"> carrying only the reference number.
    //
    METHODIMP XmlConverter::onOnlineJournal(long theLineNumber, Phoenix::OnlineJournalInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "online journal article"));

        renderCitation(XmlWriter::SINGLELINE, citationAttrs);
    }

    //
    // Electronic resource reference: renders a multi-line
    // <citation type="electronic resource">; the free text is passed on as the
    // locator and there are no editors.
    //
    METHODIMP XmlConverter::onElectronicResource(long theLineNumber, Phoenix::ElectronicResourceInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "electronic resource"));

        renderCitation(XmlWriter::MULTILINE, citationAttrs, EMPTY_EDITOR_LIST, theField.FreeText);
    }

    //
    // Miscellaneous resource reference: renders a multi-line
    // <citation type="miscellaneous resource">; the free text is passed on as
    // the locator and there are no editors.
    //
    METHODIMP XmlConverter::onMiscResource(long theLineNumber, Phoenix::MiscResourceInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "miscellaneous resource"));

        renderCitation(XmlWriter::MULTILINE, citationAttrs, EMPTY_EDITOR_LIST, theField.FreeText);
    }

    //
    // Journal article reference: renders a multi-line
    // <citation type="journal article"> with journal name, volume, page range
    // and publication date as attributes. The "issue" attribute is only
    // written when the issue is not empty.
    //
    METHODIMP XmlConverter::onJournalArticle(long theLineNumber, Phoenix::JournalArticleInfo const & theField)
    {
        XmlAttributeList citationAttrs;
        citationAttrs.push_back(XmlAttribute("id", _publication.rn.ReferenceNumber));
        citationAttrs.push_back(XmlAttribute("type", "journal article"));
        citationAttrs.push_back(XmlAttribute("name", theField.JournalName));
        citationAttrs.push_back(XmlAttribute("volume", theField.Volume));

        // optional attribute, kept in its position between volume and first page
        const bool hasIssue = (theField.Issue != "");
        if (hasIssue)
        {
            citationAttrs.push_back(XmlAttribute("issue", theField.Issue));
        }

        citationAttrs.push_back(XmlAttribute("first", theField.FirstPage));
        citationAttrs.push_back(XmlAttribute("last", theField.LastPage));
        citationAttrs.push_back(XmlAttribute("year", theField.PublicationDate));

        renderCitation(XmlWriter::MULTILINE, citationAttrs);
    }

    //
    // Writes one complete <citation> element at indent level 3.
    //
    // layout           - layout used for the opening tag
    // attributes       - attributes of the <citation> tag
    // eds              - editors, rendered as <editor> children
    // locator          - locator lines, rendered as a <locator> child when not empty
    // patentApplicants - applicants, rendered as <patentApplicant> children
    //
    // The children are produced by renderPublicationData() from the accumulated
    // publication data plus the three lists given here.
    //
    void XmlConverter::renderCitation
        (
        XmlWriter::layout_type              layout,
        XmlAttributeList                    attributes,
        std::vector<std::string> const &    eds,
        std::vector<std::string> const &    locator,
        std::vector<std::string> const &    patentApplicants
        )
    {
        // opening tag
        _xml.writeBeginTag(layout, 3, "citation", attributes);
        _xml.newLine();

        // child elements
        renderPublicationData(eds, locator, patentApplicants);

        // closing tag, aligned with the opening one
        _xml.writeIndent(3);
        _xml.writeEndTag("citation");
    }

    void XmlConverter::renderPublicationData
        (
        std::vector<std::string> const & eds,
        std::vector<std::string> const & locator,
        std::vector<std::string> const & patentApplicants 
        )
    {
        //
        // render <dbreference>
        //
        for (size_t i = 0; i < _publication.rx.size(); ++i)
        {
            Phoenix::RXLine const & xref = _publication.rx[i];

            XmlAttributeList attributes;

            attributes.push_back(XmlAttribute("db", xref.DatabaseIdentifier));
            attributes.push_back(XmlAttribute("primary", xref.Identifier));

            _xml.writeSingleTag(
                XmlWriter::SINGLELINE,
                4,
                "dbreference",
                attributes );
        }

        //
        // render <title>
        //
        if (_publication.rt.Text.size() > 0 && _publication.rt.Text[0] != "")
        {
            _xml.writeElement(
                4,
                "title",
                EMPTY_ATTRIBUTE_LIST,
                _publication.rt.Text );
        }

        //
        // render <editor>
        //
        for (size_t i = 0; i < eds.size(); ++i)
        {
            _xml.writeElement(
                4,
                "editor",
                EMPTY_ATTRIBUTE_LIST,
                eds[i] );
        }

        //
        // render <consortium>
        //
        if (_publication.rg.Consortium.size() > 0)
        {
            _xml.writeElement(
                4,
                "consortium",
                EMPTY_ATTRIBUTE_LIST,
                _publication.rg.Consortium );
        }

        //
        // render <author>
        //
        for (size_t i = 0; i < _publication.ra.Authors.size(); ++i)
        {
            _xml.writeElement(
                4,
                "author",
                EMPTY_ATTRIBUTE_LIST,
                _publication.ra.Authors[i] );
        }

        //
        // render <patentApplicant>
        //
        for (size_t i = 0; i < patentApplicants.size(); ++i)
        {
            _xml.writeElement(
                4,
                "patentApplicant",
                EMPTY_ATTRIBUTE_LIST,
                patentApplicants[i] );
        }


        //
        // render <locator>
        //
        if (locator.size() > 0)
        {
            _xml.writeElement(
                4,
                "locator",
                EMPTY_ATTRIBUTE_LIST,
                locator );
        }
    }

    //
    // End of a reference block: writes one <refPosition> per accumulated
    // position, the reference <comment> (when there is one), and finally the
    // closing </reference> tag at indent level 2.
    //
    METHODIMP XmlConverter::onEndPublication()
    {
        // <refPosition begin=".." end="..">
        for (size_t posIndex = 0; posIndex < _publication.rp.Positions.size(); ++posIndex)
        {
            Phoenix::Range const & range = _publication.rp.Positions[posIndex];

            XmlAttributeList rangeAttrs;
            rangeAttrs.push_back(XmlAttribute("begin", range.First));
            rangeAttrs.push_back(XmlAttribute("end", range.Last));

            _xml.writeSingleTag(XmlWriter::SINGLELINE, 3, "refPosition", rangeAttrs);
        }

        // <comment>, only when a reference comment was accumulated
        if (_publication.rc.Comment.size() > 0)
        {
            _xml.writeElement(3, "comment", EMPTY_ATTRIBUTE_LIST, _publication.rc.Comment);
        }

        // </reference>
        _xml.writeIndent(2);
        _xml.writeEndTag("reference");
    }

    //
    // DR line: written as a single <dbreference> tag at indent level 2 with
    // "db" and "primary" attributes; "secondary" is added only when the
    // secondary identifier is not empty.
    //
    METHODIMP XmlConverter::onDR(long theLineNumber, Phoenix::DRLine const & theField)
    {
        XmlAttributeList xrefAttrs;
        xrefAttrs.push_back(XmlAttribute("db", theField.DatabaseIdentifier));
        xrefAttrs.push_back(XmlAttribute("primary", theField.PrimaryIdentifier));

        const bool hasSecondary = (theField.SecondaryIndentifier != "");
        if (hasSecondary)
        {
            xrefAttrs.push_back(XmlAttribute("secondary", theField.SecondaryIndentifier));
        }

        _xml.writeSingleTag(XmlWriter::SINGLELINE, 2, "dbreference", xrefAttrs);
    }

    //
    // CC line: written straight out as an entry-level <comment> element at
    // indent level 2.
    //
    METHODIMP XmlConverter::onCC(long theLineNumber, Phoenix::CCLine const & theField)
    {
        //
        // render <comment>
        //
        _xml.writeElement(
            2,
            "comment",
            EMPTY_ATTRIBUTE_LIST,
            theField.Comment );
    }

    //
    // Start of the TPA lines: intentionally empty, nothing is rendered.
    //
    METHODIMP XmlConverter::onBeginTPALines(long theLineNumber)
    {
        // TODO: no XML output for TPA data yet
    }

    //
    // AS line: intentionally empty, the line is ignored.
    //
    METHODIMP XmlConverter::onAS(long theLineNumber, Phoenix::ASLine const & theField)
    {
        // TODO: no XML output for AS lines yet
    }

    //
    // End of the TPA lines: intentionally empty, nothing is rendered.
    //
    METHODIMP XmlConverter::onEndTPALines()
    {
        // TODO: no XML output for TPA data yet
    }

    //
    // Start of the feature table: no output is produced here; each <feature>
    // is opened by onBeginFeatureKey().
    //
    METHODIMP XmlConverter::onBeginFeatureTable(long theLineNumber)
    {
        // nothing to do
    }

    //
    // Start of a feature: remembers the feature key, discards the qualifiers
    // of the previous feature and writes the opening <feature name="..."> tag
    // at indent level 2.
    //
    METHODIMP XmlConverter::onBeginFeatureKey(long theLineNumber, Phoenix::FeatureKey const & theField)
    {
        // fresh per-feature state
        _feature = theField;
        _qualifiers.clear();

        XmlAttributeList featureAttrs;
        featureAttrs.push_back(XmlAttribute("name", theField.Key));

        _xml.writeBeginTag(XmlWriter::SINGLELINE, 2, "feature", featureAttrs);
        _xml.newLine();
    }

    //
    // Feature qualifier: a copy is appended to the current feature's qualifier
    // list, and the enclosing quotes are stripped from the stored copy's value
    // (see removeQuotes()).
    //
    METHODIMP XmlConverter::onFeatureQualifier(long theLineNumber, Phoenix::FeatureQualifier const  & theQualifier)
    {
        _qualifiers.push_back(theQualifier);

        // work on the element just stored, not on the caller's object
        removeQuotes(_qualifiers.back().Value);
    }

    //
    // End of a feature: writes the feature's children and the closing
    // </feature> tag. "source" features get an <organism> block first; every
    // feature then gets its qualifiers followed by its location.
    //
    METHODIMP XmlConverter::onEndFeatureKey()
    {
        const bool isSourceFeature = (_feature.Key == "source");

        if (isSourceFeature)
            renderOrganismData();

        renderQualifiers();
        renderLocation(_feature.Location);

        // </feature>
        _xml.writeIndent(2);
        _xml.writeEndTag("feature");
    }

    void XmlConverter::renderOrganismData()
    {
        std::string         theOrganism     = getOrganismNameFromQualifiers();
        std::string         theTaxId        = getTaxIdFromQualifiers();

        std::string         theCommonName;
        Phoenix::StringList theLineage;

        getTaxonomyInfoForOrganism(
            theOrganism,
            theCommonName,
            theLineage );

        //
        // render <organism>
        //
        _xml.writeBeginTag(
            XmlWriter::SINGLELINE,
            3,
            "organism",
            EMPTY_ATTRIBUTE_LIST );
        _xml.newLine();

        //
        // render <nameset>
        //
        _xml.writeBeginTag(
            XmlWriter::SINGLELINE,
            4,
            "nameset",
            EMPTY_ATTRIBUTE_LIST );
        _xml.newLine();

        //
        // render <scientificName>
        //
        _xml.writeElement(
            5,
            "scientificName",
            EMPTY_ATTRIBUTE_LIST,
            theOrganism );
    
        //
        // render <preferredCommonName>
        //
        if (theCommonName != "")
        {
            _xml.writeElement(
                5,
                "preferredCommonName",
                EMPTY_ATTRIBUTE_LIST,
                theCommonName );
        }

        //
        // render </nameset>
        //
        _xml.writeIndent(4);
        _xml.writeEndTag("nameset");

        //
        // render <taxId>
        //
        _xml.writeElement(
            4,
            "taxId",
            EMPTY_ATTRIBUTE_LIST,
            theTaxId );

        //
        // render <lineage>
        //
        _xml.writeBeginTag(
            XmlWriter::SINGLELINE,
            4,
            "lineage",
            EMPTY_ATTRIBUTE_LIST );
        _xml.newLine();

        
        for (size_t i = 0; i < theLineage.size(); ++i)
        {
            _xml.writeElement(
                5,
                "taxon",
                EMPTY_ATTRIBUTE_LIST,
                theLineage[i] );
        }

        _xml.writeIndent(4);
        _xml.writeEndTag("lineage");

        //
        // render </organism>
        //
        _xml.writeIndent(3);
        _xml.writeEndTag("organism");
    }

    //
    // Returns the value of the first "organism" qualifier of the current
    // feature, its lines joined by makeString(); an empty string when the
    // feature has no such qualifier.
    //
    std::string XmlConverter::getOrganismNameFromQualifiers()
    {
        size_t index = 0;

        while (index < _qualifiers.size())
        {
            if (_qualifiers[index].Name == "organism")
                return makeString(_qualifiers[index].Value);

            ++index;
        }

        return std::string();
    }

    //
    // Returns the identifier of the first db_xref qualifier of the current
    // feature whose database is "taxon"; an empty string when there is none.
    // Throws Exception (from getDatabaseAndIdentifier()) when a db_xref value
    // examined on the way has no ':' separator.
    //
    std::string XmlConverter::getTaxIdFromQualifiers()
    {
        for (size_t q = 0; q < _qualifiers.size(); ++q)
        {
            if (!(_qualifiers[q].Name == "db_xref"))
                continue;

            std::string database;
            std::string identifier;

            getDatabaseAndIdentifier(makeString(_qualifiers[q].Value), database, identifier);

            if ("taxon" == database)
                return identifier;
        }

        return std::string();
    }

    void XmlConverter::getTaxonomyInfoForOrganism
        (
        std::string const &     theOrganism,
        std::string &           theCommonName,
        Phoenix::StringList &   theLineage
        )
    {
        for (size_t i = 0; i < _entry.tx.size(); ++i)
        {
            TaxonomyInfo const & taxonomyInfo = _entry.tx[i];

            std::string theScientificName;
            std::string theGenbankName;

            getScientificAndCommonName(
                makeString(taxonomyInfo.os.Organism),
                theScientificName,
                theGenbankName );

            if (theOrganism != theScientificName)
                continue;

            theCommonName   = theGenbankName;
            theLineage      = taxonomyInfo.oc.Lineage;

            break;
        }
    }

    //
    // Splits an organism string of the form "Scientific name (common name)".
    //
    // str               - the organism text
    // theScientificName - [out] text before the first '(', trailing blanks and
    //                     tabs removed; the whole of str when it has no '('
    // theCommonName     - [out] text between the first '(' and the last ')',
    //                     trailing blanks and tabs removed; empty when str has
    //                     no '('
    //
    // The closing parenthesis is located explicitly: blindly erasing the last
    // character threw std::out_of_range when nothing followed the '(' and cut
    // off a real character when the text did not end in ')'. When there is no
    // ')' at all, everything after the '(' is taken as the common name.
    //
    void XmlConverter::getScientificAndCommonName
        (
        std::string const & str,
        std::string & theScientificName,
        std::string & theCommonName 
        )
    {
        std::string::size_type pos = str.find('(');

        if (std::string::npos == pos)
        {
            theScientificName = str;
            theCommonName.clear();
        }
        else
        {
            theScientificName = str.substr(0, pos);
            // find_last_not_of() yields npos for an all-blank string; npos + 1 == 0 erases everything
            theScientificName.erase(theScientificName.find_last_not_of(" \t") + 1);

            theCommonName = str.substr(pos + 1);

            // drop the closing parenthesis (and anything after it), if present
            std::string::size_type closing = theCommonName.rfind(')');

            if (std::string::npos != closing)
                theCommonName.erase(closing);

            theCommonName.erase(theCommonName.find_last_not_of(" \t") + 1);
        }
    }


    //
    // Splits a db_xref value such as SWISS-PROT:P12345 at its first ':'.
    //
    // str           - the db_xref value
    // theDatabase   - [out] text before the first ':'
    // theIdentifier - [out] text after the first ':'
    //
    // Throws Exception when str contains no ':'.
    //
    void XmlConverter::getDatabaseAndIdentifier
        (
        std::string const & str,
        std::string &       theDatabase,
        std::string &       theIdentifier 
        )
    {
        std::string::size_type const colon = str.find(':');

        if (std::string::npos == colon)
            throw Exception("invalid db_xref format: " + str);

        theDatabase   = str.substr(0, colon);
        theIdentifier = str.substr(colon + 1);
    }

    //
    // Strips the enclosing double quotes from a (possibly multi-line) quoted
    // qualifier value and replaces every doubled quote ("") inside it by a
    // single one. Values that do not start with a double quote, empty lists
    // and lists whose first line is empty are left untouched.
    //
    // The closing quote is removed only when the last line really ends in one:
    // erasing the last character unconditionally threw std::out_of_range when
    // the last line was empty (e.g. the value is a lone quote) and destroyed a
    // real character of an unterminated value.
    //
    void XmlConverter::removeQuotes(Phoenix::StringList & theLines)
    {
        if (theLines.empty())
            return;

        if (theLines[0].empty())
            return;

        if (theLines[0][0] != '"')
            return;

        // remove first "
        theLines[0].erase(0, 1);

        // remove last ", if there is one
        std::string & theLastLine = theLines[theLines.size() - 1];

        if (!theLastLine.empty() && theLastLine[theLastLine.length() - 1] == '"')
            theLastLine.erase(theLastLine.length() - 1, 1);

        // collapse escaped quotes: "" -> "
        static Regex theRegex("\"{2}");

        for (size_t i = 0; i < theLines.size(); ++i)
        {          
            theLines[i] = regexMerge(theLines[i], theRegex, "\"");
        }
    }

    void XmlConverter::renderDbXrefsQualifiers()
    {
        for (size_t i = 0; i < _qualifiers.size(); ++i)
        {
            const std::string & theQualifierName = _qualifiers[i].Name;

            if (theQualifierName == "db_xref")
            {
                std::string theDatabase;
                std::string theIdentifier;
                
                getDatabaseAndIdentifier(
                    _qualifiers[i].Value[0],
                    theDatabase,
                    theIdentifier );

                if (theDatabase == "taxon" || theDatabase == "pseudo")
                    continue;

                XmlAttributeList attributes;

                attributes.push_back(XmlAttribute("db", theDatabase));
                attributes.push_back(XmlAttribute("primary", theIdentifier));

                _xml.writeSingleTag(
                    XmlWriter::SINGLELINE,
                    3,
                    "dbreference",
                    attributes );
            }
        }
    }

    //
    // Tells whether a qualifier must be left out of the generic <qualifier>
    // output. True for "db_xref" and "organism", false for everything else.
    //
    bool XmlConverter::suppressQualifierDisplay(std::string const & theQualifierName)
    {
        return theQualifierName == "db_xref"
            || theQualifierName == "organism";
    }

    void XmlConverter::renderQualifiers()
    {
        renderDbXrefsQualifiers();

        for (size_t i = 0; i < _qualifiers.size(); ++i)
        {
            std::string const & theQualifierName = _qualifiers[i].Name;

            if (suppressQualifierDisplay(theQualifierName))
                continue;

            XmlAttributeList attributes;
            attributes.push_back(XmlAttribute("name", _qualifiers[i].Name));

            _xml.writeElement(
                3,
                "qualifier",
                attributes,
                _qualifiers[i].Value );
        }
    }

    //
    // Writes a <location> element at indent level 3 with one child per
    // location element (see renderLocationElement()).
    //
    // The "type" attribute is "single" for at most one element, otherwise
    // "join" or "order" depending on location.IsJoin; "complement" is "true"
    // or "false" according to location.IsComplement.
    //
    void XmlConverter::renderLocation(Phoenix::FeatureLocation const & location)
    {
        const size_t elementCount = location.LocationElements.size();

        std::string attrType = "single";

        if (elementCount > 1)
            attrType = location.IsJoin ? "join" : "order";

        const std::string attrComplement = location.IsComplement ? "true" : "false";

        XmlAttributeList locationAttrs;
        locationAttrs.push_back(XmlAttribute("type", attrType));
        locationAttrs.push_back(XmlAttribute("complement", attrComplement));

        // <location>
        _xml.writeBeginTag(XmlWriter::SINGLELINE, 3, "location", locationAttrs);
        _xml.newLine();

        // children
        for (size_t element = 0; element < elementCount; ++element)
        {
            renderLocationElement(location.LocationElements[element]);
        }

        // </location>
        _xml.writeIndent(3);
        _xml.writeEndTag("location");
    }

    /**
     * Writes one <locationElement> element, including its <basePosition>
     * children.
     *
     * The element kind is taken from the low four bits of e_flags. A
     * "simple" element has one base position (x); "site" and "range"
     * elements have two (x followed by y).
     *
     * @param theLocationElement  the location element to render
     * @throws Exception if the element kind is not one of the three
     *         recognized LOCATION_ELEMENT_* values
     */
    void XmlConverter::renderLocationElement(Phoenix::LocationElement const & theLocationElement)
    {
        const unsigned kind = (theLocationElement.e_flags & 0xf);

        // Map the kind to its XML name and note whether a second (y)
        // base position has to be written further down.
        std::string typeName;
        bool        hasSecondPosition = false;

        switch (kind)
        {
        case Phoenix::LOCATION_ELEMENT_SINGLE:
            typeName = "simple";
            break;

        case Phoenix::LOCATION_ELEMENT_INFRABASE:
            typeName          = "site";
            hasSecondPosition = true;
            break;

        case Phoenix::LOCATION_ELEMENT_RANGE:
            typeName          = "range";
            hasSecondPosition = true;
            break;

        default:
            throw Exception("unrecognized location element type");
        }

        // An element without its own accession number falls back to the
        // accession/version of the entry being converted.
        std::string accession;
        int         version;

        const bool hasOwnAccession = (theLocationElement.AccessionNumber != "");
        if (hasOwnAccession)
        {
            accession = theLocationElement.AccessionNumber;
            version   = theLocationElement.SequenceVersion;
        }
        else
        {
            accession = _entry.sv.AccessionNumber;
            version   = _entry.sv.Version;
        }

        // Bit 0x80 of e_flags drives the "complement" attribute
        const std::string complement(
            (theLocationElement.e_flags & 0x80) ? "true" : "false");

        // Opening tag: <locationElement type= accession= version= complement=>
        XmlAttributeList elementAttrs;
        elementAttrs.push_back(XmlAttribute("type", typeName));
        elementAttrs.push_back(XmlAttribute("accession", accession));
        elementAttrs.push_back(XmlAttribute("version", version));
        elementAttrs.push_back(XmlAttribute("complement", complement));

        _xml.writeBeginTag(XmlWriter::SINGLELINE, 4, "locationElement", elementAttrs);
        _xml.newLine();

        // Base positions: x is always present, y only for two-position
        // kinds (unrecognized kinds never get here, see the throw above).
        renderBasePosition(
            theLocationElement.x1,
            theLocationElement.x2,
            theLocationElement.x_flags );

        if (hasSecondPosition)
        {
            renderBasePosition(
                theLocationElement.y1,
                theLocationElement.y2,
                theLocationElement.y_flags );
        }

        // Closing tag: </locationElement>
        _xml.writeIndent(4);
        _xml.writeEndTag("locationElement");
    }

    void XmlConverter::renderBasePosition(long x1, long x2, unsigned flags)
    {
        XmlAttributeList attributes;

        //
        // render <basePosition>
        //
        switch (flags)
        {
            case Phoenix::BASE_POSITION_SIMPLE_BASE:
                attributes.push_back(XmlAttribute("type", "simple"));
                break;

            case Phoenix::BASE_POSITION_LEFT_OPEN:
                attributes.push_back(XmlAttribute("type", "left open"));
                break;

            case Phoenix::BASE_POSITION_RIGHT_OPEN:
                attributes.push_back(XmlAttribute("type", "right open"));
                break;

            case Phoenix::BASE_POSITION_SPAN:
                {
                    attributes.push_back(XmlAttribute("type", "fuzzy"));

                    std::ostringstream oss;
                    oss << "+" << (x2 - x1);

                    attributes.push_back(XmlAttribute("extent", oss.str()));
                }
                break;           
        }
        
        std::ostringstream oss;
        oss << x1;

        _xml.writeElement(
            5,
            "basePosition",
            attributes,
            oss.str() );
    }

    //
    // Handler for the end of the feature table. This converter writes
    // nothing and changes no state at this point.
    //
    METHODIMP XmlConverter::onEndFeatureTable()
    {
        // nothing to do
    }

    //
    // Handler for an SQ line. Nothing is written to the XML output here;
    // the field is only copied into _entry.sq. theLineNumber is not used.
    //
    METHODIMP XmlConverter::onSQ(long theLineNumber, Phoenix::SQLine const & theField)
    {
        // keep a copy of the SQ line in the accumulated entry
        _entry.sq = theField;
    }

    //
    // Handler for the start of the sequence data: writes the opening
    // <sequence> tag. Its attributes come from the accumulated entry
    // (_entry.id and _entry.sv); theLineNumber is not used.
    //
    METHODIMP XmlConverter::onBeginSequenceData(long theLineNumber)
    {
        // Attributes in output order: type, [topology], length, version
        XmlAttributeList sequenceAttrs;

        sequenceAttrs.push_back(XmlAttribute("type", _entry.id.MolType));

        if (_entry.id.Circular)
        {
            // "topology" is only written for circular molecules
            sequenceAttrs.push_back(XmlAttribute("topology", "circular"));
        }

        sequenceAttrs.push_back(XmlAttribute("length", _entry.id.SequenceLength));
        sequenceAttrs.push_back(XmlAttribute("version", _entry.sv.Version));

        // Opening <sequence ...> tag, followed by a line break
        _xml.writeBeginTag(
            XmlWriter::SINGLELINE,
            2,
            "sequence",
            sequenceAttrs );
        _xml.newLine();
    }

    //
    // Handler for one line of sequence data: writes theField.Sequence as
    // element text on its own line (indent, text, line break).
    // theLineNumber is not used.
    //
    METHODIMP XmlConverter::onSequenceLine(long theLineNumber, Phoenix::SequenceLine const & theField)
    {
        //
        // render inner text of sequence
        //
        _xml.writeIndent(2);                        // same level as the closing </sequence> tag
        _xml.writeElementText(theField.Sequence);
        _xml.newLine();
    }

    //
    // Handler for the end of the sequence data: writes the closing
    // </sequence> tag, preceded by an indent at level 2.
    //
    METHODIMP XmlConverter::onEndSequenceData()
    {
        //
        // render </sequence>
        //
        _xml.writeIndent(2);
        _xml.writeEndTag("sequence");
    }

    //
    // Handler for the start of the CO lines. Currently a no-op: both
    // arguments are ignored and nothing is written.
    //
    METHODIMP XmlConverter::onBeginCOLines(long theLineNumber, bool isComplement)
    {
    }

    //
    // Handler for a CO segment. Currently a no-op: the segment info is
    // ignored and nothing is written.
    //
    METHODIMP XmlConverter::onCOSegmentInfo (long theLineNumber, Phoenix::COSegmentInfo const & theSegmentInfo)
    {
    }

    //
    // Handler for a CO gap. Currently a no-op: the gap info is ignored
    // and nothing is written.
    //
    METHODIMP XmlConverter::onCOGapInfo (long theLineNumber, Phoenix::COGapInfo const & theGapInfo)
    {
    }

    //
    // Handler for the end of the CO lines. Currently a no-op: nothing
    // is written.
    //
    METHODIMP XmlConverter::onEndCOLines()
    {
    }

}
}
