/**++
 *   
 *   LICENSE
 *   -------
 *   
 *   Copyright (c) 2004, 2006 Renato Mancuso
 *   All rights reserved.
 *   
 *   Redistribution and use in source and binary forms, with or without modification, are 
 *   permitted provided that the following conditions are met:
 *   
 *   - Redistributions of source code must retain the above copyright notice, this list 
 *     of conditions and the following disclaimer.
 *   
 *   - Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials 
 *     provided with the distribution.
 *   
 *   - Neither the name of Renato Mancuso nor the names of its contributors may be used to 
 *     endorse or promote products derived from this software without specific prior written 
 *     permission.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS 
 *   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 *   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 *   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
 *   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *   
--**/


#include <Phoenix.hpp>
#include <StringUtils.hpp>
#include <cstdio>
#include <string>
#include <fstream>

using namespace OpenEMBL::Phoenix;

class PARSER : public FileParserHost
{
public:
    PARSER(char const * theFilename)
        : FileParserHost( theFilename )
		, _outfile(NULL)
		, _inCDS(false)
    {
    }

    ~PARSER() throw()
    {
		closeOutfile();
    }

    METHOD onBeginEntry(long theLineNumber)
    {
        FileParserHost::onBeginEntry(theLineNumber);
        _accession.clear();
		_inCDS = false;
    }

    METHOD onEndEntry()
    {
        FileParserHost::onEndEntry();
        _accession.clear();
		_inCDS = false;
    }

    METHOD onID(long theLineNumber, const IDLine & theField)
    {
		_accession = theField.EntryName;

		std::string fileName( _accession + ".CDS" );

		openOutfile( fileName.c_str() );
    }

    void handleError(
        long          theLineNumber,
        int           theErrorCode,
        char const*   theDescription,
        char const*   theMessage)
    {
        if (NULL == theMessage)
            fprintf(
                stderr,
                "%s(%ld)[%s]: error P%05d: %s.\n",
                getSourceDisplayName(),
                theLineNumber,
                _accession.c_str(),
                theErrorCode,
                theDescription);
        else
            fprintf(
                stderr,
                "%s(%ld)[%s]: error P%05d: %s: %s.\n",
                getSourceDisplayName(),
                theLineNumber,
                _accession.c_str(),
                theErrorCode,
                theDescription,
                theMessage);
    }

    void handleWarning(
        long            theLineNumber,
        int             theWarningNumber,
        int             theWarningLevel,
        char const*    theDescription,
        char const*    theMessage)
    {
        if (NULL == theMessage)
            fprintf(
                stderr,
                "%s(%ld)[%s]: warning W%05d: %s.\n",
                getSourceDisplayName(),
                theLineNumber,
                _accession.c_str(),
                theWarningNumber,
                theDescription);
        else
            fprintf(
                stderr,
                "%s(%ld)[%s]: warning W%05d: %s: %s.\n",
                getSourceDisplayName(),
                theLineNumber,
                _accession.c_str(),
                theWarningNumber,
                theDescription,
                theMessage);
    }

	METHOD onBeginFeatureKey(long theLineNumber, FeatureKey const & theField)
	{
		_inCDS = (theField.Key == "CDS");
	}

	METHOD onEndFeatureKey()
	{
		if (_inCDS) 
			saveCDS();

		_qualifiers.clear();
		_inCDS = false;
	}

	METHOD onFeatureQualifier(long theLineNumber, FeatureQualifier const  & theQualifier)
	{
		_qualifiers.push_back( theQualifier );
	}

private:
	void openOutfile(const char* fileName)
	{
		closeOutfile();
		this->_outfile = new std::ofstream( fileName );
	}

	void closeOutfile()
	{
		if (NULL != _outfile)
		{
			_outfile->flush();
			delete _outfile;
			_outfile = NULL;
		}
	}

	void saveCDS()
	{
		writeFastaHeader();
		writeSequence();
	}

	void dequoteQualifierValue( StringList & value )
	{
		if ( value.size() == 0 )
			return;

		const char QUOTE = '\"';

		const size_t firstIndex  = 0;
		const size_t lastIndex   = value.size() - 1;

		String & firstLine = value[ firstIndex ];
		String & lastLine  = value[ lastIndex ]; 

		size_t firstQuoteIndex = 0;
		size_t lastQuoteIndex = lastLine.length() - 1;

		bool firstLineQuoted = !firstLine.empty() && ( firstLine[ firstQuoteIndex ] == QUOTE );
		bool lastLineQuoted = !lastLine.empty() && ( lastLine[ lastQuoteIndex ] == QUOTE );

		if ( firstLineQuoted && lastLineQuoted )
		{
			lastLine.erase( lastQuoteIndex, 1 );
			firstLine.erase( firstQuoteIndex, 1 );
		}
	}

	void getQualifierValues( String const & qualifierName, std::vector<StringList> & values )
	{
		for (size_t i = 0; i < _qualifiers.size(); ++i)
		{
			if ( 0 == strCaseCmp( _qualifiers[i].Name.c_str(), qualifierName.c_str() ) )
			{
				StringList value = _qualifiers[i].Value;
				dequoteQualifierValue( value );
				values.push_back(value );
			}
		}
	}

	void getUniprotIdList( std::vector<StringList> const & dbXrefs, StringList  & ids )
	{
		static const char* header = "UniProtKB/Swiss-Prot:";
		static const size_t headerLength = std::strlen(header);

		for (size_t i = 0; i < dbXrefs.size(); i++)
		{
			std::string dbx = dbXrefs[i][0];
			if ( 0 == (strNCaseCmp( header, dbx.c_str(), headerLength ) ) )
				ids.push_back( dbx.substr( headerLength ) );
		}
	}

	std::string getUniprotId()
	{
		std::string result;

		std::vector<StringList> dbXRefs;
		getQualifierValues( "db_xref", dbXRefs );
		
		if (dbXRefs.size() > 0)
		{
			StringList uniProtIds;
			getUniprotIdList( dbXRefs, uniProtIds );

			if (uniProtIds.size() > 0)
				result = uniProtIds[0];
		}

		return result;
	}

	std::string getProteinId()
	{
		std::string result;

		std::vector<StringList> ids;
		getQualifierValues( "protein_id", ids );

		if (ids.size() > 0)
			result = ids[0][0];

		return result;
	}

	void writeFastaHeader()
	{
		std::string proteinId = getProteinId();
		std::string uniprotId = getUniprotId();

		if ( _outfile )
		{
			*_outfile << ">" << proteinId << "|" << uniprotId << "|\n";
		}
	}

	void writeSequence()
	{
		std::vector<StringList> translations;
		getQualifierValues( "translation", translations );

		if ( ( translations.size() > 0 ) && _outfile )
		{
			StringList const & translation = translations[0];

			for (size_t i = 0; i < translation.size(); i++)
			{
				*_outfile << translation[i] << "\n";			
			}
		}
	}

private:
    std::string     _accession;
	std::ofstream*	_outfile;
	bool			_inCDS;

	std::vector<FeatureQualifier> _qualifiers;
};

/**
 * Entry point: parses the file named by the first command-line argument.
 *
 * @return 0 after parsing, 1 (with a usage line on stderr) when the file
 *         argument is missing.
 */
int main(int argc, char* argv[])
{
    if (argc < 2)
    {
        // argv[0] is not guaranteed to be present, so fall back to a placeholder.
        const char* programName = (argc > 0 && NULL != argv[0]) ? argv[0] : "<program>";
        fprintf(stderr, "usage: %s <input-file>\n", programName);
        return 1;
    }

    PARSER theParser(argv[1]);
    theParser.parse();
    return 0;
}


