/**++
 *   
 *   LICENSE
 *   -------
 *   
 *   Copyright (c) 2004 Renato Mancuso
 *   All rights reserved.
 *   
 *   Redistribution and use in source and binary forms, with or without modification, are 
 *   permitted provided that the following conditions are met:
 *   
 *   - Redistributions of source code must retain the above copyright notice, this list 
 *     of conditions and the following disclaimer.
 *   
 *   - Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials 
 *     provided with the distribution.
 *   
 *   - Neither the name of Renato Mancuso nor the names of its contributors may be used to 
 *     endorse or promote products derived from this software without specific prior written 
 *     permission.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS 
 *   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 *   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 *   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
 *   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *   
--**/


#include "SequenceLineParser.hpp"
#include "ParserErrors.hpp"
#include "ParserWarnings.hpp"
#include "StringUtils.hpp"

#include <vector>
#include <sstream>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <cassert>

namespace OpenEMBL
{
namespace Phoenix
{

    //
    // Constructs a sequence-line parser.
    //
    // Both arguments are forwarded unchanged to the
    // ItemParserImpl<SequenceLine> base class; the constructor itself
    // performs no further initialisation.
    //
    SequenceLineParser::SequenceLineParser(IParserCtx* pParserCtx,
                                           IItemHandler<SequenceLine>* pItemHandler)
        : ItemParserImpl<SequenceLine>(pParserCtx, pItemHandler)
    {
    }

    //
    // Parses a block of sequence lines starting at the current line.
    //
    // The current line is always handed to processLine(). Every following
    // line is handed to processLine() as well, until either the input is
    // exhausted (currentLine() returns NULL) or a line starting with "//"
    // is reached. That terminating line is left as the current line; it is
    // not consumed here.
    //
    // If there is no current line at all on entry, ERROR_EOF_FOUND is
    // logged and nothing else happens.
    //
    METHODIMP SequenceLineParser::parse()
    {
        const long firstLineNumber = currentLineNumber();
        LPCSTR     firstLine       = currentLine();

        if (NULL == firstLine)
        {
            logError(firstLineNumber, ERROR_EOF_FOUND, NULL);
            return;
        }

        processLine(firstLine, firstLineNumber);

        //
        // walk the remaining lines of the record
        //
        for (nextLine(); ; nextLine())
        {
            LPCSTR theCurrent = currentLine();

            // end of input, or end-of-record marker
            if (NULL == theCurrent || 0 == strncmp(theCurrent, "//", 2))
                break;

            // continuation lines are expected to start with two blanks;
            // anything else is reported but still processed
            if (0 != strncmp(theCurrent, "  ", 2))
            {
                logWarning(currentLineNumber(),
                           WARNING_BAD_FIELD_CODE_IN_SEQUENCE_LINE,
                           theCurrent);
            }

            processLine(theCurrent, currentLineNumber());
        }
    }

    //
    // Extracts the nucleotide codes from a single line and reports them.
    //
    // The line is scanned character by character:
    //   - characters accepted by isNucleotideCode() are collected, in order;
    //   - whitespace and decimal digits are skipped;
    //   - any other character aborts the scan: ERROR_INVALID_NUCLEOTIDE_CODE
    //     is logged with the offending character and its 1-based column,
    //     and nothing is reported for this line.
    //
    // On success the collected codes are stored (NUL-terminated) in a
    // SequenceLine and passed to notifyParsed() together with the line
    // number. A line with no codes at all yields an empty sequence.
    //
    // theLine        text of the line to scan
    // theLineNumber  line number used for error reporting and notification
    //
    void SequenceLineParser::processLine(const std::string & theLine, long theLineNumber)
    {
        std::vector<char> theBuffer;
        theBuffer.reserve(theLine.length() + 1);

        std::string::const_iterator first = theLine.begin();
        std::string::const_iterator last  = theLine.end();

        for (; first != last; ++first)
        {
            const char c = *first;

            if (isNucleotideCode(c))
            {
                theBuffer.push_back(c);
                continue;
            }

            // The <cctype> classifiers require a value representable as
            // unsigned char (or EOF). Passing a plain char that happens to
            // be negative (any byte >= 0x80 where char is signed) is
            // undefined behaviour, so convert first.
            const unsigned char uc = static_cast<unsigned char>(c);

            if (isspace(uc) || isdigit(uc))
            {
                continue;
            }

            std::ostringstream oss;

            oss << "character "
                << makePrintable(c)
                << " found at column "
                << long((first - theLine.begin()) + 1);

            logError(theLineNumber,
                     ERROR_INVALID_NUCLEOTIDE_CODE,
                     oss.str().c_str());
            return;
        }

        // terminate the buffer so it can be used as a C string below;
        // this also guarantees the vector is non-empty before indexing it
        theBuffer.push_back(char(0));

        SequenceLine theField;

        theField.Sequence = &theBuffer[0];

        notifyParsed(theLineNumber, theField);
    }

    //
    // Tells whether a character is one of the accepted sequence letters.
    //
    // Accepted letters, in either case:
    //     a b c d g h k m n r s t u v w y
    // (NOTE(review): these look like the IUPAC nucleotide codes - confirm.)
    //
    // Returns true for an accepted letter, false for everything else.
    //
    bool SequenceLineParser::isNucleotideCode(char c)
    {
        static const char theCodes[] =
            "abcdghkmnrstvwyu"
            "ABCDGHKMNRSTVWYU";

        // sizeof - 1 keeps the terminating NUL out of the search, so a
        // '\0' argument is rejected rather than matched
        return NULL != std::memchr(theCodes, c, sizeof(theCodes) - 1);
    }

}
}
