/**++
 *   
 *   LICENSE
 *   -------
 *   
 *   Copyright (c) 2004 Renato Mancuso
 *   All rights reserved.
 *   
 *   Redistribution and use in source and binary forms, with or without modification, are 
 *   permitted provided that the following conditions are met:
 *   
 *   - Redistributions of source code must retain the above copyright notice, this list 
 *     of conditions and the following disclaimer.
 *   
 *   - Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials 
 *     provided with the distribution.
 *   
 *   - Neither the name of Renato Mancuso nor the names of its contributors may be used to 
 *     endorse or promote products derived from this software without specific prior written 
 *     permission.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS 
 *   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 *   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 *   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
 *   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *   
--**/


#include "ACLineParser.hpp"
#include "ParserErrors.hpp"
#include "ParserWarnings.hpp"
#include "StringUtils.hpp"
#include "ListReader.hpp"
#include "Regex.hpp"

#include <algorithm>
#include <cassert>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <sstream>

namespace OpenEMBL
{
namespace Phoenix
{

    //
    // Builds an AC line parser.
    //
    // Both arguments are forwarded unchanged to the ItemParserImpl<ACLine>
    // base class; the constructor does nothing else.
    //
    //   pParserCtx    parser context passed on to the base class
    //   pItemHandler  item handler passed on to the base class
    //
    ACLineParser::ACLineParser(IParserCtx* pParserCtx, IItemHandler<ACLine>* pItemHandler)
        : ItemParserImpl<ACLine>(pParserCtx, pItemHandler)
    {
        // no state of its own to initialise
    }

    //
    // Entry point of the AC line parser.
    //
    // Inspects the parser's current line and takes one of three paths:
    //   - no current line: logs ERROR_EOF_FOUND and returns;
    //   - current line is not an AC line: logs ERROR_INVALID_AC_LINE with
    //     the offending text, calls nextLine() and returns;
    //   - otherwise: gathers lines through extractLines(isACLine, ...) and
    //     passes them to processLines() together with the line number that
    //     was read on entry.
    //
    METHODIMP ACLineParser::parse()
    {
        long   startLineNumber = currentLineNumber();
        LPCSTR startLine       = currentLine();

        if (NULL == startLine)
        {
            // nothing left to read
            logError(startLineNumber, ERROR_EOF_FOUND, NULL);
        }
        else if (!isACLine(startLine))
        {
            // report the unexpected line, then step over it
            logError(startLineNumber, ERROR_INVALID_AC_LINE, startLine);
            nextLine();
        }
        else
        {
            StringList acLines;

            extractLines(isACLine, acLines);
            processLines(acLines, startLineNumber);
        }
    }

    //
    // Tells whether a line starts with the AC line tag ("AC" followed by
    // three blanks).
    //
    //   theLine  NUL-terminated line text; must not be NULL (asserted)
    //
    // Returns true when the first characters of theLine equal the tag.
    //
    bool ACLineParser::isACLine(const char* theLine)
    {
        // the compared length is derived from the literal so the two cannot
        // drift apart
        static const char acTag[] = "AC   ";

        assert(NULL != theLine);
        return 0 == std::strncmp(theLine, acTag, sizeof(acTag) - 1);
    }

    //
    // Returns the payload of an AC line: everything after the first five
    // characters (the length of the "AC   " tag checked by isACLine()),
    // passed through trimRight().
    //
    //   theLine  full line text; must be at least five characters long
    //            (asserted)
    //
    String ACLineParser::getLineContent(const String & theLine)
    {
        // number of leading characters occupied by the line tag
        const size_t tagLength = 5;

        assert(tagLength <= theLine.length());

        return trimRight(theLine.substr(tagLength));
    }

    //
    // Turns a group of raw AC lines into an ACLine item and reports it.
    //
    //   theLines       raw AC lines; overwritten in place with the result of
    //                  getLineContent() for each line
    //   theLineNumber  line number forwarded to the list reader, to
    //                  expandAccessionList() and to notifyParsed()
    //
    void ACLineParser::processLines(StringList & theLines, long theLineNumber)
    {
        // replace every line by its content (tag removed, right-trimmed)
        std::transform(theLines.begin(), theLines.end(), theLines.begin(), getLineContent);

        // let the list reader pull the accession numbers out of the lines
        StringList rawAccessionNumbers;
        ListReader listReader(this, ';', ';');

        listReader.readList(theLineNumber, theLines, rawAccessionNumbers);

        // expand acc# ranges (format AB001-AB999) into the item's list
        ACLine parsedItem;

        expandAccessionList(theLineNumber, rawAccessionNumbers, parsedItem.AccessionNumberList);

        // hand the finished item over
        notifyParsed(theLineNumber, parsedItem);
    }


    //
    // Appends the accession numbers in 'input' to 'dest', replacing entries
    // written as a range (two accession numbers sharing a prefix, joined by
    // '-') with the individual accession numbers of that range.
    //
    // Entries that do not match the range pattern are appended unchanged.
    // Entries that match but cannot be expanded (expandAccessionNumber()
    // returns false) are reported with
    // WARNING_UNRECOGNIZED_ACCESSION_NUMBER_FORMAT and appended unchanged.
    //
    //   theLineNumber  line number passed to logWarning()
    //   input          accession numbers to process
    //   dest           list the results are appended to
    //
    void ACLineParser::expandAccessionList(long theLineNumber, StringList const & input, StringList & dest)
    {
        //
        // regex is: ([A-Z][A-Za-z0-9]+)([0-9]+)-\1([0-9]+)
        //
        //   groups:
        //      1) prefix common to both ends of the range
        //      2) digits of the lower end
        //      3) digits of the upper end
        //
        // The prefix class has to be the digit range "0-9"; written as
        // "0=9" it would accept only '0', '=' and '9' besides letters, and
        // ranges whose prefix holds another digit would not be matched.
        //
        static Regex re( "([A-Z][A-Za-z0-9]+)([0-9]+)-\\1([0-9]+)" );

        for (size_t i = 0; i < input.size(); ++i )
        {
            String accessionNumber = input[ i ];

            Match m;

            if ( regexSearch( accessionNumber, m, re ) )
            {
                if (!expandAccessionNumber( m.str(1), m.str(2), m.str(3), dest ))
                {
                    // looked like a range but could not be expanded:
                    // warn and keep the entry as it was written
                    logWarning(
                        theLineNumber,
                        WARNING_UNRECOGNIZED_ACCESSION_NUMBER_FORMAT,
                        accessionNumber.c_str() );

                    dest.push_back( accessionNumber );
                }
            }
            else
            {
                // plain accession number
                dest.push_back( accessionNumber );
            }
        }
    }


    //
    // Converts the leading decimal number of 'text' to an int.
    //
    // Conversion follows strtol() (so text without leading digits yields 0,
    // as atoi() would), but a number that does not fit in an int is
    // reported as a failure instead of being undefined behaviour.
    //
    //   text   text to convert
    //   value  receives the converted number; untouched on failure
    //
    // Returns true on success, false when the number is out of int range.
    //
    static bool parseAccessionBound( std::string const & text, int & value )
    {
        errno = 0;
        const long parsed = std::strtol( text.c_str(), NULL, 10 );

        // ERANGE covers overflow of long; the explicit comparison covers
        // platforms where long is wider than int
        if ( ERANGE == errno || parsed < INT_MIN || parsed > INT_MAX )
        {
            return false;
        }

        value = static_cast<int>( parsed );
        return true;
    }

    //
    // Appends to 'dest' one accession number for every value from
    // lowerBound to upperBound (both inclusive). Each accession number is
    // 'prefix' followed by the value, zero-padded on the left to at least
    // the number of characters in lowerBound.
    //
    //   prefix      text placed in front of every generated number
    //   lowerBound  decimal text of the first value of the range
    //   upperBound  decimal text of the last value of the range
    //   dest        list the generated accession numbers are appended to
    //
    // Returns true when the range was expanded. Returns false, leaving
    // 'dest' untouched, when the lower value is not strictly less than the
    // upper value or when either bound does not fit in an int.
    //
    bool ACLineParser::expandAccessionNumber(
        std::string const & prefix,
        std::string const & lowerBound,
        std::string const & upperBound,
        StringList & dest
     )
    {
        int lower = 0;
        int upper = 0;

        if ( !parseAccessionBound( lowerBound, lower ) ||
             !parseAccessionBound( upperBound, upper ) )
        {
            return false;
        }

        if ( lower >= upper )
        {
            return false;
        }

        // setw() takes an int
        const int width = static_cast<int>( lowerBound.length() );

        // The exit test sits at the bottom of the body so that the counter
        // is never incremented past 'upper' (which may be INT_MAX).
        for ( int i = lower; ; ++i )
        {
            std::ostringstream oss;

            oss << prefix;
            oss << std::setfill( '0' ) << std::setw( width ) << i;

            dest.push_back( oss.str() );

            if ( i == upper )
            {
                break;
            }
        }

        return true;
    }


}
}
