/**++
 *   
 *   LICENSE
 *   -------
 *   
 *   Copyright (c) 2004 Renato Mancuso
 *   All rights reserved.
 *   
 *   Redistribution and use in source and binary forms, with or without modification, are 
 *   permitted provided that the following conditions are met:
 *   
 *   - Redistributions of source code must retain the above copyright notice, this list 
 *     of conditions and the following disclaimer.
 *   
 *   - Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials 
 *     provided with the distribution.
 *   
 *   - Neither the name of Renato Mancuso nor the names of its contributors may be used to 
 *     endorse or promote products derived from this software without specific prior written 
 *     permission.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS 
 *   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 *   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 *   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
 *   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *   
--**/


#include "JournalArticleInfoParser.hpp"
#include "ParserErrors.hpp"
#include "ParserWarnings.hpp"
#include "StringUtils.hpp"
#include "RLLineCodes.hpp"
#include "Regex.hpp"

#include <algorithm>
#include <sstream>
#include <cstdlib>
#include <cstring>
#include <cassert>

namespace OpenEMBL
{
namespace Phoenix
{

    //
    // Constructs the parser. Both arguments are forwarded unchanged to the
    // ItemParserImpl<JournalArticleInfo> base class; the constructor body
    // itself performs no additional work.
    //
    // pParserCtx   - parser context handed to the base class
    // pItemHandler - item handler handed to the base class
    //
    JournalArticleInfoParser::JournalArticleInfoParser(
        IParserCtx*                         pParserCtx,
        IItemHandler<JournalArticleInfo>*   pItemHandler)
        : ItemParserImpl<JournalArticleInfo>(pParserCtx, pItemHandler)
    {
    }

    //
    // Parses one journal article reference starting at the current line.
    //
    //  - If there is no current line, ERROR_EOF_FOUND is logged and the
    //    method returns without consuming anything.
    //  - If the current line does not start with RL_JOURNAL,
    //    ERROR_INVALID_RL_LINE is logged, the line is skipped via nextLine()
    //    and the method returns.
    //  - Otherwise the lines accepted by isRLLine are collected with
    //    extractLines() and handed to processLines(), together with the
    //    line number at which the item started.
    //
    METHODIMP JournalArticleInfoParser::parse()
    {
        const long startLineNumber = currentLineNumber();
        LPCSTR     firstLine       = currentLine();

        if (NULL == firstLine)
        {
            logError(startLineNumber, ERROR_EOF_FOUND, NULL);
            return;
        }

        const bool hasJournalPrefix =
            (0 == strncmp(firstLine, RL_JOURNAL, strlen(RL_JOURNAL)));

        if (!hasJournalPrefix)
        {
            logError(startLineNumber, ERROR_INVALID_RL_LINE, firstLine);
            nextLine();
            return;
        }

        StringList collectedLines;
        extractLines(isRLLine, collectedLines);
        processLines(collectedLines, startLineNumber);
    }

    //
    // Line predicate passed to extractLines() by parse().
    //
    // Returns true when the first five characters of theLine are "RL"
    // followed by three blanks. theLine must not be NULL (checked with
    // assert only).
    //
    bool JournalArticleInfoParser::isRLLine(const char* theLine)
    {
        static const char thePrefix[] = "RL   ";

        assert(theLine != NULL);

        // sizeof counts the terminating NUL, hence the -1.
        return strncmp(theLine, thePrefix, sizeof(thePrefix) - 1) == 0;
    }

    //
    // Returns the text of theLine from offset 5 onwards (i.e. everything
    // after the five-character line code), passed through trim().
    //
    // theLine must be at least five characters long; this is checked with
    // assert only.
    //
    String JournalArticleInfoParser::getLineContent(const String & theLine)
    {
        const size_t theCodeWidth = 5;

        assert(theCodeWidth <= theLine.length());

        const String thePayload = theLine.substr(theCodeWidth);
        return trim(thePayload);
    }

    //
    // Joins the content of all given lines into a single string.
    //
    // Each line is reduced with getLineContent() and the pieces are
    // concatenated with exactly one blank between consecutive pieces.
    // An empty list yields an empty string.
    //
    String JournalArticleInfoParser::makeString(StringList const & theLines)
    {
        std::ostringstream theJoined;
        const size_t       theCount = theLines.size();

        for (size_t index = 0; index < theCount; ++index)
        {
            // separator goes between pieces, never before the first one
            if (index != 0)
                theJoined << ' ';

            theJoined << getLineContent(theLines[index]);
        }

        return theJoined.str();
    }

    void JournalArticleInfoParser::processLines(const StringList & theLines, long theLineNumber)
    {
        JournalArticleInfo theField;

        String theLine = makeString(theLines);

        if (!getPublicationDate(theLine, theLineNumber, theField))
            return;

        if (!getPageInfo(theLine, theLineNumber, theField))
            return;

        if (!getIssue(theLine, theLineNumber, theField))
            return;

        if (!getVolume(theLine, theLineNumber, theField))
            return;

        if (!getJournalName(theLine, theLineNumber, theField))
            return;

        notifyParsed(theLineNumber, theField);
    }

    //
    // Extracts the publication date from theLine.
    //
    // Regex is: \(([^)]+)\)\s*(\.){0,1}$
    //
    // groups
    //      1) date
    //      2) final dot
    //
    // On a match, group 1 (trimmed) is stored in theField.PublicationDate,
    // WARNING_MISSING_FINAL_DOT is logged when group 2 did not participate
    // in the match, and theLine is replaced by the result of
    // regexMerge(theLine, regex, ""). Returns true.
    //
    // Without a match ERROR_MISSING_PUBLICATION_DATE is logged, theLine and
    // theField are left untouched, and false is returned.
    //
    bool JournalArticleInfoParser::getPublicationDate(
                String &                theLine, 
                long                    theLineNumber, 
                JournalArticleInfo &    theField)
    {
        static Regex theDatePattern("\\(([^)]+)\\)\\s*(\\.){0,1}$");

        Match theGroups;

        if (regexSearch(theLine, theGroups, theDatePattern))
        {
            const bool hasFinalDot = theGroups[2].matched;

            if (!hasFinalDot)
            {
                logWarning(theLineNumber, WARNING_MISSING_FINAL_DOT, theLine.c_str());
            }

            theField.PublicationDate = trim(theGroups.str(1));
            theLine = regexMerge(theLine, theDatePattern, "");

            return true;
        }

        logError(theLineNumber, ERROR_MISSING_PUBLICATION_DATE, theLine.c_str());
        return false;
    }

    //
    // Extracts the page information from the end of theLine.
    //
    // Regex is: :\s*([^-\s]+)(?:\s*-\s*([^\s]+)){0,1}\s*$
    //
    // groups
    //      1) first page
    //      2) last page (optional)
    //
    // On a match, theField.FirstPage receives group 1 (trimmed).
    // theField.LastPage receives group 2 (trimmed) when present; otherwise
    // WARNING_RL_JOURNAL_MISSING_LAST_PAGE is logged and LastPage is set
    // to the same value as FirstPage. theLine is then replaced by the
    // result of regexMerge(theLine, regex, "") and true is returned.
    //
    // Without a match ERROR_RL_JOURNAL_MISSING_PAGE_INFO is logged and
    // false is returned.
    //
    bool JournalArticleInfoParser::getPageInfo(
                String &                theLine, 
                long                    theLineNumber, 
                JournalArticleInfo &    theField)
    {
        static Regex thePagePattern(":\\s*([^-\\s]+)(?:\\s*-\\s*([^\\s]+)){0,1}\\s*$");

        Match theGroups;

        const bool isFound = regexSearch(theLine, theGroups, thePagePattern);

        if (!isFound)
        {
            logError(theLineNumber, ERROR_RL_JOURNAL_MISSING_PAGE_INFO, theLine.c_str());
            return false;
        }

        const bool hasLastPage = theGroups[2].matched;

        if (!hasLastPage)
        {
            logWarning(theLineNumber, WARNING_RL_JOURNAL_MISSING_LAST_PAGE, theLine.c_str());
        }

        theField.FirstPage = trim(theGroups.str(1));

        if (!hasLastPage)
        {
            // single-page reference: last page mirrors the first one
            theField.LastPage = theField.FirstPage;
        }
        else
        {
            theField.LastPage = trim(theGroups.str(2));
        }

        theLine = regexMerge(theLine, thePagePattern, "");

        return true;
    }

    //
    // Extracts the optional issue from the end of theLine.
    //
    // Regex is: \(([^(]*)\)\s*$
    //
    // groups
    //      1) issue
    //
    // When the pattern matches, group 1 (trimmed) is stored in
    // theField.Issue, WARNING_JOURNAL_EMPTY_ISSUE is logged if the result
    // is empty, and theLine is replaced by the result of
    // regexMerge(theLine, regex, ""). When it does not match, nothing is
    // changed.
    //
    // The issue is optional, so this method always returns true.
    //
    bool JournalArticleInfoParser::getIssue(
                String &                theLine, 
                long                    theLineNumber, 
                JournalArticleInfo &    theField)
    {
        static Regex theIssuePattern("\\(([^(]*)\\)\\s*$");

        Match theGroups;

        // No issue present: not an error, leave everything as it is.
        if (!regexSearch(theLine, theGroups, theIssuePattern))
        {
            return true;
        }

        theField.Issue = trim(theGroups.str(1));

        if (theField.Issue.empty())
        {
            logWarning(theLineNumber, WARNING_JOURNAL_EMPTY_ISSUE, theLine.c_str());
        }

        theLine = regexMerge(theLine, theIssuePattern, "");

        return true;
    }

    //
    // Extracts the volume, taken to be the text after the last blank of
    // theLine.
    //
    // theLine is first replaced by trimRight(theLine). If it then contains
    // a blank, the text from the last blank onwards (trimmed) is stored in
    // theField.Volume, theLine is truncated at that blank, and true is
    // returned.
    //
    // If no blank is found ERROR_RL_JOURNAL_MISSING_VOLUME_INFO is logged
    // and false is returned; theLine keeps its right-trimmed value.
    //
    bool JournalArticleInfoParser::getVolume(
                String &                theLine, 
                long                    theLineNumber, 
                JournalArticleInfo &    theField)
    {
        theLine = trimRight(theLine);

        const std::string::size_type theLastBlank = theLine.rfind(' ');

        if (std::string::npos != theLastBlank)
        {
            theField.Volume = trim(theLine.substr(theLastBlank));
            theLine.erase(theLastBlank);
            return true;
        }

        logError(theLineNumber, ERROR_RL_JOURNAL_MISSING_VOLUME_INFO, theLine.c_str());
        return false;
    }

    //
    // Stores trim(theLine) in theField.JournalName.
    //
    // Returns true when the resulting name is non-empty. Otherwise
    // ERROR_RL_JOURNAL_MISSING_JOURNAL_NAME is logged and false is
    // returned. theLine itself is not modified.
    //
    bool JournalArticleInfoParser::getJournalName(
                String &                theLine, 
                long                    theLineNumber, 
                JournalArticleInfo &    theField)
    {
        theField.JournalName = trim(theLine);

        const bool hasName = !theField.JournalName.empty();

        if (!hasName)
        {
            logError(theLineNumber, ERROR_RL_JOURNAL_MISSING_JOURNAL_NAME, theLine.c_str());
        }

        return hasName;
    }


}
}
