/**++
 *   
 *   LICENSE
 *   -------
 *   
 *   Copyright (c) 2004 Renato Mancuso
 *   All rights reserved.
 *   
 *   Redistribution and use in source and binary forms, with or without modification, are 
 *   permitted provided that the following conditions are met:
 *   
 *   - Redistributions of source code must retain the above copyright notice, this list 
 *     of conditions and the following disclaimer.
 *   
 *   - Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials 
 *     provided with the distribution.
 *   
 *   - Neither the name of Renato Mancuso nor the names of its contributors may be used to 
 *     endorse or promote products derived from this software without specific prior written 
 *     permission.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS 
 *   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 *   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 *   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
 *   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *   
--**/


#include "BookInfoParser.hpp"
#include "ParserErrors.hpp"
#include "ParserWarnings.hpp"
#include "StringUtils.hpp"
#include "ListReader.hpp"
#include "RLLineCodes.hpp"
#include "Regex.hpp"

#include <algorithm>
#include <sstream>
#include <cstdlib>
#include <cstring>
#include <cassert>

namespace OpenEMBL
{
namespace Phoenix
{

    //
    // Constructor.
    //
    // Both arguments are forwarded unchanged to the ItemParserImpl<BookInfo>
    // base; this class adds no state of its own here.
    //
    BookInfoParser::BookInfoParser(IParserCtx*             pParserCtx,
                                   IItemHandler<BookInfo>* pItemHandler)
        : ItemParserImpl<BookInfo>(pParserCtx, pItemHandler)
    {
        // nothing further to initialise
    }

    //
    // Parser entry point.
    //
    // Looks at the current line and:
    //   - logs ERROR_EOF_FOUND and returns when there is no current line;
    //   - calls processLines() when the line starts with RL_BOOK (compared
    //     with strNCaseCmp over strlen(RL_BOOK) characters);
    //   - otherwise logs ERROR_INVALID_RL_BOOK_LINE for that line and
    //     advances to the next line.
    //
    METHODIMP BookInfoParser::parse()
    {
        const long lineNo = currentLineNumber();
        LPCSTR     line   = currentLine();

        if (NULL == line)
        {
            logError(lineNo, ERROR_EOF_FOUND, NULL);
            return;
        }

        const bool startsWithBookCode =
            (0 == strNCaseCmp(line, RL_BOOK, strlen(RL_BOOK)));

        if (startsWithBookCode)
        {
            processLines();
            return;
        }

        // Not a book reference line: report it, then skip it.
        logError(lineNo, ERROR_INVALID_RL_BOOK_LINE, line);
        nextLine();
    }

    void BookInfoParser::processLines()
    {
        long theLineNumber = currentLineNumber();

        StringList theEditorInfo;
        StringList theTitleInfo;
        StringList thePublisherInfo;

        getEditorInfoLines(theEditorInfo);
        getTitleInfoLines(theTitleInfo);
        getPublisherInfoLines(thePublisherInfo);

        BookInfo theBookInfo;

        if (!processEditorInfo(
                        theEditorInfo, 
                        theLineNumber, 
                        theBookInfo))
            return;

        if (!processTitleInfo(
                        theTitleInfo, 
                        theLineNumber + long(theEditorInfo.size()), 
                        theBookInfo))
            return;

        if (!processPublisherInfo(
                        thePublisherInfo, 
                        theLineNumber + long(theEditorInfo.size() + theTitleInfo.size()), 
                        theBookInfo))
            return;

        notifyParsed(theLineNumber, theBookInfo);
    }

    //
    // Returns true when theLine begins with the five characters "RL   "
    // (exact, case-sensitive comparison). theLine must not be NULL.
    //
    bool BookInfoParser::isRLLine(const char* theLine)
    {
        static const char thePrefix[] = "RL   ";

        assert(NULL != theLine);

        // sizeof includes the terminating NUL, hence the - 1.
        const int theDiff = strncmp(theLine, thePrefix, sizeof(thePrefix) - 1);

        return theDiff == 0;
    }

    //
    // Returns the part of theLine that follows its first five characters,
    // passed through trimRight(). theLine is expected to be at least five
    // characters long (asserted).
    //
    String BookInfoParser::getLineContent(const String & theLine)
    {
        assert(theLine.length() >= 5);

        const String theTail = theLine.substr(5);

        return trimRight(theTail);
    }

    //
    // Joins theLines into one string: each entry is passed through
    // trimLeft() and consecutive entries are separated by a single space.
    // An empty list yields an empty string.
    //
    String BookInfoParser::makeString(StringList const & theLines)
    {
        std::ostringstream theJoined;

        for (size_t idx = 0; idx < theLines.size(); ++idx)
        {
            // separator goes between entries, never before the first one
            if (idx != 0)
                theJoined << ' ';

            theJoined << trimLeft(theLines[idx]);
        }

        return theJoined.str();
    }

    //
    // Collects the editor lines, starting at the current line.
    //
    // Each RL line has its content (see getLineContent) appended to
    // theEditorInfo and is then consumed with nextLine(). Collection stops
    //   - when there is no current line,
    //   - when the current line is not an RL line (it is left unconsumed),
    //   - right after consuming a line whose content ends with ';'.
    //
    void BookInfoParser::getEditorInfoLines(StringList & theEditorInfo)
    {
        for (;;)
        {
            const char* theRawLine = currentLine();

            if (NULL == theRawLine || !isRLLine(theRawLine))
                return;

            const std::string theContent = getLineContent(theRawLine);

            theEditorInfo.push_back(theContent);
            nextLine();

            // a trailing ';' closes the editor group
            if (!theContent.empty() && ';' == *theContent.rbegin())
                return;
        }
    }

    //
    // Collects the title lines, starting at the current line.
    //
    // Each RL line has its content (see getLineContent) appended to
    // theTitleInfo and is then consumed with nextLine(). Collection stops
    //   - when there is no current line,
    //   - when the current line is not an RL line (it is left unconsumed),
    //   - right after consuming a line whose content ends with ';'.
    //
    void BookInfoParser::getTitleInfoLines(StringList & theTitleInfo)
    {
        for (;;)
        {
            const char* theRawLine = currentLine();

            if (NULL == theRawLine || !isRLLine(theRawLine))
                return;

            const std::string theContent = getLineContent(theRawLine);

            theTitleInfo.push_back(theContent);
            nextLine();

            // a trailing ';' closes the title group
            if (!theContent.empty() && ';' == *theContent.rbegin())
                return;
        }
    }

    //
    // Collects the publisher lines, starting at the current line.
    //
    // Every consecutive RL line has its content (see getLineContent)
    // appended to thePublisherInfo and is consumed with nextLine(). Unlike
    // the editor and title groups there is no ';' stop condition: the
    // group ends only when there is no current line or the current line is
    // not an RL line.
    //
    void BookInfoParser::getPublisherInfoLines(StringList & thePublisherInfo)
    {
        for (;;)
        {
            const char* theRawLine = currentLine();

            if (NULL == theRawLine || !isRLLine(theRawLine))
                return;

            const std::string theContent = getLineContent(theRawLine);

            thePublisherInfo.push_back(theContent);
            nextLine();
        }
    }

    //
    // Turns the collected editor lines into theBookInfo.Editors.
    //
    // Parameters:
    //      theEditorInfo   editor lines; the first and last entries are
    //                      rewritten in place while prefix and suffix are
    //                      stripped
    //      theLineNumber   number of the first editor line
    //      theBookInfo     receives the editor list
    //
    // Returns false when there are no editor lines (logged as
    // ERROR_MISSING_BOOK_EDITOR_INFO) or when any of the three steps
    // (strip prefix, strip suffix, build list) fails.
    //
    bool BookInfoParser::processEditorInfo(
                StringList &        theEditorInfo,
                long                theLineNumber,
                BookInfo &          theBookInfo)
    {
        if (0 == theEditorInfo.size())
        {
            logError(theLineNumber, ERROR_MISSING_BOOK_EDITOR_INFO, NULL);
            return false;
        }

        const size_t theLastIndex = theEditorInfo.size() - 1;

        String & theFirstLine = theEditorInfo[0];
        String & theLastLine  = theEditorInfo[theLastIndex];

        // Evaluated left to right; the first failing step ends the chain.
        return stripEditorListPrefix(theFirstLine, theLineNumber)
            && stripEditorListSuffix(theLastLine, theLineNumber + (long) theLastIndex)
            && makeEditorList(theEditorInfo, theLineNumber, theBookInfo.Editors);
    }

    //
    // Drops the first four characters of theLine (expected to be "(in)",
    // checked by assertion only) and passes the rest through trimLeft().
    // theLine is rewritten in place.
    //
    // Always returns true; theLineNumber is not used.
    //
    // NOTE(review): the "(in)" precondition is presumably established by
    // the RL_BOOK check in parse() - confirm against the RL_BOOK definition.
    //
    bool BookInfoParser::stripEditorListPrefix(String & theLine, long theLineNumber)
    {
        assert(0 == strNCaseCmp(theLine.c_str(), "(in)", 4));

        const String theRemainder = theLine.substr(4);

        theLine = trimLeft(theRemainder);

        return true;
    }

    //
    // Removes the trailing ';' terminator and, when present, the "(eds.)"
    // marker from the last line of the editor list. theLine is rewritten
    // in place.
    //
    // Parameters:
    //      theLine         last editor line
    //      theLineNumber   line number used when reporting problems
    //
    // Returns false, after logging an error, when the line is empty once
    // passed through trimRight() (ERROR_RL_LINE_EMPTY_EDITOR_LIST) or does
    // not end with ';' (ERROR_RL_LINE_EDITOR_LIST_MISSING_TERMINATOR).
    // A missing "(eds.)" marker only produces a warning; the call still
    // returns true in that case.
    //
    bool BookInfoParser::stripEditorListSuffix(String & theLine, long theLineNumber)
    {
        //
        // Trim before testing for emptiness: a whitespace-only line would
        // otherwise pass the test, become empty here, and the terminator
        // check below would then index position length() - 1 of an empty
        // string.
        //
        theLine = trimRight(theLine);

        if (theLine.empty())
        {
            logError(theLineNumber,
                     ERROR_RL_LINE_EMPTY_EDITOR_LIST,
                     NULL);
            return false;
        }

        if (theLine[theLine.length() - 1] != ';')
        {
            logError(theLineNumber,
                     ERROR_RL_LINE_EDITOR_LIST_MISSING_TERMINATOR,
                     theLine.c_str());
            return false;
        }

        // drop the ';' terminator
        theLine.erase(theLine.length() - 1);

        //
        // Regex is: \(\s*eds\s*\.\s*\)\s*$
        //
        static Regex theRegex("\\(\\s*eds\\s*\\.\\s*\\)\\s*$",
                                     PCRE_CASELESS);

        Match theMatches;

        if (!regexSearch(theLine, theMatches, theRegex))
        {
            // marker absent: report it but keep the line as it is
            logWarning(theLineNumber,
                       WARNING_MISSING_EDITOR_LIST_TERMINATOR,
                       theLine.c_str());
        }
        else
        {
            // cut it out
            theLine = regexMerge(theLine, theRegex, "");
        }

        return true;
    }

    //
    // Feeds the editor lines to a ListReader, which fills theEditors.
    //
    // The reader is constructed with this parser, ',' , 0 and
    // ListReader::LACK_TERMINATOR (presumably: comma-separated items with
    // no list terminator expected - confirm against ListReader).
    //
    // Always returns true; whatever readList() returns is not inspected.
    //
    bool BookInfoParser::makeEditorList(
                StringList const & theEditorInfo,
                long               theLineNumber,
                StringList &       theEditors)
    {
        ListReader theReader(this, ',', 0, ListReader::LACK_TERMINATOR);
        theReader.readList(theLineNumber, theEditorInfo, theEditors);

        return true;
    }

    //
    // Extracts title, first page and last page from the title lines.
    //
    // The lines are joined with makeString() and matched against a regex
    // of the shape "<title>: <first>-<last>;". On success the three
    // captures, each passed through trim(), are stored in
    // theBookInfo.Title, .FirstPage and .LastPage.
    //
    // Returns false after logging ERROR_MISSING_BOOK_TITLE_INFO (no lines)
    // or ERROR_INVALID_BOOK_TITLE_FORMAT (joined text does not match).
    //
    bool BookInfoParser::processTitleInfo(
                StringList &  theTitleInfo,
                long          theLineNumber,
                BookInfo &    theBookInfo)
    {
        if (0 == theTitleInfo.size())
        {
            logError(theLineNumber, ERROR_MISSING_BOOK_TITLE_INFO, NULL);
            return false;
        }

        std::string theJoined = makeString(theTitleInfo);

        //
        // Regex is: ^(.+):\s*([^-\s]+)\s*-\s*([^\s;]+)\s*;$
        //
        // capture groups:
        //      1) title
        //      2) firstpage
        //      3) lastpage
        //
        static Regex theTitleRegex("^(.+):\\s*([^-\\s]+)\\s*-\\s*([^\\s;]+)\\s*;$",
                                   PCRE_CASELESS);

        Match theGroups;

        if (regexSearch(theJoined, theGroups, theTitleRegex))
        {
            theBookInfo.Title     = trim(theGroups.str(1));
            theBookInfo.FirstPage = trim(theGroups.str(2));
            theBookInfo.LastPage  = trim(theGroups.str(3));
            return true;
        }

        logError(theLineNumber, ERROR_INVALID_BOOK_TITLE_FORMAT, theJoined.c_str());
        return false;
    }

    //
    // Extracts publisher and publication date from the publisher lines.
    //
    // The lines are joined with makeString(). A trailing parenthesised
    // group is taken as the publication date and removed from the text;
    // what remains, passed through trim(), becomes theBookInfo.Publisher.
    //
    // Warnings, both reported against the last publisher line:
    //   - WARNING_MISSING_PUBLICATION_DATE when there is no trailing
    //     parenthesised group (the whole text then becomes the publisher);
    //   - WARNING_INVALID_TERMINATOR_IN_PUBLISHER_INFO when a '.' or ';'
    //     follows the closing parenthesis.
    //
    // Returns false only when there are no publisher lines (logged as
    // ERROR_MISSING_BOOK_PUBLISHER_INFO).
    //
    bool BookInfoParser::processPublisherInfo(
                StringList &  thePublisherInfo,
                long          theLineNumber,
                BookInfo &    theBookInfo)
    {
        if (0 == thePublisherInfo.size())
        {
            logError(theLineNumber, ERROR_MISSING_BOOK_PUBLISHER_INFO, NULL);
            return false;
        }

        std::string theText = makeString(thePublisherInfo);

        //
        // Regex is: \(([^)]+)\)\s*([\.;]){0,1}\s*$
        //
        static Regex theDateRegex("\\(([^)]+)\\)\\s*([\\.;]){0,1}\\s*$");

        Match theGroups;

        // both warnings point at the last publisher line
        const long theLastLineNumber = theLineNumber + long(thePublisherInfo.size() - 1);

        if (regexSearch(theText, theGroups, theDateRegex))
        {
            theBookInfo.PublicationDate = trim(theGroups.str(1));

            // group 2 is the optional '.' / ';' after the parenthesis
            if (theGroups[2].matched)
            {
                logWarning(theLastLineNumber,
                           WARNING_INVALID_TERMINATOR_IN_PUBLISHER_INFO,
                           theText.c_str());
            }

            // remove the date (and any terminator) from the publisher text
            theText = regexMerge(theText, theDateRegex, "");
        }
        else
        {
            logWarning(theLastLineNumber,
                       WARNING_MISSING_PUBLICATION_DATE,
                       theText.c_str());
        }

        theBookInfo.Publisher = trim(theText);

        return true;
    }

}
}
