/**++
 *   
 * 2006-09-15 Christian Quast (CQ) - adaptation to the new EMBL (r87+) ID line format
 *
 *
 *   LICENSE
 *   -------
 *   
 *   Copyright (c) 2004 Renato Mancuso
 *   All rights reserved.
 *   
 *   Redistribution and use in source and binary forms, with or without modification, are 
 *   permitted provided that the following conditions are met:
 *   
 *   - Redistributions of source code must retain the above copyright notice, this list 
 *     of conditions and the following disclaimer.
 *   
 *   - Redistributions in binary form must reproduce the above copyright notice, this list
 *     of conditions and the following disclaimer in the documentation and/or other materials 
 *     provided with the distribution.
 *   
 *   - Neither the name of Renato Mancuso nor the names of its contributors may be used to 
 *     endorse or promote products derived from this software without specific prior written 
 *     permission.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS 
 *   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 
 *   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 *   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 
 *   IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
 *   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *   
--**/


#ifndef OPEN_EMBL_FIELD_DEFS_INCLUDED
#define OPEN_EMBL_FIELD_DEFS_INCLUDED

#if !defined(OPEN_EMBL_NO_PRAGMA_ONCE)
#pragma once
#endif

#include <string>
#include <vector>


namespace OpenEMBL
{
namespace Phoenix
{

//
//  Common type definitions
//
// Text type used for the string-valued fields in this header.
typedef std::string            String;
// Ordered collection of String values (a std::vector, so it starts out empty).
typedef std::vector<String>    StringList;

// A pair of long values (First, Last).
// The default constructor sets both ends to 0.
// NOTE(review): presumably an interval of sequence positions; whether the
// ends are inclusive is not visible here -- confirm with the parser.
struct Range
{
    long  First;
    long  Last;

    Range() :
        First(0),
        Last(0)
    {
    }
};

//
// ID line
//
// Fields of the ID line.
// A default-constructed IDLine has empty strings, Circular == false and
// both numeric fields set to 0.
struct IDLine
{
    String      EntryName;
    String      DataClass;
    bool        Circular;
    String      MolType;
    String      Division;
    long        SequenceLength;
    int         SequenceVersion;    // added by CQ -- adaptation to the new EMBL ID line format

    IDLine() :
        Circular(false),
        SequenceLength(0),
        SequenceVersion(0)
    {
    }
};

//
// XX line
//
// Representation of the XX line; it has no data members.
struct XXLine
{
};

//
// AC lines
//
// Data carried by the AC line(s).
// No user-declared constructor: the list is default-constructed (empty).
struct ACLine
{
    // presumably one element per accession number -- confirm with the parser
    StringList  AccessionNumberList;
};

//
// SV line
//
// Fields of the SV line: an accession number string plus an integer version.
// Default construction leaves AccessionNumber empty and Version at 0.
struct SVLine
{
    String      AccessionNumber;
    int         Version;

    SVLine() :
        Version(0)
    {
    }
};

//
// DT lines
//
// Fields of the DT lines.
// The two date fields are kept as strings (empty by default); the three
// integer fields are initialised to 0.
// NOTE(review): the date text format is not visible in this header.
struct DTLine
{
    String      FirstCreated;
    int         ReleaseCreated;
    String      LastUpdated;
    int         ReleaseLastUpdated;
    int         ExternalVersion;

    DTLine() :
        ReleaseCreated(0),
        ReleaseLastUpdated(0),
        ExternalVersion(0)
    {
    }
};

//
// DE lines
//
// Data carried by the DE lines, stored as a list of strings.
// NOTE(review): how the text is split into elements (per line, per word, ...)
// is decided by the parser and is not visible here.
struct DELine
{
    StringList    Text;
};

//
// KW lines
//
// Data carried by the KW lines.
struct KWLine
{
    // presumably one element per keyword -- confirm with the parser
    StringList  Keywords;
};

//
// Taxonomy lines (OS, OC, OG)
//
// Data carried by the OS line(s), stored as a list of strings (empty by default).
struct OSLine
{
    StringList  Organism;
};

// Data carried by the OC line(s), stored as a list of strings (empty by default).
struct OCLine
{
    // presumably one element per lineage node -- confirm with the parser
    StringList  Lineage;
};

// Data carried by the OG line(s), stored as a list of strings (empty by default).
struct OGLine
{
    StringList   Organelles;
};

//
// Reference lines (publications)
//
// Fields of the RN line: a single integer reference number, 0 by default.
struct RNLine
{
    int ReferenceNumber;

    RNLine() :
        ReferenceNumber(0)
    {
    }
};

// Data carried by the RC line(s) of a reference, stored as a list of strings.
struct RCLine
{
    StringList  Comment;
};

// Data carried by the RP line(s) of a reference: a list of Range values
// (empty by default).
struct RPLine
{
    std::vector<Range>  Positions;
};

// Data carried by an RX line of a reference: two strings, both empty by default.
// NOTE(review): presumably the name of an external database and the
// identifier within it -- confirm with the parser.
struct RXLine
{
    String      DatabaseIdentifier;
    String      Identifier;
};

// Data carried by the RG line(s) of a reference, stored as a list of strings.
struct RGLine
{
    StringList  Consortium;
};

// Data carried by the RA line(s) of a reference.
struct RALine
{
    // presumably one element per author -- confirm with the parser
    StringList  Authors;
};

// Data carried by the RT line(s) of a reference, stored as a list of strings.
struct RTLine
{
    StringList  Text;
};

//
//  RL line(s)
//

// 1) Submission:
//
//      ID = 'Submitted ('
//
//      RL   Submitted (19-NOV-1990) to the EMBL/GenBank/DDBJ databases.
//      RL   M.A. Hughes, UNIVERSITY OF NEWCASTLE UPON TYNE, MEDICAL SCHOOL, NEW
//      RL   CASTLE UPON TYNE, NE2  4HH, UK
//
// RL content for a submission ('Submitted (' form).
// Both members are default-constructed (empty).
struct SubmissionInfo
{
    String          SubmissionDate;     // (ex: '19-NOV-1990')
    StringList      Info;               // (ex: 'M.A. Hughes, UNIVERSITY OF...' etc.)
};

// 2) Unpublished:
//
//      ID = 'Unpublished.'
//
//      RL   Unpublished.
//
// RL content for the 'Unpublished.' form; it has no data members.
struct UnpublishedInfo
{
};

// 3) Book
//
//      ID = '(in)'
//
//      3 lists separated by ';'
//          a) editors
//          b) title:firstpage-lastpage;
//          c) publisher (pubdate)
//
//      editor list actually terminated by '(eds.);'
//
//      The format of the book line is assumed to be as follows:
//
//      (in) surname firstname, surname firstname,
//      surname firstname, ... (eds.);
//      booktitle: firstpage-lastpage;
//      publisher (pubdate)
//
//      For example:
//
//              (in) Cummings D.J., Borst P., Dawid I.B.,
//              Weissman and Fox C.F. (eds.);
//              EXTRACHROMOSOMAL DNA: 339-355;
//              New York: Academic Press (1979)
//
// RL content for a book reference ('(in)' form).
// Members follow the three ';'-separated parts of the line format:
// editors, "title: firstpage-lastpage", and "publisher (pubdate)".
// NOTE(review): whether delimiters such as '(eds.)' or the parentheses
// around the date are stripped is decided by the parser -- confirm there.
struct BookInfo
{
    StringList  Editors;            // editor list
    String      Title;              // book title
    String      FirstPage;          // kept as text, not as a number
    String      LastPage;           // kept as text, not as a number
    String      Publisher;
    String      PublicationDate;
};

// 4) Thesis
//
//      ID = 'Thesis '
//
//      The format of the thesis line is assumed to be as follows:
//
//      Thesis (pubdate), institute_name
//
//      For example:
//
//              Thesis (1993), The University of Tokyo
//              Thesis (1984), Universitaet Muenchen
//
// RL content for a thesis reference ('Thesis ' form):
// "Thesis (pubdate), institute_name".
struct ThesisInfo
{
    String          Year;           // the pubdate part, kept as text
    StringList      Institute;      // the institute_name part, as a list of strings
};

// 5) Patent
//
//      ID = 'Patent number '
//
//      The format of the patent line is assumed to be as follows:
//
//      Patent number docofficedocnum-doctype/orderin, pubdate
//      appname; appname; appname.
//
//      For example:
//
//      Patent number EP0238993-A/3, 30-SEP-1987.
//      BAYER AG.
//
// RL content for a patent reference ('Patent number ' form):
// "Patent number docofficedocnum-doctype/orderin, pubdate" followed by the
// ';'-separated applicant names.
struct PatentInfo
{
    String          PatentNumber;  // 'EP0238993-A/3'
    String          Date;          // the pubdate part, kept as text
    StringList      Applicants;    // presumably one element per appname -- confirm with the parser
};

// 6) online Journal
//
//      ID = 'Online Publication.'
//
//      RL   Online Publication.
//
// RL content for the 'Online Publication.' form; it has no data members.
struct OnlineJournalInfo
{
};

// 7) Electronic Resource
//
//      ID = '(er)'
//
//      RL   (er) free text
//
// RL content for an electronic resource ('(er)' form).
struct ElectronicResourceInfo
{
    // the free text of the line(s), stored as a list of strings;
    // presumably without the leading '(er)' marker -- confirm with the parser
    StringList  FreeText;
};

// 8) Misc Resource
//
//      ID = '(misc)'
//
//      RL   (misc) Proc. Vth Int. Symp. Biol. Terr. Isopods 2:365-380(2003).
//
// RL content for a miscellaneous resource ('(misc)' form).
struct MiscResourceInfo
{
    // the free text of the line(s), stored as a list of strings;
    // presumably without the leading '(misc)' marker -- confirm with the parser
    StringList  FreeText;
};

// 9) JournalArticle
//
//    ID = if all the previous ones failed then it must be a Journal Article...
//
//    FORMAT   : journaltitle volume(issue):firstpage-lastpage(pubdate).
//
//    EXAMPLE  : Nucleic Acids Res. 10:20-25(1988).
//               Scientific American 5(4):14-18(1978).
//               Scientific American 5(4-5):14-18(1978).
//               Scientific American 5:0-0(0).
//               Scientific American 0:0-0(0).
//               Nucleic Acids Res. A10:G20-G25(1988).
//
//    NOTE:
//      letters are allowed in volume, firstpage and lastpage
//
//      the journalname can contain blanks and numbers (numbers also at the
//      end, must not be confused with the volume!)
//
//      Volume can contain alphanum. chars but no blanks
//
//      Issue is not mandatory, can contain blanks and alphanum chars and is in
//      () (no brackets allowed without an issue)
//
//      pages can contain alphanum chars but no blanks
//
//      year must be in format YYYY or 0
//      year is mandatory for journalarticles.
//      for accepted, pages, year and volume can be '0' but cannot be left out.
//
//      ':', '-', '.' at the end and '()' around year are mandatory and are used
//      at the moment to validate the correct format and also to decide, what
//      belongs to the journal title and what is already the volume.
//
//      (multiple) blanks are allowed between the items
//
// RL content for a journal article, the fallback form:
//   journaltitle volume(issue):firstpage-lastpage(pubdate).
// Every part is kept as text because volume and pages may contain letters.
struct JournalArticleInfo
{
    String      JournalName;        // journaltitle
    String      Volume;             // volume
    String      Issue;              // issue; optional in the line format
    String      FirstPage;          // firstpage
    String      LastPage;           // lastpage
    String      PublicationDate;    // pubdate
};

//
// DR line
//
// Data carried by a DR line: three strings, all empty by default.
struct DRLine
{
    String      DatabaseIdentifier;
    String      PrimaryIdentifier;
    // NOTE(review): the member name is misspelled ('Indentifier'); it is left
    // unchanged because renaming it would break existing users of this struct.
    String      SecondaryIndentifier;
};

//
// AS line
//
// Fields of an AS line.
// Default construction yields zeroed spans (Range's own default), an empty
// PrimaryIdentifier and Complement == false.
struct ASLine
{
    Range       TPASpan;
    String      PrimaryIdentifier;
    Range       PrimarySpan;
    bool        Complement;

    ASLine() :
        Complement(false)
    {
    }
};

//
// CO lines
//
// One segment entry of the CO lines.
// Default construction yields an empty AccessionNumber, SequenceVersion 0,
// a zeroed Span and Complement == false.
struct COSegmentInfo
{
    String      AccessionNumber;
    long        SequenceVersion;
    Range       Span;
    bool        Complement;

    COSegmentInfo() :
        SequenceVersion(0),
        Complement(false)
    {
    }
};

// One gap entry of the CO lines: only its length, 0 by default.
struct COGapInfo
{
    long         Length;

    COGapInfo() :
        Length(0)
    {
    }
};

//
// Feature Table
//
// Values for LocationElement::e_flags.
// Per the comment block that documents LocationElement, the kind
// (SINGLE / INFRABASE / RANGE) is read as e_flags & 0xf, and
// LOCATION_ELEMENT_COMPLEMENT (0x80) is a separate bit tested with
// e_flags & 0x80.
enum LocationElementType
{
    LOCATION_ELEMENT_SINGLE     = 0,        // x
    LOCATION_ELEMENT_INFRABASE  = 1,        // x^y
    LOCATION_ELEMENT_RANGE      = 2,        // x..y
    LOCATION_ELEMENT_COMPLEMENT = 0x80      // complement(loc)
};

// Form of a single base position.
// The numeric values match the x_flags / y_flags encoding (0..3) described
// in the comment block that documents LocationElement.
enum BasePositionType
{
    BASE_POSITION_SIMPLE_BASE = 0,  // x1
    BASE_POSITION_LEFT_OPEN   = 1,  // <x1
    BASE_POSITION_RIGHT_OPEN  = 2,  // >x1
    BASE_POSITION_SPAN        = 3   // (x1.x2)
};

// ----------------------------------------------------------------------
//
//  kind := e_flags & 0xf;
//
//      kind = 0 then Location = x
//      kind = 1 then Location = x^y
//      kind = 2 then Location = x..y
//
//  complement := e_flags & 0x80
//
//  x_flags = 0 then x = x1
//  x_flags = 1 then x = <x1
//  x_flags = 2 then x = >x1
//  x_flags = 3 then x = (x1.x2)
//
//  y_flags = 0 then y = y1
//  y_flags = 1 then y = <y1
//  y_flags = 2 then y = >y1
//  y_flags = 3 then y = (y1.y2)
//
// ----------------------------------------------------------------------
// One element of a feature location.
// e_flags holds the element kind (e_flags & 0xf) and the complement bit
// (e_flags & 0x80); x_flags / y_flags say how the x and y positions are
// written, with x1/x2 and y1/y2 holding their numeric parts.
// Every numeric member starts at 0 and AccessionNumber starts empty.
struct LocationElement
{
    String      AccessionNumber;
    int         SequenceVersion;
    unsigned    e_flags;
    unsigned    x_flags;
    unsigned    y_flags;
    long        x1;
    long        x2;
    long        y1;
    long        y2;

    LocationElement() :
        SequenceVersion(0),
        e_flags(0),
        x_flags(0),
        y_flags(0),
        x1(0),
        x2(0),
        y1(0),
        y2(0)
    {
    }
};

// Sequence of LocationElement values; the type of FeatureLocation::LocationElements.
typedef std::vector<LocationElement> LocationElementList;

// Location of a feature: a list of elements plus two flags.
// Both flags default to false and the element list starts empty.
struct FeatureLocation
{
    bool                IsJoin;
    bool                IsComplement;
    LocationElementList LocationElements;

    // Interpretation when LocationElements.size() > 1:
    //   IsJoin        => join
    //   !IsJoin       => order
    //   IsComplement  => complement([join|order](loc))

    FeatureLocation() :
        IsJoin(false),
        IsComplement(false)
    {
    }
};

// A feature table key together with its location.
// Key is empty by default; Location uses FeatureLocation's own defaults.
struct FeatureKey
{
    String          Key;
    FeatureLocation Location;
};

// A feature table qualifier: a name and its value.
// The value is stored as a list of strings.
// NOTE(review): how the value text is split into list elements is decided
// by the parser and is not visible here.
struct FeatureQualifier
{
    String      Name;
    StringList  Value;
};

//
// SQ line
//
// Fields of the SQ line: the sequence length and one counter each for
// A, C, G, T and "other". All values are 0 by default.
struct SQLine
{
    long        SequenceLength;
    long        A_Count;
    long        C_Count;
    long        G_Count;
    long        T_Count;
    long        Other_Count;

    SQLine() :
        SequenceLength(0),
        A_Count(0),
        C_Count(0),
        G_Count(0),
        T_Count(0),
        Other_Count(0)
    {
    }
};

//
// sequence data line
//
// Content of one sequence data line, kept as a single string (empty by default).
// NOTE(review): whether blanks and the trailing position counter have been
// removed from the text is decided by the parser -- confirm there.
struct SequenceLine
{
    String      Sequence;
};

//
// CC lines
//
// Data carried by the CC lines, stored as a list of strings (empty by default).
struct CCLine
{
    StringList  Comment;
};

//
//  AC* line
//
// Description:
//    The AC* line is an internal line type used by genome
//    project submitters to supply their internal identifiers.
//
// Line format:
//
//    AC * _GP_ID[.GP_VERSION] [WGS_VERSION]
//
//    where:
//    - _GP_ID:      identifier starting with an underscore
//    - GP_VERSION:  integer > 0
//    - WGS_VERSION: integer 1-99 (only for WGS projects!)
//
// Fields of the AC * line:
//   AC * _GP_ID[.GP_VERSION] [WGS_VERSION]
// Both version fields are optional in the line format and are 0 by default;
// GenomeProjectID starts empty.
struct ACStarLine
{
    String      GenomeProjectID;    // _GP_ID
    int         Version;            // GP_VERSION
    int         WGSVersion;         // WGS_VERSION

    ACStarLine() :
        Version(0),
        WGSVersion(0)
    {
    }
};

}
}

#endif // OPEN_EMBL_FIELD_DEFS_INCLUDED

