# Mavric -- a module for manipulating and visualizing phylogenies

# Copyright (C) 2000 Rick Ree
# Email : rree@oeb.harvard.edu
# 	   
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2 
# of the License, or (at your option) any later version.
#   
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details. 
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

import re, string

# nexus block regexps
block_start_pattern_string = r'begin\s+(?P<block_name>\w+)\s*;'

block_end_pattern_string = r'(?P<end>end(\b|block)\s*;)'

block_start_pattern = re.compile(block_start_pattern_string,
                                 re.IGNORECASE | re.DOTALL)

block_end_pattern = re.compile(block_end_pattern_string,
                               re.IGNORECASE | re.DOTALL)



_name2cls = {}

class NexusBlock:
    """
    Generic container for one block of a NEXUS file.

    Every keyword argument given to the constructor becomes an instance
    attribute of the same name.
    """

    def __init__(self, **kw):
        """
        Expected keywords:
        'name', 'contents', 'start', 'end', 'span', 'cstart', 'cend', 'cspan'
        """
        # the keywords are the complete set of instance attributes
        self.__dict__.update(kw)

    def __repr__(self):
        # render the block in NEXUS syntax from its name and contents
        fields = (self.name, self.contents)
        return "begin %s;\n%s\nendblock;\n" % fields

class TreesBlock(NexusBlock):
    """
    A NEXUS 'trees' block.

    Besides the attributes supplied as keywords (see NexusBlock), an
    instance carries:

    treelist -- list of (name, newick_description) tuples, one for each
                'tree' command found in the block contents
    ttable   -- dict built from the 'translate' command, mapping the first
                whitespace-delimited token of each entry to the remainder
                of that entry; None if the block has no translate command
    """

    # trees block regexps
    #
    # A tree command is "tree [*] <name> = [comment] (<newick>);".  The
    # description is confined to one command: it starts at the first '('
    # and stops at the first ';' following a ')'.  A greedy '.+' here would
    # swallow every later tree of the block into the first description.
    # NOTE: a ';' inside a quoted label or a [comment] ends the match early.
    tree_pattern_string = (r'\btree\s+(?:\*\s*)?(?P<name>\w+)\s*[^(;]*'
                           r'(?P<newick_description>\([^;]*\)[^;]*;)')
    tree_pattern = re.compile(tree_pattern_string,
                              re.IGNORECASE | re.DOTALL)

    # "translate <entry>, <entry>, ... ;"
    translate_pattern = re.compile(
        r'\btranslate\s+(?P<translate_contents>.+?);',
        re.IGNORECASE | re.DOTALL)

    def __init__(self, **kw):
        """
        Store the keywords as attributes, then parse self.contents into
        self.treelist and self.ttable.

        Raises ValueError if a translate entry consists of a single token.
        """
        self.__dict__ = kw

        s = self.contents

        # treelist is a list of (name, newick_description) tuples
        self.treelist = self.tree_pattern.findall(s)

        # ttable is a mapping of the nexus translate command
        self.ttable = None
        tt_match = self.translate_pattern.search(s)
        if tt_match is not None:
            tt = self.ttable = {}
            for kvp in tt_match.group('translate_contents').split(','):
                kvp = kvp.strip()
                if not kvp:
                    # stray or trailing comma, nothing to record
                    continue
                # split once only, so a label may itself contain whitespace
                k, v = kvp.split(None, 1)
                tt[k] = v

_name2cls['trees'] = TreesBlock


class NexusError(Exception):
    """Raised when NEXUS text is malformed, e.g. a block is never closed."""


def get_block(s, name, pos=0):
    """
    Return the first block called `name` found in `s` at or after `pos`.

    s    -- text of a NEXUS file
    name -- block name, matched case-insensitively; it is inserted into a
            regular expression as is, so regex metacharacters in it are
            interpreted as such
    pos  -- index in `s` at which the search starts

    The result is an instance of the class registered for `name` in
    _name2cls (NexusBlock if none is registered), created with the
    keywords name, contents, start, end, span, cstart, cend and cspan.
    start/end/span delimit the whole block including its "begin ...;" and
    "end;" markers; cstart/cend/cspan delimit the contents between them.

    Returns None if no such block is found.
    Raises NexusError if the block is opened but never terminated.
    """
    start_pattern = re.compile(r'\bbegin\s+%s\s*;' % name, re.IGNORECASE)
    end_pattern = re.compile(r'\bend(\b|block)\s*;', re.IGNORECASE)

    start_match = start_pattern.search(s, pos)
    if start_match is None:
        return None

    end_match = end_pattern.search(s, start_match.end())
    if end_match is None:
        raise NexusError('no end to block %s' % name)

    span = (start_match.start(), end_match.end())
    cspan = (start_match.end(), end_match.start())
    contents = s[cspan[0]:cspan[1]]

    # unregistered block names fall back to the generic container
    cls = _name2cls.get(name.lower(), NexusBlock)
    return cls(name=name,
               contents=contents,
               start=span[0],
               end=span[1],
               span=span,
               cstart=cspan[0],
               cend=cspan[1],
               cspan=cspan)

if __name__ == '__main__':
    # Ad-hoc smoke test: read a NEXUS file from a fixed path and print the
    # translate table of its first trees block.
    with open('/tmp/testtext.txt', 'r') as f:
        s = f.read()

    tb = get_block(s, 'trees')
    if tb is None:
        # get_block returns None when the file has no trees block
        print('no trees block found')
    else:
        print(tb.ttable)



# Example: find every 'trees' block in s (get_block returns None once no further block is found)
#      pos = 0
#      while 1:
#          b = get_block(s, 'trees', pos)
#          if b == None: break
#          pos = b.end

