/**************************************************************************
 * This code is part of the supporting infrastructure for ATA Mapper. 
 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
 * Author: Nathan Edwards
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received (LICENSE.txt) a copy of the GNU General Public 
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *************************************************************************/


// -*- C++ -*- //

#ifndef _KEYWORD_TREE_T
#define _KEYWORD_TREE_T

#include "keyword_tree.h"

// Builds a node that has no failure link, no output link and no pattern
// list yet (all three pointers start out null).
template <class KTND>
ktnode<KTND>::ktnode()
  : fail_(0),
    output_(0),
    patid_(0)
{
}

// Frees the pattern list allocated by add_patid(), if any.
template <class KTND>
ktnode<KTND>::~ktnode() {
  // Deleting a null pointer is a no-op, so no explicit guard is required.
  delete patid_;
}

// Accessor: the node currently stored as this node's failure link
// (null until one has been assigned).
template <class KTND>
KTND * ktnode<KTND>::fail() const {
  return this->fail_;
}

// Mutator: stores v as this node's failure link. The node is not owned.
template <class KTND>
void ktnode<KTND>::fail(KTND * const v) {
  this->fail_ = v;
}

// Accessor: the node currently stored as this node's output link
// (null until one has been assigned).
template <class KTND>
KTND * ktnode<KTND>::output() const {
  return this->output_;
}

// Mutator: stores v as this node's output link. The node is not owned.
template <class KTND>
void ktnode<KTND>::output(KTND * const v) {
  this->output_ = v;
}

// Accessor: reference to the pointer to this node's pattern list.
// The pointer is null when no pattern has been added via add_patid().
template <class KTND>
tinylist<pattern_list::const_iterator> * const &
ktnode<KTND>::patid() const {
  return this->patid_;
}

// Records `it` as a pattern attached to this node. The list itself is
// allocated on the first call; new entries go to the front of the list.
template <class KTND>
void ktnode<KTND>::add_patid(pattern_list::const_iterator const & it) {
  if (patid_ == 0) {
    patid_ = new tinylist<pattern_list::const_iterator>;
  }
  patid_->push_front(it);
}

// Inserts the NUL-terminated keyword `pat` into the tree below node `v`
// and attaches `it` to the node reached at the end of the keyword.
//
// For every character of the keyword the matching child of the current
// node is followed, or created with add_child() when it does not exist,
// and the character is flagged in relchars_.
template<class KTND>
void keyword_tree<KTND>::add_keyword_(KTND * v, char const * const pat, 
                                      pattern_list::const_iterator it) {
  assert(v!=0);
  assert(pat!=0);

  KTND *node = v;
  for (char const *p = pat; *p != '\0'; ++p) {
    // Look the child up first, then mark the character as relevant.
    KTND *next = node->lchild(*p);
    relchars_[(unsigned char)(*p)] = true;
    if (!next) {
      next = node->add_child(*this,*p);
    }
    // The node we descend into must exist.
    assert(next!=0);
    node = next;
  }

  // `node` is now the end of the keyword.
  node->add_patid(it);
}

// Creates an empty tree: allocates the root node, positions the scan
// state at the root, and allocates a 256-entry relevant-character table
// with every entry cleared.
template <class KTND>
keyword_tree<KTND>::keyword_tree() { 
  root_ = newktnode();
  w_ = root_;
  first_char_ = true;

  relchars_ = new bool[256];
  bool *flag = relchars_;
  bool * const flags_end = relchars_ + 256;
  while (flag != flags_end) {
    *flag++ = false;
  }
}

// Puts the scan state back to its initial condition: the next search
// fetches a fresh first character and starts at the root node.
template <class KTND>
void keyword_tree<KTND>::reset() {
  first_char_ = true;
  w_ = root_;
}

// Releases everything the tree allocated in bulk: each jump-table buffer
// recorded in jump_table_delete_list, each node buffer recorded in
// ktnode_delete_list, and the relevant-character table.
//
// All of these are allocated with new[] (see newktjumptable(),
// newktnode() and the constructor), hence delete[] throughout.
template <class KTND>
keyword_tree<KTND>::~keyword_tree() {
  // `typename` is required: the iterator type depends on KTND.
  typename std::list<kid_jump_table<KTND>*>::iterator jt;
  for (jt=jump_table_delete_list.begin(); 
       jt!=jump_table_delete_list.end(); ++jt) {
    delete [] *jt;
  }
  typename std::list<KTND*>::iterator nd;
  for (nd=ktnode_delete_list.begin();nd!=ktnode_delete_list.end();++nd) {
    delete [] *nd;
  }
  delete [] relchars_;
}

// Registers the pattern `pat` under identifier `id` via add_pattern_()
// (passing esb and eeb through unchanged) and inserts its text into the
// keyword tree starting at the root. Returns `id`.
template <class KTND>
unsigned long keyword_tree<KTND>::add_pattern(std::string const & pat, 
                                              long unsigned int id,
                                              int esb, int eeb) {
  pattern_list::const_iterator const entry = add_pattern_(pat,id,esb,eeb);
  add_keyword_(root_,pat.c_str(),entry);
  return id;
}

// Hands out one node from the free list `ktnodes`.
//
// When the free list is empty a new array of nodes is allocated first;
// its length is (GETPAGESIZE-128)/sizeof(KTND), but never less than one
// (GETPAGESIZE is presumably the system page size - confirm where the
// macro is defined). The array is remembered in ktnode_delete_list so the
// destructor can release it.
template <class KTND>
KTND *keyword_tree<KTND>::newktnode() {
  if (ktnodes.empty()) {
    int count=(GETPAGESIZE-128)/sizeof(KTND);
    if (count < 1) count = 1;

    KTND *slab = new KTND[count];
    for (KTND *p=slab; p!=slab+count; ++p) {
      ktnodes.push_front(p);
    }
    ktnode_delete_list.push_front(slab);
  }

  KTND *node = ktnodes.front();
  ktnodes.pop_front();
  return node;
}

// Allocates one buffer holding as many jump tables of `size` entries as
// fit in (GETPAGESIZE-128) bytes (at least one), clears the `which` field
// of every entry, pushes every table in the buffer onto jump_tables_, and
// records the buffer in jump_table_delete_list for the destructor.
//
// Returns the start of the buffer, i.e. the first table.
// NOTE(review): that first table is also pushed onto jump_tables_, so
// whether it is handed out twice depends on how callers use the return
// value versus the free list - confirm against the caller.
// NOTE(review): size == 0 would divide by zero below; callers presumably
// never pass it - confirm.
template <class KTND>
kid_jump_table<KTND> *keyword_tree<KTND>::newktjumptable(unsigned int size) {
  unsigned int tables=(GETPAGESIZE-128)/(size*sizeof(kid_jump_table<KTND>));
  if (tables < 1) tables = 1;

  unsigned int const entries = size*tables;
  kid_jump_table<KTND> *buffer= new kid_jump_table<KTND>[size*tables];
  for (kid_jump_table<KTND> *e=buffer; e!=buffer+entries; ++e) {
    e->which=0;
  }

  for (unsigned int t=0; t<tables; ++t) {
    jump_tables_.push_front(buffer+t*size);
  }
  jump_table_delete_list.push_front(buffer);
  return buffer;
}

// Computes failure (and output) links for the whole tree.
//
// The root's failure link is set to the root itself; the remaining nodes
// are processed through a work queue seeded with the root, one node per
// failure_links_() call, until the queue runs dry.
template <class KTND>
void keyword_tree<KTND>::compute_failure_links() {
  root_->fail(root_);

  std::list<KTND*> pending;
  pending.push_back(root_);
  // The queue starts non-empty, so the first iteration is unconditional.
  do {
    failure_links_(pending);
  } while (!pending.empty());
}

// Starts node optimisation at the root by calling its optimize_node()
// with this tree and the character producer `cp`.
template <class KTND>
void keyword_tree<KTND>::optimize_nodes(CharacterProducer & cp) {
  KTND * const start = root_;
  start->optimize_node(*this,cp);
}

// Replaces the relevant-character table by one with cp.size() entries:
// entry i of the new table is the old table's flag for character
// cp.ch(i). The old table is freed.
template <class KTND>
void keyword_tree<KTND>::compress_relchars(CharacterProducer & cp) {
  int const n = cp.size();
  bool * const compact = new bool[n];
  for (int k=0; k<n; ++k) {
    compact[k] = relchars_[cp.ch(k)] ? true : false;
  }
  delete [] relchars_;
  relchars_ = compact;
}

// One step of the breadth-first failure-link computation.
//
// Removes the front node of `q` (the parent) and, for each of its
// children, walks up the parent's failure chain looking for a node that
// has a child on the same character. If one is found, and the parent is
// not the root, that child becomes the failure link; otherwise the
// failure link is the root. The output link is set to the failure target
// when it carries patterns, or else to the failure target's own output
// link when it has one. Each child is then appended to `q`.
template<class KTND>
void keyword_tree<KTND>::failure_links_(std::list<KTND*> & q) {
  KTND * const parent = q.front();
  q.pop_front();

  tinylist<kid_list_element<KTND> > *kids = parent->kids();
  if (!kids) {
    return;
  }

  typename tinylist<kid_list_element<KTND> >::iterator kid = kids->begin();
  while (kid != kids->end()) {
    KTND * const child = kid->nd;
    assert(child!=0);
    unsigned char const ch = kid->ch;

    KTND *cand = parent->fail();
    assert(cand!=0);
    // Climb the failure chain until a node with a `ch` child, the root,
    // or the end of the chain is reached.
    for (;;) {
      if (cand->lchild(ch) || cand == root_) break;
      cand = cand->fail();
      if (!cand) break;
    }

    KTND *target = 0;
    if (cand) {
      target = cand->lchild(ch);
    }

    if (target && parent != root_) {
      child->fail(target);
      if (target->patid()) {
        child->output(target);
      } else if (target->output()) {
        child->output(target->output());
      }
    } else {
      child->fail(root_);
    }

    q.push_back(child);
    ++kid;
  }
}

// Scans characters from `cp` through the keyword tree and appends one hit
// to `kas` for every pattern found, keyed by cp.pos() at the moment the
// hit is recorded.
//
// The scan position (current node w_, current character ch_ and the
// first_char_ flag) lives in the object, so a later call resumes where
// this one stopped.
//
// Returns false at once if `cp` is already at end of input. Otherwise
// returns true as soon as at least `minka` hits have been collected in
// this call (checked after each failure-link or root step), or, when the
// input runs out, if at least one hit was collected; false otherwise.
// report_progress(cp) is called before each of those later returns.
template <class KTND>
bool keyword_tree<KTND>::find_patterns(CharacterProducer & cp, 
                                       pattern_hit_vector & kas,
                                       long unsigned minka) {
  KTND *wp, *wpp;
  long unsigned kacount=0;
  bool eof;
  tinylist<pattern_list::const_iterator>::iterator pid,pide;
  if ((eof=cp.eof())) return false;
  if (first_char_) {
    ch_ = cp.getnch();
    first_char_ = false;
  }
  while (!eof) {
    // Follow tree edges for as long as the current character matches.
    while (relchars_[ch_] && (wp=w_->child(ch_))) {
      // Patterns ending exactly at the node just reached.
      if (wp->patid()) {
        pide = wp->patid()->end();
        for (pid=wp->patid()->begin();pid!=pide;++pid) {
          kas.push_back(cp.pos(),*pid);
          kacount++;
        } 
      }
      // Patterns reachable through the chain of output links.
      wpp = wp->output();
      while (wpp && wpp->patid()) {
        // The end iterator must come from the list being walked (wpp);
        // wp may have no pattern list at all.
        pide = wpp->patid()->end();
        for (pid=wpp->patid()->begin();pid!=pide;++pid) {
          kas.push_back(cp.pos(),*pid);
          kacount++;
        } 
        wpp = wpp->output();
      }
      w_ = wp;
      if ((eof=cp.eof())) break;
      ch_ = cp.getnch();
    }
    if (eof) break;
    if (w_ == root_) {
      // No edge from the root on this character: skip it.
      if ((eof=cp.eof())) break;
      ch_ = cp.getnch();
    } else {
      // Mismatch below the root: retry the same character from the
      // failure node.
      w_ = w_->fail();
    }
    if (kacount >= minka) {
      report_progress(cp);
      return true;
    }
  } 
  report_progress(cp);
  if (kacount > 0) return true;
  return false;
}

// Consumes all remaining input from `cp` and reports, in `kas`, the
// patterns reachable from the tree node at which the input ends.
//
// Scanning works as in find_patterns() but records nothing on the way;
// it only tracks the current node w_. Returns false if `cp` is already at
// end of input, or if the final node is the root or has
// level() < minlevel.
//
// Otherwise, for the final node and each node on its failure chain whose
// level() is still >= minlevel, every pattern stored anywhere in the
// subtree below that node is pushed onto `kas`, keyed by that chain
// node's level(). Afterwards adjacent entries with equal value() are
// reduced to one, and true is returned.
template <class KTND>
bool keyword_tree<KTND>::find_suffixes(CharacterProducer & cp, 
                                       pattern_hit_vector & kas,
                                       long unsigned int minlevel) {
  KTND *wp;
  bool eof;
  tinylist<pattern_list::const_iterator>::iterator pid,pide;
  if ((eof=cp.eof())) return false;
  if (first_char_) {
    ch_ = cp.getnch();
    first_char_ = false;
  }
  while (!eof) {
    // Follow tree edges for as long as the current character matches.
    while (relchars_[ch_] && (wp=w_->child(ch_))) {
      w_ = wp;
      if ((eof=cp.eof())) break;
      ch_ = cp.getnch();
    }
    if (eof) break;
    if (w_ == root_) {
      // No edge from the root on this character: skip it.
      if ((eof=cp.eof())) break;
      ch_ = cp.getnch();
    } else {
      // Mismatch below the root: retry from the failure node.
      w_ = w_->fail();
    }
  } 
  if (w_ == root_ || w_->level() < minlevel) {
    return false;    
  } else {
    // We want a list of all the suffixes supported by this input. 
    // The current node, w_, represents the longest suffix of our input.
    while (w_ != root_ && w_->level() >= minlevel) {
      // Visit the whole subtree below w_ using an explicit stack.
      tinylist<KTND *> nodestack;
      nodestack.push_front(w_);
      while (!nodestack.empty()) {
        wp = *(nodestack.begin());
        nodestack.pop(); 
        if (wp->patid()) {
          pide = wp->patid()->end();
          for (pid=wp->patid()->begin();pid!=pide;++pid) {
            // Keyed by the level of the chain node, not of wp.
            kas.push_back(w_->level(),*pid);
          } 
        }
        if (wp->kids() && !wp->kids()->empty()) {
          tinylist<kid_list_element<KTND> > *l = wp->kids();
          typename tinylist<kid_list_element<KTND> >::iterator it;
          it = l->begin();
          while (it != l->end()) {
            nodestack.push_front(it->nd);
            ++it;
          }
        }
      }
      w_ = w_->fail();
    }
    kas.normalize_strict_byvalue();

    // Drop duplicates: of two adjacent entries with the same value(), the
    // earlier one gets key MAXINT; after normalize() the container is
    // shortened by the number of entries marked.
    // NOTE(review): this presumes normalize() orders by key so the marked
    // entries end up at the tail - confirm against pattern_hit_vector.
    pattern_hit_vector::iterator phlit,phlit0;
    long unsigned int removed=0;
    phlit = kas.begin();
    phlit0 = phlit;
    if (phlit != kas.end()) ++phlit;
    while (phlit != kas.end()) {
      if (phlit0->value() == phlit->value()) {
        phlit0->key() = MAXINT;
        removed++;
      }
      phlit0 = phlit;
      ++phlit;
    }
    kas.normalize();
    kas.resize(kas.size()-removed);
    return true;
  }
}

#endif
