/**************************************************************************
 * This code is part of the supporting infrastructure for ATA Mapper. 
 * Copyright (C) 2002,2003,2004 Applera Corporation. All rights reserved.
 * Author: Nathan Edwards
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received (LICENSE.txt) a copy of the GNU General Public 
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *************************************************************************/

// -*- c++ -*-

#ifndef _CHAR_IO_T_
#define _CHAR_IO_T_

#include <iostream>
#include <fstream>
#include "util.h"
#include "types.h"

#if !defined(NO_STD_NAMESPACE)
using namespace std;
#endif

// Word type used by Compressed<T> for its bit-packed decode buffer and for
// the per-bit-offset mask tables.  `bigword` is not declared in this file
// (presumably it comes from types.h -- confirm).
#define BUFTYPE bigword

template <class T>
class Compressed : public T {
private:
  char *chmap_;
  int *invchmap_;
  unsigned int chmapsize_;
  BUFTYPE *buffer_;
  BUFTYPE *bufp_;
  unsigned int ucharsperbuftype_;
  unsigned int bufsize_;
  unsigned int bufbits_;
  unsigned int bits_;
  unsigned int buftypebits_;
  unsigned int bufpos_;
  unsigned int bitpos_;
  unsigned int bitsperbuf_;
  BUFTYPE *mask0_;
  int *shift0_;
  BUFTYPE *mask1_;
  int *shift1_;
  bool eof_;
  FILE_POSITION_TYPE pos_;
public:
  Compressed(std::string const & filename) 
    : T((filename.substr(0,filename.rfind(".seq"))+".sqz").c_str()) {
    // timestamp("Begining of CompressedChars constructor...");
    eof_ = false;
    pos_ = 0;
    chmap_ = new char[256];
    std::string chmapfn = filename.substr(0,filename.rfind(".seq"))+".tbz";
    ifstream chmapfile(chmapfn.c_str());
    chmapfile.read(chmap_,256);
    chmapsize_=chmapfile.gcount();
    chmapfile.close();
    invchmap_ = new int[256];
    for (int i=0;i<256;i++) {
      invchmap_[i] = -1;
    }
    for (unsigned int i=0;i<chmapsize_;i++) {
      invchmap_[chmap_[i]] = i;
    }
    bits_=1;
    while ( (((unsigned)1) << bits_) < chmapsize_ ) {
      bits_++;
    } 
    buftypebits_ = 8*sizeof(BUFTYPE);
    bufsize_ = least_common_multiple(bits_,buftypebits_)/buftypebits_;
    bufbits_ = buftypebits_*bufsize_;
    bitsperbuf_ = bufbits_/bits_;
    ucharsperbuftype_ = sizeof(BUFTYPE)/sizeof(unsigned char);
    buffer_ = new BUFTYPE[bufsize_];
    bitpos_ = 0;
    mask0_ = new BUFTYPE[buftypebits_*bufsize_];
    mask1_ = new BUFTYPE[buftypebits_*bufsize_];
    shift0_ = new int[buftypebits_*bufsize_];
    shift1_ = new int[buftypebits_*bufsize_];
    for (unsigned int i=0;i<buftypebits_*bufsize_;i++) {
      mask0_[i] = 0;
      mask1_[i] = 0;
      shift0_[i] = 0;
      shift1_[i] = 0;
    }
    for (unsigned int i=0;i<buftypebits_*bufsize_;i+=bits_) {
      int shift = (buftypebits_-bits_-(i%buftypebits_));
      BUFTYPE mask = ((1 << bits_)-1);
      if (shift>0) {
	mask0_[i] = mask << shift;
      } else if (shift<0) {
	mask0_[i] = mask >> -shift;
      } else {
	mask0_[i] = mask;
      }
      shift0_[i] = shift;
      if (i/buftypebits_ < (i+bits_-1)/buftypebits_) {
	shift = (buftypebits_ - (bits_+(i%buftypebits_)-buftypebits_));
	if (shift>0) {
	  mask1_[i] = mask << shift;
	} else if (shift<0) {
	  mask1_[i] = mask >> -shift;
	} else {
	  mask1_[i] = mask;
	}
	shift1_[i] = shift;
      }
    }
  }
  inline bool eof() const {
    return eof_;
  }
  unsigned char getnch() {
    // checkpoint;
    if (bitpos_ == 0) {	
      if (T::eof()) {
	eof_= true;
	return (unsigned char)EOF;
      } else {
	BUFTYPE *p(buffer_);
	for (unsigned int i=0;i<bufsize_;i++) {
	  for (unsigned int j=0;j<ucharsperbuftype_;j++) {
	    (*p) <<= 8;
	    (*p) |= T::getnch();
	  }
	  p++;
	}
      }
      bufp_ = buffer_;
    }
    BUFTYPE val;
    int shift0(shift0_[bitpos_]);
    if (shift0>0) {
      val = (((*bufp_)&mask0_[bitpos_]) >> shift0);
    } else if (shift0<0) {
      val = (((*bufp_)&mask0_[bitpos_]) << -shift0);      
    } else {
      val = ((*bufp_)&mask0_[bitpos_]);            
    }
    if (mask1_[bitpos_]) {
      bufp_++;
      val |= (((*bufp_)&mask1_[bitpos_]) >> shift1_[bitpos_]);
    }
    bitpos_ += bits_;
    if (bitpos_ == bufbits_) {
      bitpos_ = 0;
      if (T::eof()) {
	eof_ = true;
      }
    }
    pos_++;
    // cerr << val << endl;
    return val;
  }
  inline char getch() {
    return (char) chmap_[getnch()];
  }
  inline char ch(unsigned char nch) {
    return chmap_[nch];
  }
  inline int nch(char ch) {
    return invchmap_[ch];
  }
  inline FILE_POSITION_TYPE pos() const {
    // checkpoint;
    return pos_;
  }
  void pos(FILE_POSITION_TYPE p) {
    // checkpoint;
    // cerr << ((eof_)?"eof_ true":"eof_ false") << endl;
    // cerr << ((T::eof())?"T::eof() true":"T::eof() false") << endl;
    // cerr << p << endl; 
    // cerr << bitsperbuf_ << endl;
    // cerr << (p/bitsperbuf_)*bufsize_*ucharsperbuftype_ << endl;
    // cerr << T::pos() << endl;
    T::pos((p/bitsperbuf_)*bufsize_*ucharsperbuftype_);
    // cerr << T::pos() << endl;
    pos_ = p - (p%bitsperbuf_);
    // cerr << pos_ << endl;
    // cerr << ((eof_)?"eof_ true":"eof_ false") << endl;
    // cerr << ((T::eof())?"T::eof() true":"T::eof() false") << endl;
    bufpos_ = 0;
    bitpos_ = 0;
    // checkpoint;
    while (pos_ < p) {
      // cerr << pos_ << endl;
      getnch();
    }
    // cerr << ((eof_)?"eof_ true":"eof_ false") << endl;
    // cerr << ((T::eof())?"T::eof() true":"T::eof() false") << endl;
    eof_ = T::eof();
    // cerr << ((eof_)?"eof_ true":"eof_ false") << endl;
    // cerr << ((T::eof())?"T::eof() true":"T::eof() false") << endl;
  }
  inline void reset() {
    T::reset();
    pos_ = 0;
    bufpos_ = 0;
    bitpos_ = 0;
    eof_ = false;
  }
  inline unsigned int size() const {
    return chmapsize_;
  }
  float progress() const {
    return T::progress();
  }
  ~Compressed() {
    delete [] chmap_;
    delete [] invchmap_;
    delete [] buffer_;
    delete [] mask0_;
    delete [] mask1_;
    delete [] shift0_;
    delete [] shift1_;
  }
};

// Normalized<T> reads a stream of one-byte symbol codes through the character
// source T and translates between codes and characters.
//
// Given "<base>.seq" (or any name; everything from the last ".seq" onwards is
// stripped), the codes are read from "<base>.sqn" via T and the
// code -> character table from "<base>.tbl".
//
// T is expected to provide: a constructor taking const char *, eof(),
// getch(), pos(), pos(FILE_POSITION_TYPE), reset() and progress().
//
// NOTE(review): this class owns raw arrays but relies on the
// compiler-generated copy constructor and assignment operator, so copying an
// instance would release the arrays twice.  Do not copy instances.
template <class T>
class Normalized : public T {
private:
  unsigned int chmapsize_;  // number of table bytes actually read
  int *invchmap_;           // character (as unsigned char) -> code, -1 if unmapped
  char *chmap_;             // symbol code -> character, 256 entries
public:
  // Opens the code stream and loads the character table (see class comment).
  Normalized(std::string const & filename) 
    : T((filename.substr(0,filename.rfind(".seq"))+".sqn").c_str()) {
    // Zero-fill first so entries beyond the bytes actually read are defined.
    chmap_ = new char[256];
    for (int i=0;i<256;i++) {
      chmap_[i] = 0;
    }
    std::string mapfn = filename.substr(0,filename.rfind(".seq"))+".tbl";
    std::ifstream chmapfile(mapfn.c_str());
    chmapfile.read(chmap_,256);
    chmapsize_=chmapfile.gcount();
    chmapfile.close();

    // Build the inverse table.  Characters are indexed as unsigned char so
    // that bytes >= 0x80 cannot produce a negative index when char is signed.
    invchmap_ = new int[256];
    for (int i=0;i<256;i++) {
      invchmap_[i] = -1;
    }
    for (unsigned int i=0;i<chmapsize_;i++) {
      invchmap_[static_cast<unsigned char>(chmap_[i])] = i;
    }
  }
  // Forwards T's end-of-file state.
  inline bool eof() const {
    return T::eof();
  }
  // Character for symbol code nch.
  inline char ch(unsigned char nch) {
    return chmap_[nch];
  }
  // Symbol code for character ch, or -1 if ch is not in the table.
  inline int nch(char ch) {
    return invchmap_[static_cast<unsigned char>(ch)];
  }
  // Returns the next symbol translated to its character.
  inline char getch() {
    return chmap_[getnch()];
  }
  // Returns the next symbol code, i.e. the next raw value from T::getch().
  inline unsigned char getnch() {
    return T::getch();
  }
  // Current position, as reported by T.
  inline FILE_POSITION_TYPE pos() const {
    return T::pos();
  }
  // Repositions T to p.
  inline void pos(FILE_POSITION_TYPE p) {
    T::pos(p);
  }
  // Rewinds T.
  inline void reset() {
    T::reset();
  }
  ~Normalized() {
    delete [] chmap_;
    delete [] invchmap_;
  }
  // Number of entries read from the character table.
  inline unsigned int size() const {
    return chmapsize_;
  }
  // Forwards T's progress indicator.
  float progress() const {
    return T::progress();
  }
};

#endif
