// =============================================================================
// CD-HIT
// http://cd-hit.org/
// http://bioinformatics.burnham-inst.org/cd-hi
// 
// program written by 
//                                      Weizhong Li
//                                      UCSD, San Diego Supercomputer Center
//                                      La Jolla, CA, 92093
//                                      Email liwz@sdsc.edu
//                 at
//                                      Adam Godzik's lab
//                                      The Burnham Institute
//                                      La Jolla, CA, 92037
//                                      Email adam@burnham-inst.org
// =============================================================================

#include "cdhit-common.h"
#include "cdhit-utility.h"

// The next two control what happens when seqs in db2 are longer than the
// reps in db1. By default, only seqs in db2 that are shorter than a rep in
// db1 are clustered to that rep in db1.

// Program-wide state shared by main() below.

// Run-time options; filled in from the command line by SetOptions().
Options options;

// First input database (db1): read from options.input and used as the
// target that db2 is clustered against.
SequenceDB seq_db;

// Second input database (db2): read from options.input2.
SequenceDB seq_db2;

// CPU time snapshots taken with times(); CPU_begin / CPU_end bracket main().
// CPU_current is not used in this file's visible code.
struct tms CPU_current;
struct tms CPU_begin;
struct tms CPU_end;

////////////////////////////////////  MAIN /////////////////////////////////////
int main(int argc, char **argv)
{
  string db_in;
  string db_in2;
  string db_out;

  times(&CPU_begin);

  // ***********************************    parse command line and open file
  if (argc < 7) print_usage_2d(argv[0]);
  if (options.SetOptions( argc, argv, true ) == 0) print_usage_2d(argv[0]);
  options.Validate();

  db_in = options.input;
  db_in2 = options.input2;
  db_out = options.output;

  InitNAA( MAX_UAA );
  seq_db.NAAN = seq_db2.NAAN = NAAN_array[options.NAA];
  seq_db.word_table.init(options.NAA, seq_db.NAAN);

  seq_db.Read( db_in.c_str(), options );
  cout << "total seq in db1: " << seq_db.sequences.size() << endl;

  seq_db2.Read( db_in2.c_str(), options );
  cout << "total seq in db2: " << seq_db2.sequences.size() << endl;

  seq_db.SortDivide( options );
  seq_db2.SortDivide( options, false );
  seq_db.MakeWordTable( options );
  seq_db2.ClusterTo( seq_db, options );

  cout << "writing non-redundant sequences from db2" << endl;
  seq_db2.WriteClusters( db_in2.c_str(), db_out.c_str(), options );

  // write a backup clstr file in case next step crashes
  seq_db2.WriteExtra2D( seq_db, options );
  cout << "program completed !" << endl << endl;
  times(&CPU_end);
  show_cpu_time(CPU_begin, CPU_end);
  return 0;
} // END int main

