ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cd-hit/mcd-hit.c++
Revision: 1.2
Committed: Mon Mar 8 10:26:09 2004 UTC (17 years, 7 months ago) by dmb
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +10 -7 lines
Log Message:
Maintenance release by lducazu.

Line File contents
1 // =============================================================================
2 // CD-HIT
3 // http://bioinformatics.burnham-inst.org/cd-hi
4 //
5 // program written by
6 // Weizhong Li
7 // UCSD, San Diego Supercomputer Center
8 // La Jolla, CA, 92093
9 // Email liwz@sdsc.edu
10 // at
11 // Adam Godzik's lab
12 // The Burnham Institute
13 // La Jolla, CA, 92037
14 // Email adam@burnham-inst.org
15 // =============================================================================
16
17 #include "cd-hi.h"
18 #include "cd-hi-init.h"
19
20 //////////////////////////////////// MAIN /////////////////////////////////////
21 int main(int argc, char **argv) {
22 int i, j, k, i1, j1, sggi, sgj;
23 char db_in[MAX_FILE_NAME];
24 char db_out[MAX_FILE_NAME];
25 char db_clstr[MAX_FILE_NAME];
26 char db_clstr_bak[MAX_FILE_NAME];
27 char db_clstr_old[MAX_FILE_NAME];
28
29 // *********************************** parse command line and open file
30 if (argc < 5) print_usage(argv[0]);
31
32 for (i=1; i<argc; i++) {
33 if (strcmp(argv[i], "-i") == 0)
34 strncpy(db_in, argv[++i], MAX_FILE_NAME-1);
35 else if (strcmp(argv[i], "-o") == 0)
36 strncpy(db_out, argv[++i], MAX_FILE_NAME-1);
37 else if (strcmp(argv[i], "-u") == 0) {
38 strncpy(db_clstr_old, argv[++i], MAX_FILE_NAME-1);
39 old_clstr_file = 1;
40 }
41 else if (strcmp(argv[i], "-M") == 0)
42 mem_limit = 1000000 * atoi(argv[++i]);
43 else if (strcmp(argv[i], "-l") == 0)
44 length_of_throw = atoi(argv[++i]);
45 else if (strcmp(argv[i], "-c") == 0) {
46 NR_clstr = atof(argv[++i]);
47 if ((NR_clstr > 1.0) || (NR_clstr < 0.4)) bomb_error("invalid clstr");
48 NR_clstr100 = (int) (NR_clstr * 100 );
49 }
50 else if (strcmp(argv[i], "-L") == 0) {
51 NR_cov = atof(argv[++i]);
52 if ((NR_cov > 1.0) || (NR_cov < 0.0)) bomb_error("invalid coverage cutoff");
53 }
54 else if (strcmp(argv[i], "-b") == 0) {
55 BAND_width = atoi(argv[++i]);
56 if (BAND_width < 0 ) bomb_error("invalid band width");
57 }
58 else if (strcmp(argv[i], "-n") == 0) {
59 NAA = atoi(argv[++i]);
60 if ( NAA < 2 || NAA > 5 ) bomb_error("invalid word length");
61 }
62 else if (strcmp(argv[i], "-d") == 0) {
63 des_len = atoi(argv[++i]);
64 if ( des_len < 15 )
65 bomb_error("too short description, not enough to identify sequences");
66 }
67 else if (strcmp(argv[i], "-t") == 0) {
68 tolerance = atoi(argv[++i]);
69 if ( tolerance < 0 || tolerance > 5 ) bomb_error("invalid tolerance");
70 }
71 else
72 print_usage(argv[0]);
73 }
74 db_clstr[0]=0; strcat(db_clstr,db_out); strcat(db_clstr,".clstr");
75 db_clstr_bak[0]=0; strcat(db_clstr_bak,db_out); strcat(db_clstr_bak,".bak.clstr");
76
77 if ( NAA == 2 ) { NAAN = NAA2; }
78 else if ( NAA == 3 ) { NAAN = NAA3; }
79 else if ( NAA == 4 ) { NAAN = NAA4; }
80 else if ( NAA == 5 ) { NAAN = NAA5; }
81 else bomb_error("invalid -n parameter!");
82
83 word_table.init(NAA, NAAN);
84
85 if ( tolerance ) {
86 int clstr_idx = (int) (NR_clstr * 100) - naa_stat_start_percent;
87 int tcutoff = naa_stat[tolerance-1][clstr_idx][5-NAA];
88
89 if (tcutoff < 5 )
90 bomb_error("Too short word length, increase it or the tolerance");
91 for ( i=5; i>NAA; i--) {
92 if ( naa_stat[tolerance-1][clstr_idx][5-i] > 10 ) {
93 cout << "Your word length is " << NAA << ", using "
94 << i << " may be faster!" <<endl;
95 break;
96 }
97 }
98 }
99 else {
100 if ( NR_clstr > 0.85 && NAA < 5)
101 cout << "Your word length is " << NAA
102 << ", using 5 may be faster!" <<endl;
103 else if ( NR_clstr > 0.80 && NAA < 4 )
104 cout << "Your word length is " << NAA
105 << ", using 4 may be faster!" <<endl;
106 else if ( NR_clstr > 0.75 && NAA < 3 )
107 cout << "Your word length is " << NAA
108 << ", using 3 may be faster!" <<endl;
109 }
110
111 if ( length_of_throw <= NAA ) bomb_error("Too short -l, redefine it");
112
113 ifstream in1(db_in);
114 if ( ! in1 ) { cout << "Can not open file" << db_in << endl; exit(1); }
115 ofstream out1(db_out);
116 if ( ! out1) { cout << "Can not open file" << db_out << endl; exit(1); }
117 ofstream out2(db_clstr);
118 if ( ! out2) { cout << "Can not open file" << db_clstr << endl; exit(1); }
119 ofstream out2_bak(db_clstr_bak);
120 if ( ! out2_bak) { cout << "Can not open file" << db_clstr_bak << endl; exit(1); }
121
122 DB_no = db_seq_no_test(in1);
123 if ((NR_len = new int [DB_no]) == NULL) bomb_error("Memory");
124 if ((NR_idx = new int [DB_no]) == NULL) bomb_error("Memory");
125 if ((NR90_idx = new int [DB_no]) == NULL) bomb_error("Memory");
126 if ((NR_clstr_no = new int [DB_no]) == NULL) bomb_error("Memory");
127 if ((NR_iden = new char [DB_no]) == NULL) bomb_error("Memory");
128 if ((NR_coverage = new char [DB_no]) == NULL) bomb_error("Memory");
129 if ((NR_flag = new char [DB_no]) == NULL) bomb_error("Memory");
130 if ((NR_seq = new char *[DB_no]) == NULL) bomb_error("Memory");
131 int *Clstr_no, *(*Clstr_list);
132 if ((Clstr_no = new int [DB_no]) == NULL) bomb_error("Memory");
133 if ((Clstr_list = new int *[DB_no]) == NULL) bomb_error("Memory");
134 if ((NR90f_idx = new int [DB_no]) == NULL) bomb_error("Memory");
135
136 if ( old_clstr_file ) {
137 ifstream in_clstr(db_clstr_old);
138 if ( ! in_clstr) {
139 cout << "Can not open file" << db_clstr_old << endl;
140 exit(1);
141 }
142
143 //number of seq in old clstr file
144 int clstr_seq_no = old_clstr_seq_no_test(in_clstr);
145 in_clstr.clear();
146 in_clstr.open(db_clstr_old);
147
148 if ((NRo_idx = new int [clstr_seq_no]) == NULL) bomb_error("Memory");
149 if ((NRo_id1 = new int [clstr_seq_no]) == NULL) bomb_error("Memory");
150 if ((NRo_id2 = new int [clstr_seq_no]) == NULL) bomb_error("Memory");
151 if ((NRo_clstr_no = new int [clstr_seq_no]) == NULL) bomb_error("Memory");
152 if ((NRo_NR_idx = new int [clstr_seq_no]) == NULL) bomb_error("Memory");
153 if ((NRo_iden = new char[clstr_seq_no]) == NULL) bomb_error("Memory");
154 old_clstr_read_in(in_clstr, NRo_no, NRo90_no, NRo_idx, NRo_id1, NRo_id2,
155 NRo_iden, NRo_clstr_no, NRo_NR_idx);
156 in_clstr.close();
157 }
158
159 in1.clear();
160 in1.open(db_in);
161 NRo_no > 0 ?
162 db_read_in2(in1, length_of_throw, NR_no, NR_seq, NR_len,
163 NRo_no, NRo_idx, NRo_id1, NRo_id2, NRo_NR_idx):
164 db_read_in(in1, length_of_throw, NR_no, NR_seq, NR_len);
165 in1.close();
166 cout << "total seq: " << NR_no << endl;
167
168 // ********************************************* init NR_flag
169 for (i=0; i<NR_no; i++) NR_flag[i] = 0;
170 if ( old_clstr_file ) {
171 for (i=0; i<NRo_no; i++) {
172 if ( (j = NRo_NR_idx[i]) == -1 ) continue ;
173 if ( NRo_clstr_no[i] == i ) NR_flag[j] |= IS_OLD_REP;
174
175 if ( (k = NRo_NR_idx[ NRo_clstr_no[i] ]) == -1 ) continue ;
176 if ( NRo_clstr_no[i] != i ) NR_flag[j] |= IS_OLD_REDUNDANT;
177 NR_iden[j] = NRo_iden[i];
178 NR_clstr_no[j] = k; // note, later it need be changed to NR90_no
179 }
180 delete [] NRo_idx;
181 delete [] NRo_id1;
182 delete [] NRo_id2;
183 delete [] NRo_iden;
184 delete [] NRo_clstr_no;
185 delete [] NRo_NR_idx;
186 }
187
188 sort_seqs_divide_segs(NR_no, NR_len, NR_idx, NR_seq, mem_limit, NAAN,
189 SEG_no, SEG_b, SEG_e, db_swap);
190
191 // ********************************************* Main loop
192 char *seqi;
193 double aa1_cutoff = NR_clstr;
194 double aa2_cutoff = 1 - (1-NR_clstr)*2;
195 double aan_cutoff = 1 - (1-NR_clstr)*NAA;
196 int len, hit_no, has_aa2, iden_no, aan_no, segb;
197 int aan_list[MAX_SEQ];
198 INTs aan_list_no[MAX_SEQ];
199 int frg1, frg2, segfb;
200 int aan_list_backup[MAX_SEQ];
201 INTs *look_and_count;
202 NR_frag_no = 0;
203 for (i=0; i<NR_no; i++) NR_frag_no += (NR_len[i] - NAA ) / Frag_size + 1;
204 if ((look_and_count= new INTs[NR_frag_no]) == NULL) bomb_error("Memory");
205
206 if ( tolerance ) {
207 int clstr_idx = (int) (NR_clstr * 100) - naa_stat_start_percent;
208 double d2 = ((double) (naa_stat[tolerance-1][clstr_idx][3] )) / 100;
209 double dn = ((double) (naa_stat[tolerance-1][clstr_idx][5-NAA] )) / 100;
210 aa2_cutoff = d2 > aa2_cutoff ? d2 : aa2_cutoff;
211 aan_cutoff = dn > aan_cutoff ? dn : aan_cutoff;
212 }
213
214 NR90_no = 0; NR90f_no = 0;
215 for (sggi=0; sggi<SEG_no; sggi++) {
216 if (SEG_no >1)
217 cout << "SEG " << sggi << " " << SEG_b[sggi] << " " << SEG_e[sggi] <<endl;
218
219 for (sgj=sggi-1; sgj>=0; sgj--) {
220 cout << "Reading swap" << endl;
221 if ( sgj != sggi-1) word_table.read_tbl(db_swap[sgj]); // reading old segment
222 cout << "Comparing with SEG " << sgj << endl;
223 for (i1=SEG_b[sggi]; i1<=SEG_e[sggi]; i1++) {
224 i = NR_idx[i1];
225 if (NR_flag[i] & IS_REDUNDANT ) continue;
226
227 if ( (NR_flag[i] & IS_OLD_REDUNDANT) &&
228 (NR_flag[ NR_clstr_no[i] ] & IS_REP) ) {
229 NR_clstr_no[i] = - (NR_clstr_no[ NR_clstr_no[i] ]) - 1;
230 NR_flag[i] |= IS_REDUNDANT ;
231 delete [] NR_seq[i];
232 continue;
233 }
234
235 len = NR_len[i]; seqi = NR_seq[i];
236 frg1 = (len - NAA ) / Frag_size + 1;
237 frg2 = (len - NAA + BAND_width ) / Frag_size + 1;
238 has_aa2 = 0;
239
240 int flag = check_this_short(len, seqi, has_aa2,
241 NAA, aan_no, aan_list, aan_list_no,
242 aan_list_backup, look_and_count,
243 hit_no, SEG90_b[sgj], SEG90_e[sgj],
244 frg2, SEG90f_b[sgj], SEG90f_e[sgj], iden_no,
245 aa1_cutoff, aa2_cutoff, aan_cutoff,
246 NR_flag[i], NR_flag) ;
247
248 if ( flag == 1) { // if similar to old one delete it
249 delete [] NR_seq[i];
250 NR_clstr_no[i] = -hit_no-1; // (-hit_no-1) for non representatives
251 NR_iden[i] = iden_no * 100 / len;
252 NR_flag[i] |= IS_REDUNDANT ;
253 }
254 } //for (i1=SEG_b[sggi]; i1<=SEG_e[sggi]; i1++)
255 } // for (sgj=0; sgj<sggi; sgj++)
256
257 if (SEG_no >1) cout << "Refresh Memory" << endl;
258 word_table.clean();
259
260 if (SEG_no >1) cout << "Self comparing" << endl;
261 segb = NR90_no;
262 segfb = NR90f_no;
263 for (i1=SEG_b[sggi]; i1<=SEG_e[sggi]; i1++) {
264 i = NR_idx[i1];
265
266 if ( ! (NR_flag[i] & IS_REDUNDANT) ) {
267 if ( (NR_flag[i] & IS_OLD_REDUNDANT) &&
268 (NR_flag[ NR_clstr_no[i] ] & IS_REP) ) {
269 NR_clstr_no[i] = - (NR_clstr_no[ NR_clstr_no[i] ]) - 1;
270 NR_flag[i] |= IS_REDUNDANT ;
271 delete [] NR_seq[i];
272 }
273 else {
274 len = NR_len[i]; seqi = NR_seq[i];
275 frg1 = (len - NAA ) / Frag_size + 1;
276 frg2 = (len - NAA + BAND_width ) / Frag_size + 1;
277 has_aa2 = 0;
278
279 int flag = check_this_short(len, seqi, has_aa2,
280 NAA, aan_no, aan_list, aan_list_no,
281 aan_list_backup, look_and_count,
282 hit_no, segb, NR90_no-1, frg2, segfb, NR90f_no-1, iden_no,
283 aa1_cutoff, aa2_cutoff, aan_cutoff,
284 NR_flag[i], NR_flag);
285
286 if ( flag == 1) { // if similar to old one delete it
287 delete [] NR_seq[i];
288 NR_clstr_no[i] = -hit_no-1; // (-hit_no-1) for non representatives
289 NR_iden[i] = iden_no * 100 / len;
290 }
291 else { // else add to NR90 db
292 NR90_idx[NR90_no] = i;
293 NR_clstr_no[i] = NR90_no; // positive value for representatives
294 NR_iden[i] = 0;
295 NR_flag[i] |= IS_REP;
296 add_in_lookup_table_short(aan_no, frg1, aan_list_backup,
297 aan_list_no);
298 NR90f_idx[NR90_no] = NR90f_no;
299 NR90f_no += frg1;
300 NR90_no++;
301 } // else
302 } // else
303 } // if ( ! (NR_flag[i] & IS_REDUNDANT) )
304
305 if ( (i1+1) % 100 == 0 ) {
306 cerr << ".";
307 if ( (i1+1) % 1000 == 0 )
308 cout << i1+1 << " finished\t" << NR90_no << " clusters" << endl;
309 }
310 } // for (i1=SEG_b[sggi]; i1<=SEG_e[sggi]; i1++) {
311
312 SEG90_b[sggi] = segb; SEG90_e[sggi] = NR90_no-1;
313 SEG90f_b[sggi] = segfb; SEG90f_e[sggi] = NR90f_no-1;
314 if ( sggi < SEG_no-2 ) word_table.write_tbl( db_swap[sggi] ); // if not last segment
315 } // for (sggi=0; sggi<SEG_no; sggi++) {
316 cout << NR_no << " finished\t" << NR90_no << " clusters" << endl;
317
318 for (i=0; i<NR90_no; i++) delete [] NR_seq[ NR90_idx[i] ];
319
320 cout << "writing new database" << endl;
321 in1.clear();
322 in1.open(db_in);
323 db_read_and_write(in1, out1, length_of_throw, des_len, NR_seq, NR_clstr_no);
324 in1.close(); out1.close();
325
326 // write a backup clstr file in case next step crashes
327 for (i=0; i<NR_no; i++) {
328 j1 = NR_clstr_no[i];
329 if ( j1 < 0 ) j1 =-j1-1;
330 out2_bak << j1 << "\t" << NR_len[i] << "aa, "<< NR_seq[i] << "...";
331 if ( NR_iden[i]>0 ) out2_bak << " at " << int(NR_iden[i]) << "%" << endl;
332 else out2_bak << " *" << endl;
333 }
334 out2_bak.close();
335
336 cout << "writing clustering information" << endl;
337 // write clstr information
338 // I mask following 3 lines, because it crash when clusters NR
339 // I thought maybe there is not a big block memory now, so
340 // move the new statement to the begining of program, but because I
341 // don't know the NR90_no, I just use DB_no instead
342 // int *Clstr_no, *(*Clstr_list);
343 // if ((Clstr_no = new int[NR90_no]) == NULL) bomb_error("Memory");
344 // if ((Clstr_list = new int*[NR90_no]) == NULL) bomb_error("Memory");
345
346
347 for (i=0; i<NR90_no; i++) Clstr_no[i]=0;
348 for (i=0; i<NR_no; i++) {
349 j1 = NR_clstr_no[i];
350 if ( j1 < 0 ) j1 =-j1-1;
351 Clstr_no[j1]++;
352 }
353 for (i=0; i<NR90_no; i++) {
354 if((Clstr_list[i] = new int[ Clstr_no[i] ]) == NULL) bomb_error("Memory");
355 Clstr_no[i]=0;
356 }
357
358 for (i=0; i<NR_no; i++) {
359 j1 = NR_clstr_no[i];
360 if ( j1 < 0 ) j1 =-j1-1;
361 Clstr_list[j1][ Clstr_no[j1]++ ] = i;
362 }
363
364 for (i=0; i<NR90_no; i++) {
365 out2 << ">Cluster " << i << endl;
366 for (k=0; k<Clstr_no[i]; k++) {
367 j = Clstr_list[i][k];
368 out2 << k << "\t" << NR_len[j] << "aa, "<< NR_seq[j] << "...";
369 if ( NR_iden[j]>0 ) out2 << " at " << int(NR_iden[j]) << "%" << endl;
370 else out2 << " *" << endl;
371 }
372 }
373 out2.close();
374 cout << "program completed !" << endl << endl;
375
376 } // END int main
377
378 ///////////////////////FUNCTION of common tools////////////////////////////
379
380 int check_this_short(int len, char *seqi, int &has_aa2,
381 int NAA, int& aan_no, int *aan_list, INTs *aan_list_no,
382 int *aan_list_backup,
383 INTs *look_and_count,
384 int &hit_no, int libb, int libe,
385 int frg2, int libfb, int libfe, int &iden_no,
386 double aa1_cutoff, double aa2_cutoff, double aan_cutoff,
387 char this_flag, char *NR_flag) {
388
389 static int taap[MAX_UAA*MAX_UAA];
390 static INTs aap_list[MAX_SEQ];
391 static INTs aap_begin[MAX_UAA*MAX_UAA];
392
393 int j, k, j1, c22, sk, mm;
394 int required_aa1 = int (aa1_cutoff* (double) len);
395 int required_aa2 = int (aa2_cutoff* (double) len);
396 int required_aan = int (aan_cutoff* (double) len);
397
398 aan_no = len - NAA + 1;
399 if ( NAA == 2)
400 for (j=0; j<aan_no; j++)
401 aan_list_backup[j] = aan_list[j] = seqi[j]*NAA1 + seqi[j+1];
402 else if ( NAA == 3)
403 for (j=0; j<aan_no; j++)
404 aan_list_backup[j] = aan_list[j] =
405 seqi[j]*NAA2 + seqi[j+1]*NAA1 + seqi[j+2];
406 else if ( NAA == 4)
407 for (j=0; j<aan_no; j++)
408 aan_list_backup[j] = aan_list[j] =
409 seqi[j]*NAA3+seqi[j+1]*NAA2 + seqi[j+2]*NAA1 + seqi[j+3];
410 else if ( NAA == 5)
411 for (j=0; j<aan_no; j++)
412 aan_list_backup[j] = aan_list[j] =
413 seqi[j]*NAA4+seqi[j+1]*NAA3+seqi[j+2]*NAA2+seqi[j+3]*NAA1+seqi[j+4];
414
415 else return FAILED_FUNC;
416
417 quick_sort(aan_list,0,aan_no-1);
418 for(j=0; j<aan_no; j++) aan_list_no[j]=1;
419 for(j=aan_no-1; j; j--) {
420 if (aan_list[j] == aan_list[j-1]) {
421 aan_list_no[j-1] += aan_list_no[j];
422 aan_list_no[j]=0;
423 }
424 }
425 // END check_aan_list
426
427
428 // lookup_aan
429 for (j=libfe; j>=libfb; j--) look_and_count[j]=0;
430 word_table.count_word_no(aan_no, aan_list, aan_list_no, look_and_count);
431
432
433 // contained_in_old_lib()
434 int band_left, band_right, best_score, band_width1, best_sum, len2, best1,sum;
435 int len1 = len - 1;
436 INTs *lookptr;
437
438 char *seqj;
439 int flag = 0; // compare to old lib
440 for (j=libe; j>=libb; j--) {
441 if ( (this_flag & IS_OLD_REP ) &&
442 (NR_flag[NR90_idx[j]] & IS_OLD_REP) ) continue;
443 len2 = NR_len[NR90_idx[j]];
444
445 k = (len2 - NAA) / Frag_size + 1;
446 lookptr = &look_and_count[ NR90f_idx[j] ];
447
448 if ( frg2 >= k ) {
449 best1=0;
450 for (j1=0; j1<k; j1++) best1 += lookptr[j1];
451 }
452 else {
453 sum = 0;
454 for (j1=0; j1<frg2; j1++) sum += lookptr[j1];
455 best1 = sum;
456 for (j1=frg2; j1<k; j1++) {
457 sum += lookptr[j1] - lookptr[j1-frg2];
458 if (sum > best1) best1 = sum;
459 }
460 }
461
462 if ( best1 < required_aan ) continue;
463
464 seqj = NR_seq[NR90_idx[j]];
465
466 if ( has_aa2 == 0 ) { // calculate AAP array
467 for (sk=0; sk<NAA2; sk++) taap[sk] = 0;
468 for (j1=0; j1<len1; j1++) {
469 c22= seqi[j1]*NAA1 + seqi[j1+1];
470 taap[c22]++;
471 }
472 for (sk=0,mm=0; sk<NAA2; sk++) {
473 aap_begin[sk] = mm; mm+=taap[sk]; taap[sk] = 0;
474 }
475 for (j1=0; j1<len1; j1++) {
476 c22= seqi[j1]*NAA1 + seqi[j1+1];
477 aap_list[aap_begin[c22]+taap[c22]++] =j1;
478 }
479 has_aa2 = 1;
480 }
481
482 band_width1 = (BAND_width < len+len2-2 ) ? BAND_width : len+len2-2;
483 diag_test_aapn(seqj, len, len2, taap, aap_begin,
484 aap_list, best_sum,
485 band_width1, band_left, band_right, required_aa1);
486 if ( best_sum < required_aa2 ) continue;
487
488 local_band_align(seqi, seqj, len, len2, mat,
489 best_score, iden_no, band_left, band_right);
490 if ( iden_no < required_aa1 ) continue;
491 if ( (iden_no * 100 / len ) < NR_clstr100 ) continue;
492
493 flag = 1; break; // else flag = 1, and break loop
494 }
495 hit_no = j;
496 return flag;
497 // END contained_in_old_lib()
498 } // END check_this_short
499
500
501 int add_in_lookup_table_short(int aan_no, int frg1,
502 int *aan_list, INTs *aan_list_no) {
503 int i, j, k, fra;
504
505 for (i=0; i<frg1; i++) {
506 k = (i+1)*Frag_size < aan_no ? (i+1)*Frag_size-1: aan_no-1;
507 quick_sort(aan_list, i*Frag_size, k);
508 }
509 for(j=aan_no-1; j; j--) {
510 if (aan_list[j] == aan_list[j-1]) {
511 aan_list_no[j-1] += aan_list_no[j];
512 aan_list_no[j]=0;
513 }
514 }
515 // END check_aan_list
516
517 for (i=0; i<aan_no; i+=Frag_size) {
518 k = Frag_size < (aan_no-i) ? Frag_size : (aan_no -i);
519 fra=i/Frag_size;
520 word_table.add_word_list(k, aan_list+i, aan_list_no+i, NR90f_no+fra);
521 }
522
523 return 0;
524 } // END add_in_lookup_table
525
526
527 /////////////////////////// END ALL ////////////////////////