ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/fqtrim/fqtrim.cpp
(Generate patch)
# Line 4 | Line 4
4   #include "GList.hh"
5   #include <ctype.h>
6   #include "GAlnExtend.h"
7 + #include <inttypes.h>
8  
9   #define USAGE "Usage:\n\
10 < fqtrim [{-5 <5adapter> -3 <3adapter>|-f <adapters_file>}] [-a <min_matchlen>]\\\n\
11 <   [-q <minq> [-t <trim_max_len>]] [-p {64|33}] [-o <outsuffix>]\\\n\
10 > fqtrim [{-5 <5adaptor> -3 <3adaptor>|-f <adaptors_file>}] [-a <min_matchlen>]\\\n\
11 >   [-R] [-q <minq> [-t <trim_max_len>]] [-p {64|33}] [-o <outsuffix>]\\\n\
12     [-l <minlen>] [-C] [-D] [-Q] [-n <rename_prefix>] [-r <discarded.lst>]\\\n\
13      <input.fq>[,<input_mates.fq>\n\
14   \n\
15 < Trim low quality bases at the 3' end and can trim adapter sequence(s), filter\n\
15 > Trim low quality bases at the 3' end and can trim adaptor sequence(s), filter\n\
16   for low complexity and collapse duplicate reads.\n\
17   If read pairs should be trimmed and kept together (i.e. without discarding\n\
18   one read in a pair), the two file names should be given delimited by a comma\n\
# Line 25 | Line 26
26      file(s) named <input>.<outsuffix> which will be created in the \n\
27      current (working) directory; (writes to stdout if -o- is given);\n\
28      a suffix ending with .gz, .gzip or .bz2 will enforce compression\n\
29 < -f  file with adapter sequences to trim, each line having this format:\n\
30 <    <5'-adapter-sequence> <3'-adapter-sequence>\n\
31 < -5  trim the given adapter or primer sequence at the 5' end of each read\n\
29 > -f  file with adaptor sequences to trim, each line having this format:\n\
30 >    <5'-adaptor-sequence> <3'-adaptor-sequence>\n\
31 > -5  trim the given adaptor or primer sequence at the 5' end of each read\n\
32      (e.g. -5 CGACAGGTTCAGAGTTCTACAGTCCGACGATC)\n\
33 < -3  trim the given adapter sequence at the 3' end of each read\n\
33 > -3  trim the given adaptor sequence at the 3' end of each read\n\
34      (e.g. -3 TCGTATGCCGTCTTCTGCTTG)\n\
35   -A  disable polyA/T trimming (enabled by default)\n\
36   -y  minimum length of exact match to adaptor sequence at the proper end (6)\n\
# Line 41 | Line 42
42   -r  write a simple \"trash report\" file listing the discarded reads with a\n\
43      one letter code for the reason why they were trashed\n\
44   -D  apply a low-complexity (dust) filter and discard any read that has over\n\
45 <    50% of its length detected as low complexity\n\
45 >    50%% of its length detected as low complexity\n\
46   -C  collapse duplicate reads and add a prefix with their count to the read\n\
47      name (e.g. for microRNAs)\n\
48   -p  input is phred64/phred33 (use -p64 or -p33)\n\
49   -Q  convert quality values to the other Phred qv type\n\
50 < -V  verbose processing\n\
50 > -V  be verbose when done (print summary counts)\n\
51   "
52 <
52 >
53   //-z  for -o option, the output stream(s) will be first piped into the given\n
54   //   <zcmd> command, which must output to stdout (e.g. -z 'bzip2 -9 -c')\n
55  
56 <
57 < // example 3' adapter for miRNAs: TCGTATGCCGTCTTCTGCTTG
56 >
57 > // example 3' adaptor for miRNAs: TCGTATGCCGTCTTCTGCTTG
58  
59   //For paired reads sequencing:
60   //3' : ACACTCTTTCCCTACACGACGCTCTTCCGATCT
# Line 62 | Line 63
63   //FILE* f_out2=NULL; //for paired reads
64   //FILE* f_in=NULL; //input fastq (stdin if not provided)
65   //FILE* f_in2=NULL; //for paired reads
66 +
67   FILE* freport=NULL;
68  
69   bool debug=false;
# Line 71 | Line 73
73   bool doPolyTrim=true;
74   bool fastaOutput=false;
75   bool trashReport=false;
76 < //bool rawFormat=false;
76 > bool revCompl=false; //also reverse complement adaptor sequences
77   int min_read_len=16;
78   double max_perc_N=7.0;
79   int dust_cutoff=16;
80   bool isfasta=false;
81   bool convert_phred=false;
82 < GStr outsuffix; // -o
82 > GStr outsuffix; // -o
83   GStr prefix;
84   GStr zcmd;
85 < int num_trimmed5=0;
86 < int num_trimmed3=0;
87 < int num_trimmedN=0;
88 < int num_trimmedQ=0;
89 < int min_trimmed5=INT_MAX;
90 < int min_trimmed3=INT_MAX;
85 > char isACGT[256];
86 >
87 > uint num_trimN=0; //reads trimmed by N%
88 > uint num_trimQ=0; //reads trimmed by qv threshold
89 > uint num_trimV=0; //reads trimmed by adapter match
90 > uint num_trimA=0; //reads trimmed by polyA
91 > uint num_trimT=0; //reads trimmed by polyT
92 > uint num_trim5=0; //number of reads trimmed at 5' end
93 > uint num_trim3=0; //number of reads trimmed at 3' end
94 >
95 > uint64 b_totalIn=0; //total number of input bases
96 > uint64 b_totalN=0;  //total number of undetermined bases found in input bases
97 >
98 > uint64 b_trimN=0; //total number of bases trimmed due to N-trim
99 > uint64 b_trimQ=0; //number of bases trimmed due to qv threshold
100 > uint64 b_trimV=0; //total number of bases trimmed due to adaptor matches
101 > uint64 b_trimA=0; //number of bases trimmed due to poly-A tails
102 > uint64 b_trimT=0; //number of bases trimmed due to poly-T tails
103 > uint64 b_trim5=0; //total bases trimmed on the 5' side
104 > uint64 b_trim3=0; //total bases trimmed on the 3' side
105 > //int min_trimmed5=INT_MAX;
106 > //int min_trimmed3=INT_MAX;
107  
108   int qvtrim_qmin=0;
109   int qvtrim_max=0;  //for -q, do not trim at 3'-end more than this number of bases
# Line 93 | Line 111
111   int qv_cvtadd=0; //could be -31 or +31
112  
113   // adaptor matching metrics -- for X-drop ungapped extension
114 + //const int match_reward=2;
115 + //const int mismatch_penalty=3;
116   const int match_reward=2;
117 < const int mismatch_penalty=3;
118 < const int Xdrop=8;
117 > const int mismatch_penalty=4;
118 > const int Xdrop=10;
119  
120   const int poly_m_score=2; //match score
121   const int poly_mis_score=-3; //mismatch
# Line 104 | Line 124
124  
125   const char *polyA_seed="AAAA";
126   const char *polyT_seed="TTTT";
127 < GVec<GStr> adapters5;
128 < GVec<GStr> adapters3;
127 >
128 > struct CASeqData {
129 >   //positional data for every possible hexamer in an adaptor
130 >   GVec<uint16>* pz[4096]; //0-based coordinates of all possible hexamers in the adaptor sequence
131 >   GVec<uint16>* pzr[4096]; //0-based coordinates of all possible hexamers for the reverse complement of the adaptor sequence
132 >   GStr seq; //actual adaptor sequence data
133 >   GStr seqr; //reverse complement sequence
134 >   int amlen; //fraction of adaptor length matching that's
135 >              //enough to consider the alignment
136 >   GAlnTrimType trim_type;
137 >   bool use_reverse;
138 >   CASeqData(bool rev=false):seq(),seqr(),
139 >             amlen(0), use_reverse(rev) {
140 >     trim_type=galn_None; //should be updated later!
141 >     for (int i=0;i<4096;i++) {
142 >        pz[i]=NULL;
143 >        pzr[i]=NULL;
144 >        }
145 >     }
146 >
147 >   void update(const char* s) {
148 >         seq=s;
149 >         table6mers(seq.chars(), seq.length(), pz);
150 >         amlen=iround(double(seq.length())*0.8);
151 >         if (amlen<12)
152 >                amlen=12;
153 >         if (!use_reverse) return;
154 >         //reverse complement
155 >         seqr=s;
156 >         int slen=seq.length();
157 >         for (int i=0;i<slen;i++)
158 >           seqr[i]=ntComplement(seq[slen-i-1]);
159 >         table6mers(seqr.chars(), seqr.length(), pzr);
160 >   }
161 >
162 >   ~CASeqData() {
163 >     for (int i=0;i<4096;i++) {
164 >       delete pz[i];
165 >       delete pzr[i];
166 >     }
167 >   }
168 > };
169 >
170 > GVec<CASeqData> adaptors5;
171 > GVec<CASeqData> adaptors3;
172  
173   CGreedyAlignData* gxmem_l=NULL;
174   CGreedyAlignData* gxmem_r=NULL;
# Line 158 | Line 221
221  
222   GHash<FqDupRec> dhash; //hash to keep track of duplicates
223  
224 < int loadAdapters(const char* fname);
224 > void addAdaptor(GVec<CASeqData>& adaptors, GStr& seq, GAlnTrimType trim_type);
225 > int loadAdaptors(const char* fname);
226  
227 < void setupFiles(FILE*& f_in, FILE*& f_in2, FILE*& f_out, FILE*& f_out2,
228 <                       GStr& s, GStr& infname, GStr& infname2);
227 > void setupFiles(FILE*& f_in, FILE*& f_in2, FILE*& f_out, FILE*& f_out2,
228 >                       GStr& s, GStr& infname, GStr& infname2);
229   // uses outsuffix to generate output file names and open file handles as needed
230 <
230 >
231   void writeRead(FILE* f_out, GStr& rname, GStr& rinfo, GStr& rseq, GStr& rqv, int& outcounter);
232   void trash_report(char trashcode, GStr& rname, FILE* freport);
233  
234 < bool getFastxRec(GLineReader& fq, GStr& rseq, GStr& rqv,
234 > bool getFastxRec(GLineReader& fq, GStr& rseq, GStr& rqv,
235            GStr& rname, GStr& rinfo, GStr& infname);
236  
237 < char process_read(GStr& rname, GStr& rseq, GStr& rqv, int &l5, int &l3);
237 > char process_read(GStr& rname, GStr& rseq, GStr& rqv, int &l5, int &l3);
238   //returns 0 if the read was untouched, 1 if it was trimmed and a trash code if it was trashed
239  
240   bool ntrim(GStr& rseq, int &l5, int &l3); //returns true if any trimming occured
# Line 178 | Line 242
242   int dust(GStr& seq);
243   bool trim_poly5(GStr &seq, int &l5, int &l3, const char* poly_seed); //returns true if any trimming occured
244   bool trim_poly3(GStr &seq, int &l5, int &l3, const char* poly_seed);
245 < bool trim_adapter5(GStr& seq, int &l5, int &l3); //returns true if any trimming occured
246 < bool trim_adapter3(GStr& seq, int &l5, int &l3);
245 > bool trim_adaptor5(GStr& seq, int &l5, int &l3); //returns true if any trimming occured
246 > bool trim_adaptor3(GStr& seq, int &l5, int &l3);
247  
248   void convertPhred(char* q, int len);
249   void convertPhred(GStr& q);
250  
251   int main(int argc, char * const argv[]) {
252 <  GArgs args(argc, argv, "YQDCVAl:d:3:5:m:n:r:p:q:f:t:o:z:a:");
252 >  GArgs args(argc, argv, "YQDCRVAl:d:3:5:m:n:r:p:q:f:t:o:z:a:");
253    int e;
254    if ((e=args.isError())>0) {
255        GMessage("%s\nInvalid argument: %s\n", USAGE, argv[e]);
# Line 196 | Line 260
260    convert_phred=(args.getOpt('Q')!=NULL);
261    doCollapse=(args.getOpt('C')!=NULL);
262    doDust=(args.getOpt('D')!=NULL);
263 +  revCompl=(args.getOpt('R')!=NULL);
264    if (args.getOpt('A')) doPolyTrim=false;
265    /*
266    rawFormat=(args.getOpt('R')!=NULL);
# Line 205 | Line 270
270    */
271    prefix=args.getOpt('n');
272    GStr s=args.getOpt('l');
273 <  if (!s.is_empty())
273 >  if (!s.is_empty())
274       min_read_len=s.asInt();
275    s=args.getOpt('m');
276 <  if (!s.is_empty())
276 >  if (!s.is_empty())
277       max_perc_N=s.asDouble();
278    s=args.getOpt('d');
279    if (!s.is_empty()) {
# Line 234 | Line 299
299          qv_phredtype=64;
300          qv_cvtadd=-31;
301          }
302 <       else
302 >       else
303           GMessage("%s\nInvalid value for -p option (can only be 64 or 33)!\n",USAGE);
304       }
305 +  memset((void*)isACGT, 0, 256);
306 +  isACGT['A']=isACGT['a']=isACGT['C']=isACGT['c']=1;
307 +  isACGT['G']=isACGT['g']=isACGT['T']=isACGT['t']=1;
308    s=args.getOpt('f');
309    if (!s.is_empty()) {
310 <   loadAdapters(s.chars());
310 >   loadAdaptors(s.chars());
311     }
312 <  bool fileAdapters=adapters5.Count()+adapters3.Count();
312 >  bool fileAdaptors=adaptors5.Count()+adaptors3.Count();
313    s=args.getOpt('5');
314    if (!s.is_empty()) {
315 <    if (fileAdapters)
315 >    if (fileAdaptors)
316        GError("Error: options -5 and -f cannot be used together!\n");
317      s.upper();
318 <    adapters5.Add(s);
318 >    addAdaptor(adaptors5, s, galn_TrimLeft);
319      }
320    s=args.getOpt('3');
321    if (!s.is_empty()) {
322 <    if (fileAdapters)
322 >    if (fileAdaptors)
323        GError("Error: options -3 and -f cannot be used together!\n");
324        s.upper();
325 <      adapters3.Add(s);
325 >      addAdaptor(adaptors3, s, galn_TrimRight);
326      }
327    s=args.getOpt('y');
328    if (!s.is_empty()) {
329       int minmatch=s.asInt();
330       poly_minScore=minmatch*poly_m_score;
331       }
332 <  
332 >
333    if (args.getOpt('o')!=NULL) outsuffix=args.getOpt('o');
334                           else outsuffix="-";
335    trashReport=  (args.getOpt('r')!=NULL);
# Line 279 | Line 347
347      openfw(freport, args, 'r');
348    char* infile=NULL;
349  
350 <  if (adapters5.Count()>0)
351 <    gxmem_l=new CGreedyAlignData(match_reward, mismatch_penalty, Xdrop-2);
352 <  if (adapters3.Count()>0)
350 >  if (adaptors5.Count()>0)
351 >    //gxmem_l=new CGreedyAlignData(match_reward, mismatch_penalty, Xdrop-2);
352 >        gxmem_l=new CGreedyAlignData(match_reward, mismatch_penalty, Xdrop);
353 >  if (adaptors3.Count()>0)
354      gxmem_r=new CGreedyAlignData(match_reward, mismatch_penalty, Xdrop);
355  
356    while ((infile=args.nextNonOpt())!=NULL) {
# Line 293 | Line 362
362      int trash_N=0;
363      int trash_D=0;
364      int trash_poly=0;
365 <    int trash_A3=0;
366 <    int trash_A5=0;
365 >    int trash_V=0;
366 >    num_trimN=0;
367 >    num_trimQ=0;
368 >    num_trimV=0;
369 >    num_trimA=0;
370 >    num_trimT=0;
371 >    num_trim5=0;
372 >    num_trim3=0;
373 >
374 >    b_totalIn=0;
375 >    b_totalN=0;
376 >    b_trimN=0;
377 >    b_trimQ=0;
378 >    b_trimV=0;
379 >    b_trimA=0;
380 >    b_trimT=0;
381 >    b_trim5=0;
382 >    b_trim3=0;
383 >
384      s=infile;
385      GStr infname;
386      GStr infname2;
# Line 317 | Line 403
403         int a5=0, a3=0, b5=0, b3=0;
404         char tcode=0, tcode2=0;
405         tcode=process_read(seqid, rseq, rqv, a5, a3);
406 +       if (a5>0) {
407 +          b_trim5+=a5;
408 +          num_trim5++;
409 +          }
410 +       if (a3<rseq.length()-1) {
411 +          b_trim3+=rseq.length()-a3-1;
412 +          num_trim3++;
413 +          }
414         if (fq2!=NULL) {
415              getFastxRec(*fq2, rseq2, rqv2, seqid2, seqinfo2, infname2);
416              if (seqid.substr(0,seqid.length()-1)!=seqid2.substr(0,seqid2.length()-1)) {
# Line 324 | Line 418
418                    seqid.chars(), seqid2.chars(), infname.chars(), infname2.chars());
419                 }
420              tcode2=process_read(seqid2, rseq2, rqv2, b5, b3);
421 +            if (b5>0) {
422 +               b_trim5+=b5;
423 +               num_trim5++;
424 +               }
425 +            if (b3<rseq2.length()-1) {
426 +               b_trim3+=rseq2.length()-b3-1;
427 +               num_trim3++;
428 +               }
429              //decide what to do with this pair and print rseq2 if the pair makes it
430              if (tcode>1 && tcode2<=1) {
431                 //"untrash" rseq
# Line 352 | Line 454
454              } //pair read
455         if (tcode>1) { //trashed
456           #ifdef GDEBUG
457 <         GMessage(" !!!!TRASH => 'N'\n");
457 >         GMessage(" !!!!TRASH code = %c\n",tcode);
458           #endif
459            if (tcode=='s') trash_s++;
460            else if (tcode=='A' || tcode=='T') trash_poly++;
461              else if (tcode=='Q') trash_Q++;
462                else if (tcode=='N') trash_N++;
463                 else if (tcode=='D') trash_D++;
464 <                else if (tcode=='3') trash_A3++;
465 <                 else if (tcode=='5') trash_A5++;
464 >                else if (tcode=='V') trash_V++;
465 >                 //else if (tcode=='5') trash_A5++;
466            if (trashReport) trash_report(tcode, seqid, freport);
467            }
468           else if (!doCollapse) { //write it
# Line 399 | Line 501
501                 }
502              }
503           outcounter++;
504 <         if (qd->count>maxdup_count) {
504 >         if (qd->count>maxdup_count) {
505              maxdup_count=qd->count;
506              maxdup_seq=seq;
507              }
508           if (isfasta) {
509             if (prefix.is_empty()) {
510 <             fprintf(f_out, ">%s_x%d\n%s\n", qd->firstname, qd->count,
510 >             fprintf(f_out, ">%s_x%d\n%s\n", qd->firstname, qd->count,
511                             rseq.chars());
512               }
513             else { //use custom read name
# Line 416 | Line 518
518           else { //fastq format
519            if (convert_phred) convertPhred(qd->qv, qd->len);
520            if (prefix.is_empty()) {
521 <            fprintf(f_out, "@%s_x%d\n%s\n+\n%s\n", qd->firstname, qd->count,
521 >            fprintf(f_out, "@%s_x%d\n%s\n+\n%s\n", qd->firstname, qd->count,
522                             rseq.chars(), qd->qv);
523              }
524            else { //use custom read name
# Line 432 | Line 534
534      if (verbose) {
535         if (paired_reads) {
536             GMessage(">Input files : %s , %s\n", infname.chars(), infname2.chars());
537 <           GMessage("Number of input pairs :%9d\n", incounter);
538 <           GMessage("         Output pairs :%9d\n", outcounter);
537 >           GMessage("Number of input pairs :%9u\n", incounter);
538 >           GMessage("         Output pairs :%9u\t(%u trashed)\n", outcounter, incounter-outcounter);
539             }
540           else {
541             GMessage(">Input file : %s\n", infname.chars());
542             GMessage("Number of input reads :%9d\n", incounter);
543 <           GMessage("         Output reads :%9d\n", outcounter);
543 >           GMessage("         Output reads :%9d  (%u trashed)\n", outcounter, incounter-outcounter);
544             }
545 <       GMessage("------------------------------------\n");
546 <       if (num_trimmed5)
547 <          GMessage("           5' trimmed :%9d  (min. trim: %d)\n", num_trimmed5, min_trimmed5);
548 <       if (num_trimmed3)
549 <          GMessage("           3' trimmed :%9d  (min. trim: %d)\n", num_trimmed3, min_trimmed3);
545 >       GMessage("--------------- Read trimming: ------------------\n");
546 >       if (num_trim5)
547 >          GMessage("           5' trimmed :%9u\n", num_trim5);
548 >       if (num_trim3)
549 >          GMessage("           3' trimmed :%9u\n", num_trim3);
550 >       if (num_trimQ)
551 >          GMessage("         q.v. trimmed :%9u\n", num_trimQ);
552 >       if (num_trimN)
553 >          GMessage("            N trimmed :%9u\n", num_trimN);
554 >       if (num_trimT)
555 >          GMessage("       poly-T trimmed :%9u\n", num_trimT);
556 >       if (num_trimA)
557 >          GMessage("       poly-A trimmed :%9u\n", num_trimA);
558 >       if (num_trimV)
559 >          GMessage("      Adapter trimmed :%9u\n", num_trimV);
560         GMessage("------------------------------------\n");
561         if (trash_s>0)
562 <         GMessage("     Trashed by length:%9d\n", trash_s);
562 >         GMessage("Trashed by initial len:%9d\n", trash_s);
563         if (trash_N>0)
564           GMessage("         Trashed by N%%:%9d\n", trash_N);
565         if (trash_Q>0)
566           GMessage("Trashed by low quality:%9d\n", trash_Q);
567         if (trash_poly>0)
568           GMessage("   Trashed by poly-A/T:%9d\n", trash_poly);
569 <       if (trash_A5>0)
570 <         GMessage(" Trashed by 5' adapter:%9d\n", trash_A5);
571 <       if (trash_A3>0)
572 <         GMessage(" Trashed by 3' adapter:%9d\n", trash_A3);
569 >       if (trash_V>0)
570 >         GMessage("    Trashed by adaptor:%9d\n", trash_V);
571 >       GMessage("\n--------------- Base counts  --------------------\n");
572 >       GMessage("    Input bases   :%12llu\n", b_totalIn);
573 >       double percN=100.0* ((double)b_totalN/(double)b_totalIn);
574 >       GMessage("          N bases :%12llu (%4.2f%%)\n", b_totalN, percN);
575 >       GMessage("   trimmed from 5':%12llu\n", b_trim5);
576 >       GMessage("   trimmed from 3':%12llu\n", b_trim3);
577 >       GMessage("\n");
578 >       if (b_trimQ)
579 >       GMessage("     q.v. trimmed :%12llu\n", b_trimQ);
580 >       if (b_trimN)
581 >       GMessage("        N trimmed :%12llu\n", b_trimN);
582 >       if (b_trimT)
583 >       GMessage("   poly-T trimmed :%12llu\n", b_trimT);
584 >       if (b_trimA)
585 >       GMessage("   poly-A trimmed :%12llu\n", b_trimA);
586 >       if (b_trimV)
587 >       GMessage("  Adapter trimmed :%12llu\n", b_trimV);
588 >
589         }
590      if (trashReport) {
591            FWCLOSE(freport);
# Line 481 | Line 609
609     const char* seq;
610     bool valid;
611     NData() {
612 +    seqlen=0;
613      NCount=0;
614      end5=0;
615      end3=0;
# Line 511 | Line 640
640       perc_N=(n*100.0)/(end5-end3+1);
641       }
642   };
643 <
643 >
644   static NData feat;
645   int perc_lenN=12; // incremental distance from ends, in percentage of
646            // sequence length, where N-trimming is done (default:12 %) (autolimited to 20)
647 <          
647 >
648   void N_analyze(int l5, int l3, int p5, int p3) {
649   /* assumes feat was filled properly */
650   int old_dif, t5,t3,v;
651   if (l3<l5+2 || p5>p3 ) {
652     feat.end5=l5+1;
653     feat.end3=l3+1;
654 <   return;
654 >   return;
655     }
656  
657   t5=feat.NPos[p5]-l5;
658   t3=l3-feat.NPos[p3];
659   old_dif=p3-p5;
660   v=(int)((((double)(l3-l5))*perc_lenN)/100);
661 < if (v>20) v=20; /* enforce N-search limit for very long reads */
661 > if (v>20) v=20; /* enforce N-search limit for very long reads */
662   if (t5 < v ) {
663     l5=feat.NPos[p5]+1;
664     p5++;
# Line 546 | Line 675
675             feat.end3=l3+1;
676             return;
677             }
678 <    else
678 >    else
679        N_analyze(l5,l3, p5,p3);
680   }
681  
# Line 587 | Line 716
716   feat.init(rseq);
717   l5=feat.end5-1;
718   l3=feat.end3-1;
719 < N_analyze(feat.end5-1, feat.end3-1, 0, feat.NCount-1);
719 > N_analyze(feat.end5-1, feat.end3-1, 0, feat.NCount-1);
720   if (l5==feat.end5-1 && l3==feat.end3-1) {
721      if (feat.perc_N>max_perc_N) {
722             feat.valid=false;
# Line 605 | Line 734
734     return true;
735     }
736   feat.N_calc();
737 <
737 >
738   if (feat.perc_N>max_perc_N) {
739        feat.valid=false;
740        l3=l5+1;
# Line 617 | Line 746
746   //--------------- dust functions ----------------
747   class DNADuster {
748   public:
749 <  int dustword;
750 <  int dustwindow;
751 <  int dustwindow2;
749 >  int dustword;
750 >  int dustwindow;
751 >  int dustwindow2;
752    int dustcutoff;
753    int mv, iv, jv;
754    int counts[32*32*32];
# Line 714 | Line 843
843                      }
844             }
845           }
846 < //return first;
846 > //return first;
847   }
848   };
849  
# Line 732 | Line 861
861   return ncount;
862   }
863  
735 int get3mer_value(const char* s) {
736 return (s[0]<<16)+(s[1]<<8)+s[2];
737 }
738
739 int w3_match(int qv, const char* str, int slen, int start_index=0) {
740 if (start_index>=slen || start_index<0) return -1;
741 for (int i=start_index;i<slen-3;i++) {
742   int rv=get3mer_value(str+i);
743   if (rv==qv) return i;
744   }
745 return -1;
746 }
747
748 int w3_rmatch(int qv, const char* str, int slen, int end_index=-1) {
749 if (end_index>=slen) return -1;
750 if (end_index<0) end_index=slen-1;
751 for (int i=end_index-2;i>=0;i--) {
752   int rv=get3mer_value(str+i);
753   if (rv==qv) return i;
754   }
755 return -1;
756 }
757
864   struct SLocScore {
865    int pos;
866    int score;
# Line 796 | Line 902
902   SLocScore loc(ri, poly_m_score<<2);
903   SLocScore maxloc(loc);
904   //extend right
905 < while (ri<rlen-2) {
905 > while (ri<rlen-1) {
906     ri++;
907     if (seq[ri]==polyChar) {
908                  loc.add(ri,poly_m_score);
# Line 892 | Line 998
998   //extend right
999   loc.set(ri, maxloc.score);
1000   maxloc.pos=ri;
1001 < while (ri<rlen-2) {
1001 > while (ri<rlen-1) {
1002     ri++;
1003     if (seq[ri]==polyChar) {
1004                  loc.add(ri,poly_m_score);
# Line 920 | Line 1026
1026   return false;
1027   }
1028  
1029 < bool trim_adapter3(GStr& seq, int&l5, int &l3) {
1030 < if (adapters3.Count()==0) return false;
1029 > bool trim_adaptor3(GStr& seq, int&l5, int &l3) {
1030 > if (adaptors3.Count()==0) return false;
1031   int rlen=seq.length();
1032   l5=0;
1033   l3=rlen-1;
1034   bool trimmed=false;
1035 < GStr wseq(seq.chars());
1035 > GStr wseq(seq);
1036   int wlen=rlen;
1037 < for (int ai=0;ai<adapters3.Count();ai++) {
1038 <  if (adapters3[ai].is_empty()) continue;
1039 <  int alen=adapters3[ai].length();
1040 <  GStr& aseq=adapters3[ai];
1041 <  GXAlnInfo* r_bestaln=match_RightEnd(aseq.chars(), alen, wseq.chars(), wlen, gxmem_r, 74);
1042 <  if (r_bestaln) {
1043 <     trimmed=true;
1044 <     //keep unmatched region on the left, if any
1045 <     l3-=(wlen-r_bestaln->sl+1);
1046 <     delete r_bestaln;
1047 <     if (l3<0) l3=0;
1048 <     if (l3-l5+1<min_read_len) return true;
1049 <     wseq=seq.substr(l5,l3-l5+1);
1050 <     wlen=wseq.length();
1037 > GXSeqData seqdata;
1038 > int numruns=revCompl ? 2 : 1;
1039 > GList<GXAlnInfo> bestalns(true, true, false);
1040 > for (int ai=0;ai<adaptors3.Count();ai++) {
1041 >   for (int r=0;r<numruns;r++) {
1042 >     if (r) {
1043 >          seqdata.update(adaptors3[ai].seqr.chars(), adaptors3[ai].seqr.length(),
1044 >                 adaptors3[ai].pzr, wseq.chars(), wlen, adaptors3[ai].amlen);
1045 >        }
1046 >     else {
1047 >            seqdata.update(adaptors3[ai].seq.chars(), adaptors3[ai].seq.length(),
1048 >                 adaptors3[ai].pz, wseq.chars(), wlen, adaptors3[ai].amlen);
1049 >        }
1050 >     GXAlnInfo* aln=match_adaptor(seqdata, adaptors3[ai].trim_type, gxmem_r, 86);
1051 >         if (aln) {
1052 >           if (aln->strong) {
1053 >                   trimmed=true;
1054 >                   bestalns.Add(aln);
1055 >                   break; //will check the rest next time
1056 >                   }
1057 >            else bestalns.Add(aln);
1058 >           }
1059 >   }//forward and reverse adaptors
1060 >   if (trimmed) break; //will check the rest in the next cycle
1061 >  }//for each 3' adaptor
1062 > if (bestalns.Count()>0) {
1063 >           GXAlnInfo* aln=bestalns[0];
1064 >           if (aln->sl-1 > wlen-aln->sr) {
1065 >                   //keep left side
1066 >                   l3-=(wlen-aln->sl+1);
1067 >                   if (l3<0) l3=0;
1068 >                   }
1069 >           else { //keep right side
1070 >                   l5+=aln->sr;
1071 >                   if (l5>=rlen) l5=rlen-1;
1072 >                   }
1073 >           //delete aln;
1074 >           //if (l3-l5+1<min_read_len) return true;
1075 >           wseq=seq.substr(l5,l3-l5+1);
1076 >           wlen=wseq.length();
1077 >           return true; //break the loops here to report a good find
1078       }
1079 <  }//for each 5' adapter
947 <  return trimmed;
1079 >  return false;
1080   }
1081  
1082 < bool trim_adapter5(GStr& seq, int&l5, int &l3) {
1083 < if (adapters5.Count()==0) return false;
1082 > bool trim_adaptor5(GStr& seq, int&l5, int &l3) {
1083 > if (adaptors5.Count()==0) return false;
1084   int rlen=seq.length();
1085   l5=0;
1086   l3=rlen-1;
1087   bool trimmed=false;
1088 < GStr wseq(seq.chars());
1088 > GStr wseq(seq);
1089   int wlen=rlen;
1090 < for (int ai=0;ai<adapters5.Count();ai++) {
1091 <  if (adapters5[ai].is_empty()) continue;
1092 <  int alen=adapters5[ai].length();
1093 <  GStr& aseq=adapters5[ai];
1094 <  GXAlnInfo* l_bestaln=match_LeftEnd(aseq.chars(), alen, wseq.chars(), wlen, gxmem_l, 84);
1095 <  if (l_bestaln) {
1096 <     trimmed=true;
1097 <     l5+=l_bestaln->sr;
1098 <     delete l_bestaln;
1099 <     if (l5>=rlen) l5=rlen-1;
1100 <     if (l3-l5+1<min_read_len) return true;
1101 <     wseq=seq.substr(l5,l3-l5+1);
1102 <     wlen=wseq.length();
1090 > GXSeqData seqdata;
1091 > int numruns=revCompl ? 2 : 1;
1092 > GList<GXAlnInfo> bestalns(true, true, false);
1093 > for (int ai=0;ai<adaptors5.Count();ai++) {
1094 >   for (int r=0;r<numruns;r++) {
1095 >     if (r) {
1096 >          seqdata.update(adaptors5[ai].seqr.chars(), adaptors5[ai].seqr.length(),
1097 >                 adaptors5[ai].pzr, wseq.chars(), wlen, adaptors5[ai].amlen);
1098 >        }
1099 >     else {
1100 >            seqdata.update(adaptors5[ai].seq.chars(), adaptors5[ai].seq.length(),
1101 >                 adaptors5[ai].pz, wseq.chars(), wlen, adaptors5[ai].amlen);
1102 >        }
1103 >         GXAlnInfo* aln=match_adaptor(seqdata, adaptors5[ai].trim_type, gxmem_l, 90);
1104 >         if (aln) {
1105 >           if (aln->strong) {
1106 >                   trimmed=true;
1107 >                   bestalns.Add(aln);
1108 >                   break; //will check the rest next time
1109 >                   }
1110 >            else bestalns.Add(aln);
1111 >           }
1112 >         } //forward and reverse?
1113 >   if (trimmed) break; //will check the rest in the next cycle
1114 >  }//for each 5' adaptor
1115 >  if (bestalns.Count()>0) {
1116 >           GXAlnInfo* aln=bestalns[0];
1117 >           if (aln->sl-1 > wlen-aln->sr) {
1118 >                   //keep left side
1119 >                   l3-=(wlen-aln->sl+1);
1120 >                   if (l3<0) l3=0;
1121 >                   }
1122 >           else { //keep right side
1123 >                   l5+=aln->sr;
1124 >                   if (l5>=rlen) l5=rlen-1;
1125 >                   }
1126 >           //delete aln;
1127 >           //if (l3-l5+1<min_read_len) return true;
1128 >           wseq=seq.substr(l5,l3-l5+1);
1129 >           wlen=wseq.length();
1130 >           return true; //break the loops here to report a good find
1131       }
1132 <  }//for each 5' adapter
973 <  return trimmed;
1132 >  return false;
1133   }
1134  
1135 < //convert qvs to/from phred64 from/to phread33
1135 > //convert qvs to/from phred64 from/to phread33
1136   void convertPhred(GStr& q) {
1137   for (int i=0;i<q.length();i++) q[i]+=qv_cvtadd;
1138   }
# Line 982 | Line 1141
1141   for (int i=0;i<len;i++) q[i]+=qv_cvtadd;
1142   }
1143  
1144 < bool getFastxRec(GLineReader& fq, GStr& rseq, GStr& rqv,
1144 > bool getFastxRec(GLineReader& fq, GStr& rseq, GStr& rqv,
1145            GStr& rname, GStr& rinfo, GStr& infname) {
1146   rseq="";
1147   rqv="";
# Line 998 | Line 1157
1157        } //raw qseq format
1158   else { // FASTQ or FASTA */
1159   isfasta=(l[0]=='>');
1160 < if (!isfasta && l[0]!='@') GError("Error: fasta/fastq record marker not found(%s)\n%s\n",
1160 > if (!isfasta && l[0]!='@') GError("Error: fasta/fastq record marker not found(%s)\n%s\n",
1161        infname.chars(), l);
1162   GStr s(l);
1163   rname=&(l[1]);
1164   for (int i=0;i<rname.length();i++)
1165 <    if (rname[i]<=' ') {
1166 <       if (i<rname.length()-2) rinfo=rname.substr(i+1);
1167 <       rname.cut(i);
1168 <       break;
1165 >    if (rname[i]<=' ') {
1166 >       if (i<rname.length()-2) rinfo=rname.substr(i+1);
1167 >       rname.cut(i);
1168 >       break;
1169         }
1170    //now get the sequence
1171 < if ((l=fq.getLine())==NULL)
1171 > if ((l=fq.getLine())==NULL)
1172        GError("Error: unexpected EOF after header for read %s (%s)\n",
1173                     rname.chars(), infname.chars());
1174   rseq=l; //this must be the DNA line
1175   while ((l=fq.getLine())!=NULL) {
1176        //seq can span multiple lines
1177        if (l[0]=='>' || l[0]=='+') {
1178 <           fq.pushBack();
1178 >           fq.pushBack();
1179             break; //
1180             }
1181        rseq+=l;
1182 <      } //check for multi-line seq
1182 >      } //check for multi-line seq
1183   if (!isfasta) { //reading fastq quality values, which can also be multi-line
1184      if ((l=fq.getLine())==NULL)
1185          GError("Error: unexpected EOF after sequence for %s\n", rname.chars());
1186      if (l[0]!='+') GError("Error: fastq qv header marker not detected!\n");
1187 <    if ((l=fq.getLine())==NULL)
1187 >    if ((l=fq.getLine())==NULL)
1188          GError("Error: unexpected EOF after qv header for %s\n", rname.chars());
1189      rqv=l;
1190 <    //if (rqv.length()!=rseq.length())
1190 >    //if (rqv.length()!=rseq.length())
1191      //  GError("Error: qv len != seq len for %s\n", rname.chars());
1192      while (rqv.length()<rseq.length() && ((l=fq.getLine())!=NULL)) {
1193        rqv+=l; //append to qv string
# Line 1041 | Line 1200
1200  
1201   #ifdef GDEBUG
1202   void showTrim(GStr& s, int l5, int l3) {
1203 <  if (l5>0) {
1203 >  if (l5>0 || l3==0) {
1204      color_bg(c_red);
1205      }
1206    for (int i=0;i<s.length()-1;i++) {
1207      if (i && i==l5) color_resetbg();
1208      fprintf(stderr, "%c", s[i]);
1209 <    if (i==l3) color_bg(c_red);
1209 >    if (i && i==l3) color_bg(c_red);
1210     }
1211    fprintf(stderr, "%c", s[s.length()-1]);
1212    color_reset();
# Line 1056 | Line 1215
1215   #endif
1216  
1217   char process_read(GStr& rname, GStr& rseq, GStr& rqv, int &l5, int &l3) {
1218 < //returns 0 if the read was untouched, 1 if it was just trimmed
1218 > //returns 0 if the read was untouched, 1 if it was just trimmed
1219   // and a trash code if it was trashed
1220   l5=0;
1221   l3=rseq.length()-1;
# Line 1072 | Line 1231
1231   GStr wqv(rqv.chars());
1232   int w5=l5;
1233   int w3=l3;
1234 + //count Ns
1235 + b_totalIn+=rseq.length();
1236 + for (int i=0;i<rseq.length();i++) {
1237 + if (isACGT[(int)rseq[i]]==0) b_totalN++;
1238 + }
1239 +
1240   //first do the q-based trimming
1241   if (qvtrim_qmin!=0 && !wqv.is_empty() && qtrim(wqv, w5, w3)) { // qv-threshold trimming
1242 +   b_trimQ+=(w5-l5)+(l3-w3);
1243 +   num_trimQ++;
1244     if (w3-w5+1<min_read_len) {
1245       return 'Q'; //invalid read
1246       }
# Line 1086 | Line 1253
1253     } //qv trimming
1254   // N-trimming on the remaining read seq
1255   if (ntrim(wseq, w5, w3)) {
1256 +   //Note: ntrim resets w5 and w3
1257     //GMessage("before: %s\n",wseq.chars());
1258     //GMessage("after : %s (%d)\n",wseq.substr(w5,w3-w5+1).chars(),w3-w5+1);
1259 +   int trim3=(wseq.length()-1-w3);
1260 +   b_trimN+=w5+trim3;
1261 +   num_trimN++;
1262     l5+=w5;
1263 <   l3-=(wseq.length()-1-w3);
1263 >   l3-=trim3;
1264     if (w3-w5+1<min_read_len) {
1265       return 'N'; //to be trashed
1266       }
# Line 1101 | Line 1272
1272     w3=wseq.length()-1;
1273     }
1274   char trim_code;
1275 + //clean the more dirty end first - 3'
1276 + bool trimmedA=false;
1277 + bool trimmedT=false;
1278 + bool trimmedV=false;
1279 +
1280 + int prev_w5=0;
1281 + int prev_w3=0;
1282 + bool w3upd=true;
1283 + bool w5upd=true;
1284   do {
1285    trim_code=0;
1286 <  if (trim_poly5(wseq, w5, w3, polyA_seed)) {
1286 >  if (w3upd && trim_poly3(wseq, w5, w3, polyA_seed)) {
1287        trim_code='A';
1288 +      b_trimA+=(w5+(wseq.length()-1-w3));
1289 +      if (!trimmedA) { num_trimA++; trimmedA=true; }
1290        }
1291 <  else if (trim_poly5(wseq, w5, w3, polyT_seed)) {
1291 >  else if (w3upd && trim_poly3(wseq, w5, w3, polyT_seed)) {
1292        trim_code='T';
1293 +      b_trimT+=(w5+(wseq.length()-1-w3));
1294 +      if (!trimmedT) { num_trimT++; trimmedT=true; }
1295        }
1296 <  else if (trim_adapter5(wseq, w5, w3)) {
1113 <      trim_code='5';
1114 <      }
1115 <  if (trim_code) {
1116 <     #ifdef GDEBUG
1117 <      GMessage("#### TRIM by '%c' code ( w5-w3 = %d-%d ):\n",trim_code, w5,w3);
1118 <      showTrim(wseq, w5, w3);
1119 <     #endif
1120 <     int trimlen=wseq.length()-(w3-w5+1);
1121 <     num_trimmed5++;
1122 <     if (trimlen<min_trimmed5)
1123 <         min_trimmed5=trimlen;
1124 <     l5+=w5;
1125 <     l3-=(wseq.length()-1-w3);
1126 <     if (w3-w5+1<min_read_len) {
1127 <         return trim_code;
1128 <         }
1129 <      //-- keep only the w5..w3 range
1130 <      wseq=wseq.substr(w5, w3-w5+1);
1131 <      if (!wqv.is_empty())
1132 <         wqv=wqv.substr(w5, w3-w5+1);
1133 <      }// trimmed at 5' end
1134 < } while (trim_code);
1135 <
1136 < do {
1137 <  trim_code=0;
1138 <  if (trim_poly3(wseq, w5, w3, polyA_seed)) {
1296 >  else if (w5upd && trim_poly5(wseq, w5, w3, polyA_seed)) {
1297        trim_code='A';
1298 +      b_trimA+=(w5+(wseq.length()-1-w3));
1299 +      if (!trimmedA) { num_trimA++; trimmedA=true; }
1300        }
1301 <  else if (trim_poly3(wseq, w5, w3, polyT_seed)) {
1301 >  else if (w5upd && trim_poly5(wseq, w5, w3, polyT_seed)) {
1302        trim_code='T';
1303 +      b_trimT+=(w5+(wseq.length()-1-w3));
1304 +      if (!trimmedT) { num_trimT++; trimmedT=true; }
1305        }
1306 <  else if (trim_adapter3(wseq, w5, w3)) {
1307 <      trim_code='3';
1306 >  else if (trim_adaptor5(wseq, w5, w3)) {
1307 >      trim_code='V';
1308 >      b_trimV+=(w5+(wseq.length()-1-w3));
1309 >      if (!trimmedV) { num_trimV++; trimmedV=true; }
1310 >      }
1311 >  else if (trim_adaptor3(wseq, w5, w3)) {
1312 >      trim_code='V';
1313 >      b_trimV+=(w5+(wseq.length()-1-w3));
1314 >      if (!trimmedV) { num_trimV++; trimmedV=true; }
1315        }
1316    if (trim_code) {
1317 <     #ifdef GDEBUG
1317 >     w3upd=(w3!=prev_w3);
1318 >         w5upd=(w5!=prev_w5);
1319 >         if (w3upd) prev_w3=w3;
1320 >         if (w5upd) prev_w5=w5;
1321 >   #ifdef GDEBUG
1322       GMessage("#### TRIM by '%c' code ( w5-w3 = %d-%d ):\n",trim_code, w5,w3);
1323       showTrim(wseq, w5, w3);
1324 <     #endif
1325 <     int trimlen=wseq.length()-(w3-w5+1);
1153 <     num_trimmed3++;
1154 <     if (trimlen<min_trimmed3)
1155 <         min_trimmed3=trimlen;
1324 >   #endif
1325 >     //int trimlen=wseq.length()-(w3-w5+1);
1326       l5+=w5;
1327       l3-=(wseq.length()-1-w3);
1328       if (w3-w5+1<min_read_len) {
# Line 1164 | Line 1334
1334           wqv=wqv.substr(w5, w3-w5+1);
1335        }//trimming at 3' end
1336   } while (trim_code);
1167
1168
1337   if (doCollapse) {
1338     //keep read for later
1339     FqDupRec* dr=dhash.Find(wseq.chars());
1340     if (dr==NULL) { //new entry
1341 <          //if (prefix.is_empty())
1342 <             dhash.Add(wseq.chars(),
1341 >          //if (prefix.is_empty())
1342 >             dhash.Add(wseq.chars(),
1343                    new FqDupRec(&wqv, rname.chars()));
1344            //else dhash.Add(wseq.chars(), new FqDupRec(wqv.chars(),wqv.length()));
1345           }
# Line 1205 | Line 1373
1373         fprintf(f_out, "%s\n", rseq.chars()); //plain one-line fasta for now
1374         }
1375        else {
1376 <       fprintf(f_out, ">%s%08d\n%s\n", prefix.chars(), outcounter,
1376 >       fprintf(f_out, ">%s%08d\n%s\n", prefix.chars(), outcounter,
1377                            rseq.chars());
1378         }
1379       }
# Line 1216 | Line 1384
1384         fprintf(f_out, "%s\n+\n%s\n", rseq.chars(), rqv.chars());
1385         }
1386        else
1387 <       fprintf(f_out, "@%s_%08d\n%s\n+\n%s\n", prefix.chars(), outcounter,
1387 >       fprintf(f_out, "@%s_%08d\n%s\n+\n%s\n", prefix.chars(), outcounter,
1388                            rseq.chars(),rqv.chars() );
1389       }
1390   }
# Line 1316 | Line 1484
1484      }
1485   }
1486  
1487 + void addAdaptor(GVec<CASeqData>& adaptors, GStr& seq, GAlnTrimType trim_type) {
1488 + //TODO: prepare CASeqData here, and collect hexamers as well
1489 +  if (seq.is_empty() || seq=="-" ||
1490 +          seq=="N/A" || seq==".") return;
1491 +
1492 + CASeqData adata(revCompl);
1493 + int idx=adaptors.Add(adata);
1494 + if (idx<0) GError("Error: failed to add adaptor!\n");
1495 + adaptors[idx].trim_type=trim_type;
1496 + adaptors[idx].update(seq.chars());
1497 + }
1498 +
1499  
1500 < int loadAdapters(const char* fname) {
1500 > int loadAdaptors(const char* fname) {
1501    GLineReader lr(fname);
1502    char* l;
1503    while ((l=lr.nextLine())!=NULL) {
# Line 1333 | Line 1513
1513        #ifdef GDEBUG
1514            //s.reverse();
1515        #endif
1516 <        adapters3.Add(s);
1516 >        addAdaptor(adaptors3, s, galn_TrimRight);
1517          continue;
1518          }
1519        }
# Line 1342 | Line 1522
1522        s.startTokenize("\t ;,:");
1523        GStr a5,a3;
1524        if (s.nextToken(a5))
1525 <         s.nextToken(a3);
1525 >            s.nextToken(a3);
1526 >        else continue; //no tokens on this line
1527 >      GAlnTrimType ttype5=galn_TrimLeft;
1528        a5.upper();
1529        a3.upper();
1530 +      if (a3.is_empty() || a3==a5 || a3=="=") {
1531 +         a3.clear();
1532 +         ttype5=galn_TrimEither;
1533 +         }
1534       #ifdef GDEBUG
1535       //   a5.reverse();
1536       //   a3.reverse();
1537       #endif
1538 <      adapters5.Add(a5);
1539 <      adapters3.Add(a3);
1538 >      addAdaptor(adaptors5, a5, ttype5);
1539 >      addAdaptor(adaptors3, a3, galn_TrimRight);
1540        }
1541     }
1542 <   return adapters5.Count()+adapters3.Count();
1542 >   return adaptors5.Count()+adaptors3.Count();
1543   }
1544  
1545 < void setupFiles(FILE*& f_in, FILE*& f_in2, FILE*& f_out, FILE*& f_out2,
1546 <                       GStr& s, GStr& infname, GStr& infname2) {
1545 > void setupFiles(FILE*& f_in, FILE*& f_in2, FILE*& f_out, FILE*& f_out2,
1546 >                       GStr& s, GStr& infname, GStr& infname2) {
1547   // uses outsuffix to generate output file names and open file handles as needed
1548   infname="";
1549   infname2="";
# Line 1385 | Line 1571
1571   s.startTokenize(",:");
1572   s.nextToken(infname);
1573   bool paired=s.nextToken(infname2);
1574 < if (fileExists(infname.chars())==0)
1574 > if (fileExists(infname.chars())==0)
1575      GError("Error: cannot find file %s!\n",infname.chars());
1576   GStr fname(getFileName(infname.chars()));
1577   GStr picmd;
# Line 1407 | Line 1593
1593   if (!paired) return;
1594   if (doCollapse) GError("Error: sorry, -C option cannot be used with paired reads!\n");
1595   // ---- paired reads:-------------
1596 < if (fileExists(infname2.chars())==0)
1596 > if (fileExists(infname2.chars())==0)
1597       GError("Error: cannot find file %s!\n",infname2.chars());
1598   picmd="";
1599   GStr fname2(getFileName(infname2.chars()));

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines