ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/fqtrim/fqtrim.cpp
(Generate patch)
# Line 7 | Line 7
7   #include <inttypes.h>
8  
9   #define USAGE "Usage:\n\
10 < fqtrim [{-5 <5adaptor> -3 <3adaptor>|-f <adaptors_file>}] [-a <min_matchlen>]\\\n\
10 > fqtrim [{-5 <5adaptor> -3 <3adaptor>|-f <adaptors_file>}] [-a <min_match>]\\\n\
11     [-R] [-q <minq> [-t <trim_max_len>]] [-p {64|33}] [-o <outsuffix>]\\\n\
12     [-l <minlen>] [-C] [-D] [-Q] [-n <rename_prefix>] [-r <discarded.lst>]\\\n\
13 <    <input.fq>[,<input_mates.fq>\n\
13 >   [-a <min_poly>] [-A] <input.fq>[,<input_mates.fq>\n\
14   \n\
15   Trim low quality bases at the 3' end and can trim adaptor sequence(s), filter\n\
16   for low complexity and collapse duplicate reads.\n\
# Line 32 | Line 32
32      (e.g. -5 CGACAGGTTCAGAGTTCTACAGTCCGACGATC)\n\
33   -3  trim the given adaptor sequence at the 3' end of each read\n\
34      (e.g. -3 TCGTATGCCGTCTTCTGCTTG)\n\
35 + -a  minimum exact match to adaptor sequence to trim at the end (6)\n\
36   -A  disable polyA/T trimming (enabled by default)\n\
37 < -y  minimum length of exact match to adaptor sequence at the proper end (6)\n\
37 > -y  minimum length of poly-A/T run to remove (6)\n\
38   -q  trim bases with quality value lower than <minq> (starting at the 3' end)\n\
39   -t  for -q option, maximum trimming at the 3' end is limited to <trim_max_len>\n\
40   -m  maximum percentage of Ns allowed in a read after trimming (default 7)\n\
# Line 113 | Line 114
114   // adaptor matching metrics -- for X-drop ungapped extension
115   //const int match_reward=2;
116   //const int mismatch_penalty=3;
117 < const int match_reward=2;
118 < const int mismatch_penalty=4;
119 < const int Xdrop=10;
117 > int match_reward=1;
118 > int mismatch_penalty=5;
119 > int Xdrop=16;
120 > int minEndAdapter=6;
121 >
122  
123   const int poly_m_score=2; //match score
124   const int poly_mis_score=-3; //mismatch
# Line 147 | Line 150
150     void update(const char* s) {
151           seq=s;
152           table6mers(seq.chars(), seq.length(), pz);
153 <         amlen=iround(double(seq.length())*0.8);
151 <         if (amlen<12)
152 <                amlen=12;
153 >         amlen=calc_safelen(seq.length());
154           if (!use_reverse) return;
155           //reverse complement
156           seqr=s;
# Line 249 | Line 250
250   void convertPhred(GStr& q);
251  
252   int main(int argc, char * const argv[]) {
253 <  GArgs args(argc, argv, "YQDCRVAl:d:3:5:m:n:r:p:q:f:t:o:z:a:");
253 >  GArgs args(argc, argv, "MATCH=MISMATCH=XDROP=YQDCRVAl:d:3:5:m:n:r:p:q:f:t:o:z:a:y:");
254    int e;
255    if ((e=args.isError())>0) {
256        GMessage("%s\nInvalid argument: %s\n", USAGE, argv[e]);
# Line 288 | Line 289
289    if (!s.is_empty()) {
290       qvtrim_max=s.asInt();
291       }
292 +  s=args.getOpt("MATCH");
293 +  if (!s.is_empty())
294 +            match_reward=s.asInt();
295 +  s=args.getOpt("MISMATCH");
296 +  if (!s.is_empty())
297 +            mismatch_penalty=s.asInt();
298 +  s=args.getOpt("XDROP");
299 +  if (!s.is_empty())
300 +            Xdrop=s.asInt();
301 +
302    s=args.getOpt('p');
303    if (!s.is_empty()) {
304       int v=s.asInt();
# Line 327 | Line 338
338    s=args.getOpt('y');
339    if (!s.is_empty()) {
340       int minmatch=s.asInt();
341 <     poly_minScore=minmatch*poly_m_score;
341 >     if (minmatch>2)
342 >        poly_minScore=minmatch*poly_m_score;
343 >     else GMessage("Warning: invalid -y option, ignored.\n");
344       }
345 +  s=args.getOpt('a');
346 +  if (!s.is_empty()) {
347 +     int minmatch=s.asInt();
348 +     if (minmatch>2)
349 +        minEndAdapter=minmatch;
350 +     else GMessage("Warning: invalid -a option, ignored.\n");
351 +     }
352 +
353  
354    if (args.getOpt('o')!=NULL) outsuffix=args.getOpt('o');
355                           else outsuffix="-";
# Line 1047 | Line 1068
1068              seqdata.update(adaptors3[ai].seq.chars(), adaptors3[ai].seq.length(),
1069                   adaptors3[ai].pz, wseq.chars(), wlen, adaptors3[ai].amlen);
1070          }
1071 <     GXAlnInfo* aln=match_adaptor(seqdata, adaptors3[ai].trim_type, gxmem_r, 86);
1071 >     GXAlnInfo* aln=match_adaptor(seqdata, adaptors3[ai].trim_type, minEndAdapter, gxmem_r, 86);
1072           if (aln) {
1073             if (aln->strong) {
1074                     trimmed=true;
# Line 1100 | Line 1121
1121              seqdata.update(adaptors5[ai].seq.chars(), adaptors5[ai].seq.length(),
1122                   adaptors5[ai].pz, wseq.chars(), wlen, adaptors5[ai].amlen);
1123          }
1124 <         GXAlnInfo* aln=match_adaptor(seqdata, adaptors5[ai].trim_type, gxmem_l, 90);
1124 >         GXAlnInfo* aln=match_adaptor(seqdata, adaptors5[ai].trim_type,
1125 >                                                       minEndAdapter, gxmem_l, 90);
1126           if (aln) {
1127             if (aln->strong) {
1128                     trimmed=true;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines