ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/fqtrim/fqtrim.cpp
(Generate patch)
# Line 5 | Line 5
5   #include <ctype.h>
6  
7   #define USAGE "Usage:\n\
8 < fqtrim [-5 <5adapter>] [-3 <3adapter>] [-a <min_matchlen>] [-p {64|33}] [-q <minq> [-t <trim_max>]]\\\n\
9 <   [-n <rename_prefix>] [-o <outsuffix>] [-z <zcmd>] [-r <discarded.lst>]\\\n\
10 <   [-l <minlen>] [-C] [-D] [-Q] <input.fq>[,<input_mates.fq>\n\
8 > fqtrim [{-5 <5adapter> -3 <3adapter>|-f <adapters_file>}] [-a <min_matchlen>]\\\n\
9 >   [-q <minq> [-t <trim_max_len>]] [-p {64|33}] [-o <outsuffix>]\\\n\
10 >   [-l <minlen>] [-C] [-D] [-Q] [-n <rename_prefix>] [-r <discarded.lst>]\\\n\
11 >    <input.fq>[,<input_mates.fq>\n\
12   \n\
13 < Trim low quality bases at the 3' end, optionally trim adapter sequence, filter\n\
14 < for low complexity and collapse duplicate reads\n\
13 > Trim low quality bases at the 3' end and can trim adapter sequence(s), filter\n\
14 > for low complexity and collapse duplicate reads.\n\
15   If read pairs should be trimmed and kept together (i.e. without discarding\n\
16   one read in a pair), the two file names should be given delimited by a comma\n\
17   or a colon character\n\
# Line 19 | Line 20
20   -n  rename all the reads using the <prefix> followed by a read counter;\n\
21      if -C option was given, the suffix \"_x<N>\" is appended, with <N> being\n\
22      the read duplication count\n\
23 < -o  write the trimmed/filtered reads to file(s) named <input>.<outsuffix>\n\
24 <    which will be created in the current (working) directory\n\
23 > -o  unless this parameter is '-', write the trimmed/filtered reads to \n\
24 >    file(s) named <input>.<outsuffix> which will be created in the \n\
25 >    current (working) directory; (writes to stdout if -o- is given);\n\
26 >    a suffix ending with .gz, .gzip or .bz2 will enforce compression\n\
27 > -f  file with adapter sequences to trim, each line having this format:\n\
28 >    <5'-adapter-sequence> <3'-adapter-sequence>\n\
29   -5  trim the given adapter or primer sequence at the 5' end of each read\n\
30      (e.g. -5 CGACAGGTTCAGAGTTCTACAGTCCGACGATC)\n\
31   -3  trim the given adapter sequence at the 3' end of each read\n\
32      (e.g. -3 TCGTATGCCGTCTTCTGCTTG)\n\
33 < -a  minimum bases to match to adaptor sequence (default 5)\n\
33 > -a  minimum length of exact match to adaptor sequence at the proper end (6)\n\
34   -q  trim bases with quality value lower than <minq> (starting at the 3' end)\n\
35 < -t  for -q option, maximum trimming at the 3' end is limited to <trim_max>\n\
35 > -t  for -q option, maximum trimming at the 3' end is limited to <trim_max_len>\n\
36   -m  maximum percentage of Ns allowed in a read after trimming (default 7)\n\
37   -l  minimum \"clean\" length after trimming that a read must have\n\
38      in order to pass the filter (default: 16)\n\
# Line 48 | Line 53
53  
54   // example 3' adapter for miRNAs: TCGTATGCCGTCTTCTGCTTG
55  
56 < //For pair ends sequencing:
56 > //For paired reads sequencing:
57   //3' : ACACTCTTTCCCTACACGACGCTCTTCCGATCT
58   //5' : GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
59   //FILE* f_out=NULL; //stdout if not provided
# Line 70 | Line 75
75   bool isfasta=false;
76   bool convert_phred=false;
77   GStr outsuffix; // -o
78 < GStr adapter3;
79 < GStr adapter5;
78 > //GStr adapter3;
79 > //GStr adapter5;
80   GStr prefix;
81   GStr zcmd;
82   int num_trimmed5=0;
# Line 92 | Line 97
97   const int a_m_score=2; //match score
98   const int a_mis_score=-3; //mismatch
99   const int a_dropoff_score=7;
100 < int a_min_score=10; //an exact match of 5 bases at the proper ends WILL be trimmed
100 > int a_min_score=12; //an exact match of 6 bases at the proper ends WILL be trimmed
101   const int a_min_chain_score=15; //for gapped alignments
102  
103   class CSegChain;
# Line 331 | Line 336
336   void convertPhred(GStr& q);
337  
338   int main(int argc, char * const argv[]) {
339 <  GArgs args(argc, argv, "YQDCVl:d:3:5:m:n:r:p:q:t:o:z:a:");
339 >  GArgs args(argc, argv, "YQDCVl:d:3:5:m:n:r:p:q:f:t:o:z:a:");
340    int e;
341    if ((e=args.isError())>0) {
342        GMessage("%s\nInvalid argument: %s\n", USAGE, argv[e]);
# Line 397 | Line 402
402       int a_minmatch=s.asInt();
403       a_min_score=a_minmatch<<1;
404       }
405 <
405 >  
406    if (args.getOpt('o')!=NULL) outsuffix=args.getOpt('o');
407 +                         else outsuffix="-";
408    trashReport=  (args.getOpt('r')!=NULL);
409    int fcount=args.startNonOpt();
410    if (fcount==0) {
# Line 1611 | Line 1617
1617   f_out2=NULL;
1618   //analyze outsuffix intent
1619   GStr pocmd;
1620 < GStr ox=getFext(outsuffix);
1621 < if (ox.length()>2) ox=ox.substr(0,2);
1622 < if (ox=="gz") pocmd="gzip -9 -c ";
1623 <   else if (ox=="bz") pocmd="bzip2 -9 -c ";
1620 > if (outsuffix=="-") {
1621 >    f_out=stdout;
1622 >    }
1623 >   else {
1624 >    GStr ox=getFext(outsuffix);
1625 >    if (ox.length()>2) ox=ox.substr(0,2);
1626 >    if (ox=="gz") pocmd="gzip -9 -c ";
1627 >        else if (ox=="bz") pocmd="bzip2 -9 -c ";
1628 >    }
1629   if (s=="-") {
1630      f_in=stdin;
1631      infname="stdin";
# Line 1638 | Line 1649
1649     f_in=popen(picmd.chars(), "r");
1650     if (f_in==NULL) GError("Error at popen %s!\n", picmd.chars());
1651     }
1652 + if (f_out==stdout) {
1653 +   if (paired) GError("Error: output suffix required for paired reads\n");
1654 +   return;
1655 +   }
1656   f_out=prepOutFile(infname, pocmd);
1657   if (!paired) return;
1658   if (doCollapse) GError("Error: sorry, -C option cannot be used with paired reads!\n");

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines