ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/fqtrim/fqtrim.cpp
(Generate patch)
# Line 112 | Line 112
112   void convertQ64(GStr& q);
113  
114   int main(int argc, char * const argv[]) {
115 <  GArgs args(argc, argv, "QRDCl:d:3:5:n:r:o:");
115 >  GArgs args(argc, argv, "YQRDCl:d:3:5:n:r:o:");
116    int e;
117    int icounter=0; //counter for input reads
118    int outcounter=0; //counter for output reads
# Line 120 | Line 120
120        GMessage("%s\nInvalid argument: %s\n", USAGE, argv[e]);
121        exit(224);
122        }
123 <  debug=(args.getOpt('D')!=NULL);
123 >  debug=(args.getOpt('Y')!=NULL);
124    phred64=(args.getOpt('Q')!=NULL);
125    doCollapse=(args.getOpt('C')!=NULL);
126    doDust=(args.getOpt('D')!=NULL);
# Line 155 | Line 155
155      }
156  
157    openfw(f_out, args, 'o');
158 +  if (f_out==NULL) f_out=stdout;
159    if (trashReport)
160      openfw(freport, args, 'r');
161    char* infile=NULL;
# Line 333 | Line 334
334           }
335        if (isfasta) {
336          if (prefix.is_empty()) {
337 <          fprintf(f_out, "@%s_x%d\n%s\n", qd->firstname, qd->count,
337 >          fprintf(f_out, ">%s_x%d\n%s\n", qd->firstname, qd->count,
338                          rseq.chars());
339            }
340          else { //use custom read name
341 <          fprintf(f_out, "@%s%08d_x%d\n%s\n", prefix.chars(), outcounter,
341 >          fprintf(f_out, ">%s%08d_x%d\n%s\n", prefix.chars(), outcounter,
342                       qd->count, rseq.chars());
343            }
344          }
# Line 601 | Line 602
602   //     be at coordinate 0
603  
604   bool extendMatch(const char* a, int alen, int ai,
605 <                 const char* b, int blen, int bi, int mlen, int& l5, int& l3) {
605 >                 const char* b, int blen, int bi, int mlen, int& l5, int& l3, bool end5=false) {
606   //so the alignment starts at ai in a, bi in b, with a perfect match of length mlen
607 < //GStr dbg(a);
607 > //if (debug)
608 > //  GStr dbg(b);
609   //GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(ai, mlen)).chars(), ai);
610   int a_l=ai; //alignment coordinates on a
611   int a_r=ai+mlen-1;
# Line 613 | Line 615
615   int bi_maxscore=bi;
616   int score=mlen*a_m_score;
617   int maxscore=score;
618 < int mism5score=a_mis_score-2; //increase penalty for mismatches at 5' end
618 > int mism5score=a_mis_score;
619 > if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end
620   //try to extend to the left first, if possible
621   while (ai>0 && bi>0) {
622     ai--;
# Line 628 | Line 631
631     }
632   a_l=ai_maxscore;
633   b_l=bi_maxscore;
634 + //if (debug) GMessage("  after l-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore);
635   //now extend to the right
636   ai_maxscore=a_r;
637   bi_maxscore=b_r;
# Line 661 | Line 665
665     }
666    a_r=ai_maxscore;
667    b_r=bi_maxscore;
668 <  if (maxscore>=a_min_score && (alen-a_r<3 || blen-b_r<3 || a_l<2 || b_l<2)) {
669 <     if (a_l<alen-a_r-1) {
668 >  int a_ovh3=alen-a_r-1;
669 >  int b_ovh3=blen-b_r-1;
670 >  int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3;
671 >  int mmovh5=(a_l<b_l)? a_l : b_l;
672 >  //if (debug) GMessage("  after r-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore);
673 >  if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) {
674 >     if (a_l<a_ovh3) {
675          //adapter closer to the left end (typical for 5' adapter)
676          l5=a_r+1;
677          l3=alen-1;
# Line 725 | Line 734
734   }
735  
736   bool trim_adapter5(GStr& seq, int&l5, int &l3) {
737 + //if (debug) GMessage("trim_adapter5 on: %s\n", seq.chars());
738   int rlen=seq.length();
739   l5=0;
740   l3=rlen-1;
# Line 749 | Line 759
759     //l5=a5len-4;
760     //while (fi+l5<a5len && l5<l3 && adapter5[fi+l5]==seq[l5]) l5++;
761     //return true;
762 +   //if (debug) GMessage(" first 12char of the read match adaptor!\n");
763     extendMatch(seq.chars(), rlen, 1,
764 <                 adapter5.chars(), a5len, fi,  12, l5,l3);
764 >                 adapter5.chars(), a5len, fi,  12, l5,l3, true);
765     return true;
766     }
767    
# Line 760 | Line 771
771   if (apstart<0) { apstart=0; aplen=a5len-1; }
772   GStr bstr=adapter5.substr(apstart, aplen);
773   if ((fi=seq.index(bstr))>=0) {
774 +   //if (debug) GMessage(" last 12char of adaptor match the read!\n");
775     extendMatch(seq.chars(), rlen, fi,
776 <                 adapter5.chars(), a5len, apstart,  aplen, l5,l3);
776 >                 adapter5.chars(), a5len, apstart,  aplen, l5,l3,true);
777     return true;
778     }
779   //no easy cases, find a triplet match as a seed for alignment extension
# Line 770 | Line 782
782     apstart=a5len-iw-4;
783     GStr aword=adapter5.substr(apstart,3);
784     if ((fi=seq.index(aword))>=0) {
785 +      //if (debug) GMessage("extending wmatch :%*s\n", fi+3, aword.chars());
786        if (extendMatch(seq.chars(), rlen, fi, adapter5.chars(),
787 <                   a5len, apstart, 3, l5,l3)) {
787 >                   a5len, apstart, 3, l5,l3,true)) {
788                       return true;
789                       }
790        }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines