112 |
|
void convertQ64(GStr& q); |
113 |
|
|
114 |
|
int main(int argc, char * const argv[]) { |
115 |
< |
GArgs args(argc, argv, "QRDCl:d:3:5:n:r:o:"); |
115 |
> |
GArgs args(argc, argv, "YQRDCl:d:3:5:n:r:o:"); |
116 |
|
int e; |
117 |
|
int icounter=0; //counter for input reads |
118 |
|
int outcounter=0; //counter for output reads |
120 |
|
GMessage("%s\nInvalid argument: %s\n", USAGE, argv[e]); |
121 |
|
exit(224); |
122 |
|
} |
123 |
< |
debug=(args.getOpt('D')!=NULL); |
123 |
> |
debug=(args.getOpt('Y')!=NULL); |
124 |
|
phred64=(args.getOpt('Q')!=NULL); |
125 |
|
doCollapse=(args.getOpt('C')!=NULL); |
126 |
|
doDust=(args.getOpt('D')!=NULL); |
601 |
|
// be at coordinate 0 |
602 |
|
|
603 |
|
bool extendMatch(const char* a, int alen, int ai, |
604 |
< |
const char* b, int blen, int bi, int mlen, int& l5, int& l3) { |
604 |
> |
const char* b, int blen, int bi, int mlen, int& l5, int& l3, bool end5=false) { |
605 |
|
//so the alignment starts at ai in a, bi in b, with a perfect match of length mlen |
606 |
< |
//GStr dbg(a); |
606 |
> |
//if (debug) |
607 |
> |
// GStr dbg(b); |
608 |
|
//GMessage(">> in %s\n\textending hit: %s at position %d\n", a, (dbg.substr(ai, mlen)).chars(), ai); |
609 |
|
int a_l=ai; //alignment coordinates on a |
610 |
|
int a_r=ai+mlen-1; |
614 |
|
int bi_maxscore=bi; |
615 |
|
int score=mlen*a_m_score; |
616 |
|
int maxscore=score; |
617 |
< |
int mism5score=a_mis_score-2; //increase penalty for mismatches at 5' end |
617 |
> |
int mism5score=a_mis_score; |
618 |
> |
if (end5 && ai<(alen>>1)) mism5score-=2; // increase penalty for mismatches at 5' end |
619 |
|
//try to extend to the left first, if possible |
620 |
|
while (ai>0 && bi>0) { |
621 |
|
ai--; |
630 |
|
} |
631 |
|
a_l=ai_maxscore; |
632 |
|
b_l=bi_maxscore; |
633 |
+ |
//if (debug) GMessage(" after l-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore); |
634 |
|
//now extend to the right |
635 |
|
ai_maxscore=a_r; |
636 |
|
bi_maxscore=b_r; |
664 |
|
} |
665 |
|
a_r=ai_maxscore; |
666 |
|
b_r=bi_maxscore; |
667 |
< |
if (maxscore>=a_min_score && (alen-a_r<3 || blen-b_r<3 || a_l<2 || b_l<2)) { |
668 |
< |
if (a_l<alen-a_r-1) { |
667 |
> |
int a_ovh3=alen-a_r-1; |
668 |
> |
int b_ovh3=blen-b_r-1; |
669 |
> |
int mmovh3=(a_ovh3<b_ovh3)? a_ovh3 : b_ovh3; |
670 |
> |
int mmovh5=(a_l<b_l)? a_l : b_l; |
671 |
> |
//if (debug) GMessage(" after r-extend: %*s%s\t\t(score=%d)\n",a_l," ",dbg.substr(b_l,b_r-b_l+1).chars(),maxscore); |
672 |
> |
if (maxscore>=a_min_score && mmovh3<2 && mmovh5<2) { |
673 |
> |
if (a_l<a_ovh3) { |
674 |
|
//adapter closer to the left end (typical for 5' adapter) |
675 |
|
l5=a_r+1; |
676 |
|
l3=alen-1; |
733 |
|
} |
734 |
|
|
735 |
|
bool trim_adapter5(GStr& seq, int&l5, int &l3) { |
736 |
+ |
//if (debug) GMessage("trim_adapter5 on: %s\n", seq.chars()); |
737 |
|
int rlen=seq.length(); |
738 |
|
l5=0; |
739 |
|
l3=rlen-1; |
758 |
|
//l5=a5len-4; |
759 |
|
//while (fi+l5<a5len && l5<l3 && adapter5[fi+l5]==seq[l5]) l5++; |
760 |
|
//return true; |
761 |
+ |
//if (debug) GMessage(" first 12char of the read match adaptor!\n"); |
762 |
|
extendMatch(seq.chars(), rlen, 1, |
763 |
< |
adapter5.chars(), a5len, fi, 12, l5,l3); |
763 |
> |
adapter5.chars(), a5len, fi, 12, l5,l3, true); |
764 |
|
return true; |
765 |
|
} |
766 |
|
|
770 |
|
if (apstart<0) { apstart=0; aplen=a5len-1; } |
771 |
|
GStr bstr=adapter5.substr(apstart, aplen); |
772 |
|
if ((fi=seq.index(bstr))>=0) { |
773 |
+ |
//if (debug) GMessage(" last 12char of adaptor match the read!\n"); |
774 |
|
extendMatch(seq.chars(), rlen, fi, |
775 |
< |
adapter5.chars(), a5len, apstart, aplen, l5,l3); |
775 |
> |
adapter5.chars(), a5len, apstart, aplen, l5,l3,true); |
776 |
|
return true; |
777 |
|
} |
778 |
|
//no easy cases, find a triplet match as a seed for alignment extension |
781 |
|
apstart=a5len-iw-4; |
782 |
|
GStr aword=adapter5.substr(apstart,3); |
783 |
|
if ((fi=seq.index(aword))>=0) { |
784 |
+ |
//if (debug) GMessage("extending wmatch :%*s\n", fi+3, aword.chars()); |
785 |
|
if (extendMatch(seq.chars(), rlen, fi, adapter5.chars(), |
786 |
< |
a5len, apstart, 3, l5,l3)) { |
786 |
> |
a5len, apstart, 3, l5,l3,true)) { |
787 |
|
return true; |
788 |
|
} |
789 |
|
} |