370 |
|
} |
371 |
|
#ifdef GDEBUG |
372 |
|
GMessage(" After trimming:\n"); |
373 |
< |
GMessage("<==%s\n",rseq.chars()); |
373 |
> |
GMessage("%s\n",rseq.chars()); |
374 |
|
#endif |
375 |
|
writeRead(f_out, seqid, seqinfo, rseq, rqv, outcounter); |
376 |
|
} |
834 |
|
maxloc=loc; |
835 |
|
} |
836 |
|
} |
837 |
+ |
li=maxloc.pos; |
838 |
|
if ((maxloc.score>poly_minScore && ri>=rlen-3) || |
839 |
|
(maxloc.score==poly_minScore && ri==rlen-1) || |
840 |
|
(maxloc.score>(poly_minScore<<1) && ri>=rlen-6)) { |
841 |
< |
l5=li; |
842 |
< |
l3=ri; |
841 |
> |
//trimming this li-ri match at 3' end |
842 |
> |
l3=li-1; |
843 |
> |
if (l3<0) l3=0; |
844 |
|
return true; |
845 |
|
} |
846 |
|
return false; |
908 |
|
maxloc=loc; |
909 |
|
} |
910 |
|
} |
911 |
< |
|
911 |
> |
ri=maxloc.pos; |
912 |
|
if ((maxloc.score==poly_minScore && li==0) || |
913 |
|
(maxloc.score>poly_minScore && li<2) |
914 |
|
|| (maxloc.score>(poly_minScore<<1) && li<6)) { |
915 |
< |
l5=li; |
916 |
< |
l3=ri; |
915 |
> |
//adjust l5 to reflect this trimming of 5' end |
916 |
> |
l5=ri+1; |
917 |
> |
if (l5>rlen-1) l5=rlen-1; |
918 |
|
return true; |
919 |
|
} |
920 |
|
return false; |
1039 |
|
return true; |
1040 |
|
} |
1041 |
|
|
1042 |
+ |
#ifdef GDEBUG |
1043 |
+ |
void showTrim(GStr& s, int l5, int l3) { |
1044 |
+ |
if (l5>0) { |
1045 |
+ |
color_bg(c_red); |
1046 |
+ |
} |
1047 |
+ |
for (int i=0;i<s.length()-1;i++) { |
1048 |
+ |
if (i && i==l5) color_resetbg(); |
1049 |
+ |
fprintf(stderr, "%c", s[i]); |
1050 |
+ |
if (i==l3) color_bg(c_red); |
1051 |
+ |
} |
1052 |
+ |
fprintf(stderr, "%c", s[s.length()-1]); |
1053 |
+ |
color_reset(); |
1054 |
+ |
fprintf(stderr, "\n"); |
1055 |
+ |
} |
1056 |
+ |
#endif |
1057 |
+ |
|
1058 |
|
char process_read(GStr& rname, GStr& rseq, GStr& rqv, int &l5, int &l3) { |
1059 |
|
//returns 0 if the read was untouched, 1 if it was just trimmed |
1060 |
|
// and a trash code if it was trashed |
1062 |
|
l3=rseq.length()-1; |
1063 |
|
#ifdef GDEBUG |
1064 |
|
GMessage(">%s\n", rname.chars()); |
1065 |
< |
GMessage("==>%s\n",rseq.chars()); |
1065 |
> |
GMessage("%s\n",rseq.chars()); |
1066 |
|
#endif |
1067 |
|
if (l3-l5+1<min_read_len) { |
1068 |
|
return 's'; |
1104 |
|
trim_code=0; |
1105 |
|
if (trim_poly5(wseq, w5, w3, polyA_seed)) { |
1106 |
|
trim_code='A'; |
1088 |
– |
#ifdef GDEBUG |
1089 |
– |
GMessage("\t-trimmed poly-A at 5' end\n"); |
1090 |
– |
#endif |
1107 |
|
} |
1108 |
|
else if (trim_poly5(wseq, w5, w3, polyT_seed)) { |
1109 |
|
trim_code='T'; |
1094 |
– |
#ifdef GDEBUG |
1095 |
– |
GMessage("\t-trimmed poly-T at 5' end\n"); |
1096 |
– |
#endif |
1110 |
|
} |
1111 |
|
else if (trim_adapter5(wseq, w5, w3)) { |
1112 |
|
trim_code='5'; |
1100 |
– |
#ifdef GDEBUG |
1101 |
– |
GMessage("\t-trimmed adapter at 5' end\n"); |
1102 |
– |
#endif |
1113 |
|
} |
1114 |
|
if (trim_code) { |
1115 |
+ |
#ifdef GDEBUG |
1116 |
+ |
GMessage("#### TRIM by '%c' code ( w5-w3 = %d-%d ):\n",trim_code, w5,w3); |
1117 |
+ |
showTrim(wseq, w5, w3); |
1118 |
+ |
#endif |
1119 |
|
int trimlen=wseq.length()-(w3-w5+1); |
1120 |
|
num_trimmed5++; |
1121 |
|
if (trimlen<min_trimmed5) |
1144 |
|
trim_code='3'; |
1145 |
|
} |
1146 |
|
if (trim_code) { |
1147 |
+ |
#ifdef GDEBUG |
1148 |
+ |
GMessage("#### TRIM by '%c' code ( w5-w3 = %d-%d ):\n",trim_code, w5,w3); |
1149 |
+ |
showTrim(wseq, w5, w3); |
1150 |
+ |
#endif |
1151 |
|
int trimlen=wseq.length()-(w3-w5+1); |
1152 |
|
num_trimmed3++; |
1153 |
|
if (trimlen<min_trimmed3) |