ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/fusions.cpp
Revision: 159
Committed: Mon Jan 30 22:38:08 2012 UTC (7 years, 8 months ago) by gpertea
File size: 27610 byte(s)
Log Message:
wip

Line File contents
1 /*
2 * fusions.cpp
3 * TopHat
4 */
5
6 #ifdef HAVE_CONFIG_H
7 #include <config.h>
8 #else
9 #define PACKAGE_VERSION "INTERNAL"
10 #define SVN_REVISION "XXX"
11 #endif
12
13
14 #include <cassert>
15 #include <cstdio>
16 #include <cstring>
17 #include <vector>
18 #include <string>
19 #include <map>
20 #include <algorithm>
21 #include <set>
22 #include <stdexcept>
23 #include <iostream>
24 #include <fstream>
25 #include <seqan/sequence.h>
26 #include <seqan/find.h>
27 #include <seqan/file.h>
28 #include <seqan/modifier.h>
29 // #include <seqan/align.h>
30 #include <getopt.h>
31
32 #include "common.h"
33 #include "bwt_map.h"
34 #include "junctions.h"
35 #include "fusions.h"
36 #include "fragments.h"
37 #include "wiggles.h"
38 #include "tokenize.h"
39 #include "reads.h"
40
41 #include "inserts.h"
42
43 // daehwan - this is redundancy, too!
44 int difference(const string& first, const string& second)
45 {
46 int len = seqan::length(first);
47 if (len != (int)seqan::length(second))
48 return 0;
49
50 int min_value = 10000;
51 short value1[1024] = {0,};
52 short value2[1024] = {0,};
53
54 short* curr = value1;
55 short* prev = value2;
56
57 for (int j = 0; j < len; ++j)
58 {
59 for (int i = 0; i < len; ++i)
60 {
61 int value = 10000;
62 int match = first[i] == second[j] ? 0 : 1;
63
64 // right
65 if (i == 0)
66 value = j * 2 + match;
67 else if (j > 0)
68 value = prev[i] + 2;
69
70 int temp_value = 10000;
71
72 // down
73 if (j == 0)
74 temp_value = i * 2 + match;
75 else if (i > 0)
76 temp_value = curr[i-1] + 2;
77
78 if (temp_value < value)
79 value = temp_value;
80
81 // match
82 if (i > 0 && j > 0)
83 temp_value = prev[i-1] + match;
84
85 if (temp_value < value)
86 value = temp_value;
87
88 curr[i] = value;
89
90 if ((i == len - 1 || j == len - 1) && value < min_value)
91 min_value = value;
92 }
93
94 short* temp = prev;
95 prev = curr;
96 curr = temp;
97 }
98
99 return min_value;
100 }
101
102
103 /**
104 * Add fusions from an alignment to an FusionSet.
105 * This will look for fusion in the alignment specified by bh.
106 * @param bh The bowtie hit to be used to specify alignment infromation.
107 * @param fusions The FusionSet that will be updated with the insertion information from teh alignment.
108 */
109 void fusions_from_alignment(const BowtieHit& bh,
110 FusionSet& fusions,
111 RefSequenceTable& rt,
112 bool update_stat)
113 {
114 RefSequenceTable::Sequence* ref_str1 = rt.get_seq(bh.ref_id());
115 RefSequenceTable::Sequence* ref_str2 = rt.get_seq(bh.ref_id2());
116
117 if (!ref_str1 || !ref_str2)
118 return;
119
120 vector<Fusion> new_fusions;
121 fusions_from_spliced_hit(bh, new_fusions);
122
123 for(size_t i = 0; i < new_fusions.size(); ++i)
124 {
125 Fusion fusion = new_fusions[i];
126 const vector<CigarOp>& cigars = bh.cigar();
127
128 /*
129 * Assume read is in the same direction as fusion.
130 */
131
132 // Find the position of Fusion.
133 size_t fusion_pos = 0;
134 for (; fusion_pos < cigars.size(); ++fusion_pos)
135 {
136 CigarOpCode opcode = cigars[fusion_pos].opcode;
137 if (opcode == FUSION_FF || opcode == FUSION_FR || opcode == FUSION_RF || opcode == FUSION_RR)
138 break;
139 }
140
141 if (fusion_pos <= 0 || fusion_pos + 1 >= cigars.size())
142 continue;
143
144 // For left bases,
145 size_t left_pos = 0;
146 for (int j = (int)fusion_pos - 1; j >= 0; --j)
147 {
148 const CigarOp& cigar = cigars[j];
149 switch (cigar.opcode)
150 {
151 case MATCH:
152 case mATCH:
153 case REF_SKIP:
154 case rEF_SKIP:
155 case DEL:
156 case dEL:
157 left_pos += cigar.length;
158 break;
159 default:
160 break;
161 }
162 }
163
164 // For right bases,
165 size_t right_pos = 0;
166 for (size_t j = fusion_pos + 1; j < cigars.size(); ++j)
167 {
168 const CigarOp& cigar = cigars[j];
169 switch (cigar.opcode)
170 {
171 case MATCH:
172 case mATCH:
173 case REF_SKIP:
174 case rEF_SKIP:
175 case DEL:
176 case dEL:
177 right_pos += cigar.length;
178 break;
179 default:
180 break;
181 }
182 }
183
184 if (left_pos < fusion_anchor_length || right_pos < fusion_anchor_length)
185 continue;
186
187 FusionSet::iterator itr = fusions.find(fusion);
188 if (itr != fusions.end())
189 {
190 itr->second.count += 1;
191 }
192 else
193 {
194 assert(fusion.refid1 != 0xFFFFFFFF);
195 FusionStat fusionStat;
196 fusionStat.count = 1;
197 fusions[fusion] = fusionStat;
198
199 itr = fusions.find(fusion);
200
201 if (!update_stat)
202 {
203 /*
204 * make a reversed fusion.
205 * this is necessary to detect reads that contradict the fusion.
206 */
207
208 FusionStat fusionStat_rev;
209 fusionStat_rev.count = 1;
210 fusionStat_rev.reversed = true;
211
212 Fusion fusion_rev(fusion.refid2, fusion.refid1, fusion.right, fusion.left, fusion.dir);
213 fusions[fusion_rev] = fusionStat_rev;
214 }
215 }
216
217 if (update_stat)
218 {
219 if (itr->second.chr1_seq.length() <= 0)
220 {
221 size_t len = 100;
222 size_t half_len = len / 2;
223 size_t increase = 20;
224
225 if (fusion.left >= half_len && fusion.left + half_len <= seqan::length(*ref_str1) &&
226 fusion.right >= half_len && fusion.right + half_len <= seqan::length(*ref_str2))
227 {
228 seqan::Dna5String left, right;
229
230 if (fusion.dir == FUSION_RF || fusion.dir == FUSION_RR)
231 {
232 left = seqan::infix(*ref_str1, fusion.left - half_len, fusion.left + half_len);
233 seqan::convertInPlace(left, seqan::FunctorComplement<seqan::Dna>());
234 seqan::reverseInPlace(left);
235 }
236 else
237 left = seqan::infix(*ref_str1, fusion.left - half_len + 1, fusion.left + half_len + 1);
238
239 if (fusion.dir == FUSION_FR || fusion.dir == FUSION_RR)
240 {
241 right = seqan::infix(*ref_str2, fusion.right - half_len + 1, fusion.right + half_len + 1);
242 seqan::convertInPlace(right, seqan::FunctorComplement<seqan::Dna>());
243 seqan::reverseInPlace(right);
244 }
245 else
246 right = seqan::infix(*ref_str2, fusion.right - half_len, fusion.right + half_len);
247
248 itr->second.chr1_seq = DnaString_to_string(left);
249 itr->second.chr2_seq = DnaString_to_string(right);
250
251 for (size_t j = 0; j < 5; ++j)
252 {
253 size_t pos = (5 - j - 1) * increase / 2;
254 const string& left_sub = itr->second.chr1_seq.substr(pos, (j+1) * increase);
255 const string& right_sub = itr->second.chr2_seq.substr(pos, (j+1) * increase);
256
257 itr->second.diffs.push_back(difference(left_sub, right_sub));
258 }
259 }
260 }
261
262 assert (bh.ref_id() == itr->first.refid1);
263
264 itr->second.left_ext = max((size_t)itr->second.left_ext, left_pos);
265 itr->second.right_ext = max((size_t)itr->second.right_ext, right_pos);
266
267 for (size_t k = 0; k < left_pos && k < itr->second.left_bases.size(); ++k)
268 {
269 ++(itr->second.left_bases[k]);
270 }
271
272 for (size_t k = 0; k < right_pos && k < itr->second.right_bases.size(); ++k)
273 {
274 ++(itr->second.right_bases[k]);
275 }
276 }
277 }
278 }
279
280 void unsupport_fusions(const BowtieHit& bh, FusionSet& fusions, FusionSet& fusions_ref)
281 {
282 if (bh.fusion_opcode() != FUSION_NOTHING || bh.is_spliced() || bh.read_len() < 40)
283 return;
284
285 FusionSet::iterator lb, ub;
286
287 uint32_t left = bh.left() + 20;
288 uint32_t right = bh.right() - 20;
289
290 lb = fusions_ref.upper_bound(Fusion(0u, 0u, left, 0));
291 ub = fusions_ref.lower_bound(Fusion(0xffffffffu, 0xffffffffu, right, 0xffffffffu));
292 while (lb != ub && lb != fusions_ref.end())
293 {
294 if (lb->first.refid1 == bh.ref_id())
295 {
296 // daehwan
297 #if 0
298 // MCF-7 RPS6KB1 17:57970443-58027925:1 TMEM49 17:57784863-57917950:1
299 if ((lb->first.left == 57992061 && lb->first.right == 57917126) ||
300 (lb->first.left == 57917126 && lb->first.right == 57992061))
301 {
302 const char* dir_str = "ff";
303 if (lb->first.dir == FUSION_FR)
304 dir_str = "fr";
305 else if (lb->first.dir == FUSION_RF)
306 dir_str = "rf";
307
308 cout << "fusion: " << lb->first.left << "-" << lb->first.right << endl;
309 cout << dir_str << endl;
310 cout << bh.insert_id() << ": " << bh.left() << "-" << bh.right() << "\t";
311 cout << print_cigar(bh.cigar()) << " " << (int)bh.edit_dist() << endl;
312 cout << bh.seq() << endl;
313 cout << bh.qual() << endl;
314 cout << endl;
315 }
316 #endif
317
318 FusionSet::iterator itr;
319 if (lb->second.reversed)
320 itr = fusions.find(Fusion(lb->first.refid2, lb->first.refid1, lb->first.right, lb->first.left, lb->first.dir));
321 else
322 itr = fusions.find(lb->first);
323
324 if (itr == fusions.end())
325 {
326 FusionStat fusionStat;
327 fusionStat.unsupport_count = 1;
328 fusions[lb->first] = fusionStat;
329 }
330 else
331 ++(itr->second.unsupport_count);
332 }
333
334 ++lb;
335 }
336 }
337
338 /**
339 */
340 void print_fusions(FILE* fusions_out, FusionSet& fusions, RefSequenceTable& ref_sequences)
341 {
342 // fprintf(fusions_out, "track name=fusions description=\"TopHat fusions\"\n");
343
344 vector<Fusion> vFusion;
345 for (FusionSet::iterator itr = fusions.begin(); itr != fusions.end(); ++itr)
346 {
347 vFusion.push_back(itr->first);
348 }
349 sort(vFusion.begin(), vFusion.end());
350
351 for (size_t i = 0; i < vFusion.size(); ++i)
352 {
353 FusionSet::iterator itr = fusions.find(vFusion[i]);
354 int counts = itr->second.count;
355 if (counts <= 0 || itr->second.reversed)
356 continue;
357
358 const char* dir = "";
359 if (itr->first.dir == FUSION_FF)
360 dir = "ff";
361 else if(itr->first.dir == FUSION_FR)
362 dir = "fr";
363 else if(itr->first.dir == FUSION_RF)
364 dir = "rf";
365 else
366 dir = "rr";
367
368 assert (itr->second.left_bases.size() == itr->second.right_bases.size());
369
370 float symm = 0.0f;
371 for (uint32_t i = 0; i < itr->second.left_bases.size(); ++i)
372 {
373 float term = ((int)itr->second.left_bases[i] - (int)itr->second.right_bases[i]) / (float)counts;
374 symm += (term * term);
375 }
376
377 fprintf(fusions_out, "%s-%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%.6f",
378 ref_sequences.get_name(itr->first.refid1),
379 ref_sequences.get_name(itr->first.refid2),
380 itr->first.left,
381 itr->first.right,
382 dir,
383 counts,
384 itr->second.pair_count,
385 itr->second.pair_count_fusion,
386 itr->second.unsupport_count,
387 itr->second.left_ext,
388 itr->second.right_ext,
389 symm);
390
391 fprintf(fusions_out, "\t@\t");
392
393 for (uint32_t i = 0; i < itr->second.diffs.size(); ++i)
394 {
395 fprintf(fusions_out, "%d ", itr->second.diffs[i]);
396 }
397
398 fprintf(fusions_out, "\t@\t");
399
400 uint32_t half_length = itr->second.chr1_seq.length() / 2;
401 fprintf(fusions_out, "%s %s\t@\t", itr->second.chr1_seq.substr(0, half_length).c_str(), itr->second.chr1_seq.substr(half_length).c_str());
402 fprintf(fusions_out, "%s %s\t@\t", itr->second.chr2_seq.substr(0, half_length).c_str(), itr->second.chr2_seq.substr(half_length).c_str());
403
404 for (uint32_t i = 0; i < itr->second.left_bases.size(); ++i)
405 {
406 fprintf(fusions_out, "%d ", itr->second.left_bases[i]);
407 }
408
409 fprintf(fusions_out, "\t@\t");
410
411 for (uint32_t i = 0; i < itr->second.right_bases.size(); ++i)
412 {
413 fprintf(fusions_out, "%d ", itr->second.right_bases[i]);
414 }
415
416 fprintf(fusions_out, "\t@\t");
417
418 sort(itr->second.vPairSupport.begin(), itr->second.vPairSupport.end());
419
420 for (uint32_t i = 0; i < min((size_t)200, itr->second.vPairSupport.size()); ++i)
421 {
422 fprintf(fusions_out, "%d:%d ", itr->second.vPairSupport[i].ldist, itr->second.vPairSupport[i].rdist);
423 }
424
425 fprintf(fusions_out, "\n");
426 }
427 }
428
429 /**
430 * Extract a list of fusions from a bowtie hit.
431 * Given a bowtie hit, extract a vector of insertions.
432 * @param bh The bowtie hit to use for alignment information.
433 * @param insertions Used to store the resultant vector of insertions.
434 */
435 void fusions_from_spliced_hit(const BowtieHit& bh, vector<Fusion>& fusions, bool auto_sort)
436 {
437 const vector<CigarOp>& cigar = bh.cigar();
438 unsigned int positionInGenome = bh.left();
439
440 for(size_t c = 0; c < cigar.size(); ++c)
441 {
442 Fusion fusion;
443 switch(cigar[c].opcode)
444 {
445 case REF_SKIP:
446 case MATCH:
447 case DEL:
448 positionInGenome += cigar[c].length;
449 break;
450 case rEF_SKIP:
451 case mATCH:
452 case dEL:
453 positionInGenome -= cigar[c].length;
454 break;
455 case FUSION_FF:
456 case FUSION_FR:
457 case FUSION_RF:
458 case FUSION_RR:
459 fusion.dir = cigar[c].opcode;
460 if (fusion.dir == FUSION_RF || fusion.dir == FUSION_RR)
461 positionInGenome = positionInGenome + 1;
462 else
463 positionInGenome = positionInGenome - 1;
464
465 if (bh.ref_id() < bh.ref_id2() ||
466 (bh.ref_id() == bh.ref_id2() && positionInGenome < cigar[c].length) ||
467 !auto_sort)
468 {
469 fusion.refid1 = bh.ref_id();
470 fusion.refid2 = bh.ref_id2();
471 fusion.left = positionInGenome;
472 fusion.right = cigar[c].length;
473 }
474 else
475 {
476 assert (auto_sort);
477 fusion.refid1 = bh.ref_id2();
478 fusion.refid2 = bh.ref_id();
479 fusion.left = cigar[c].length;
480 fusion.right = positionInGenome;
481 }
482
483 fusions.push_back(fusion);
484 break;
485 default:
486 break;
487 }
488 }
489 }
490
491 void pair_support(const HitsForRead& left_hits, const HitsForRead& right_hits, FusionSet& fusions, FusionSet& fusions_ref)
492 {
493 const vector<BowtieHit>& left = left_hits.hits;
494 const vector<BowtieHit>& right = right_hits.hits;
495
496 if (left.size() > fusion_multipairs || left.size() != right.size())
497 return;
498
499 for (size_t i = 0; i < left.size(); ++i)
500 {
501 const BowtieHit& lh = left[i];
502 const BowtieHit& rh = right[i];
503
504 bool left_fusionSpanned = lh.fusion_opcode() != FUSION_NOTHING;
505 bool right_fusionSpanned = rh.fusion_opcode() != FUSION_NOTHING;
506 if (left_fusionSpanned && right_fusionSpanned)
507 continue;
508
509 bool fusionSpanned = left_fusionSpanned || right_fusionSpanned;
510 bool fusion_leftSide = false;
511
512 uint32_t ref_id1 = lh.ref_id2();
513 uint32_t ref_id2 = rh.ref_id();
514
515 int dir = FUSION_FF;
516 if (fusionSpanned)
517 {
518 if (left_fusionSpanned)
519 dir = lh.fusion_opcode();
520 else
521 dir = rh.fusion_opcode();
522 }
523 else
524 {
525 if (!lh.antisense_align() && !rh.antisense_align())
526 dir = FUSION_FR;
527 else if (lh.antisense_align() && rh.antisense_align())
528 dir = FUSION_RF;
529 else
530 {
531 if (lh.ref_id() == rh.ref_id())
532 {
533 if ((lh.antisense_align() && lh.left() > rh.left()) ||
534 (!lh.antisense_align() && lh.left() < rh.left()))
535 dir = FUSION_FF;
536 else
537 dir = FUSION_RR;
538 }
539 else
540 {
541 if ((lh.antisense_align() && lh.ref_id() > rh.ref_id()) ||
542 (!lh.antisense_align() && lh.ref_id() < rh.ref_id()))
543 dir = FUSION_FF;
544 else
545 dir = FUSION_RR;
546 }
547 }
548 }
549
550 FusionSet::iterator lb, ub;
551 bool unsupport = false;
552
553 // int inner_dist = max_report_intron_length * 2;
554
555 int inner_dist = 10000;
556 int outer_dist = 10000 * 2;
557 int max_dist = 10000 * 2;
558 int left1 = 0, left2 = 0, right1 = 0, right2 = 0;
559
560 if (fusionSpanned)
561 {
562 vector<Fusion> new_fusions;
563 if (left_fusionSpanned)
564 fusions_from_spliced_hit(lh, new_fusions);
565 else
566 fusions_from_spliced_hit(rh, new_fusions);
567
568 Fusion& fusion = new_fusions[0];
569 dir = fusion.dir;
570 ref_id1 = fusion.refid1;
571 ref_id2 = fusion.refid2;
572
573 if (left_fusionSpanned && lh.ref_id() != ref_id2 && lh.ref_id2() != ref_id2)
574 unsupport = true;
575
576 if (right_fusionSpanned && rh.ref_id() != ref_id1 && rh.ref_id2() != ref_id1)
577 unsupport = true;
578
579 int temp1, temp2;
580 const BowtieHit* fusionHit;
581 const BowtieHit* otherHit;
582
583 if (left_fusionSpanned)
584 {
585 fusionHit = &lh;
586 otherHit = &rh;
587 }
588 else
589 {
590 fusionHit = &rh;
591 otherHit = &lh;
592 }
593
594 // check the normal hit (otherHit) is on the left hand side of the fusion.
595 if (ref_id1 == ref_id2)
596 {
597 if (dir == FUSION_FF || dir == FUSION_FR)
598 fusion_leftSide = otherHit->left() < fusionHit->left();
599 else if (dir == FUSION_RF)
600 fusion_leftSide = otherHit->left() < fusionHit->right();
601 else
602 fusion_leftSide = otherHit->right() > fusionHit->left();
603 }
604 else
605 fusion_leftSide = fusionHit->ref_id() == otherHit->ref_id();
606
607 // *****
608 // daehwan - make sure what '+' and '-' really mean for FF, FR, RF, RR cases
609 // *****
610 if ((dir != FUSION_RF && dir != FUSION_RR) && fusionHit->antisense_align() && (!fusion_leftSide || otherHit->antisense_align()))
611 unsupport = true;
612
613 if ((dir != FUSION_FR && dir != FUSION_RR) && !fusionHit->antisense_align() && (fusion_leftSide || !otherHit->antisense_align()))
614 unsupport = true;
615
616 if ((dir == FUSION_FR || dir == FUSION_RR) && !fusionHit->antisense_align() && (fusion_leftSide || otherHit->antisense_align()))
617 unsupport = true;
618
619 if ((dir == FUSION_RF || dir == FUSION_RR) && fusionHit->antisense_align() && (!fusion_leftSide || !otherHit->antisense_align()))
620 unsupport = true;
621
622 temp1 = otherHit->left();
623 temp2 = otherHit->right();
624
625 if ((fusion_leftSide && dir == FUSION_RF) || (!fusion_leftSide && dir != FUSION_FR))
626 {
627 temp2 = temp1 + inner_dist;
628 if (temp1 > outer_dist)
629 temp1 = temp1 - outer_dist;
630 else
631 temp1 = 0;
632 }
633 else
634 {
635 if (temp2 >= inner_dist)
636 temp1 = temp2 - inner_dist;
637 else
638 temp1 = 0;
639
640 temp2 = temp2 + outer_dist;
641 }
642
643 if (fusion_leftSide)
644 {
645 left1 = temp1;
646 left2 = temp2;
647 }
648 else
649 {
650 right1 = temp1;
651 right2 = temp2;
652 }
653
654 lb = fusions_ref.find(fusion);
655 ub = fusions_ref.end();
656
657 // daehwan - debug
658 #if 0
659 if (fusion.left == 6994359 && fusion.right == 17581683)
660 {
661 cout << "daehwan - test - pair_with_fusion: " << lh.insert_id() << endl;
662 cout << "edit dist: " << (int)lh.edit_dist() << ", " << (int)rh.edit_dist() << endl;
663 const char* dir_str = "ff";
664 if (dir == FUSION_FR)
665 dir_str = "fr";
666 else if (dir == FUSION_RF)
667 dir_str = "rf";
668 else if (dir == FUSION_RR)
669 dir_str = "rr";
670
671 cout << dir_str << " : " << (lh.antisense_align() ? "-" : "+") << " " << (rh.antisense_align() ? "-" : "+") << endl;
672 cout << lh.ref_id() << ": " << lh.left() << "-" << lh.right() << endl;
673 cout << lh.ref_id2() << ": " << print_cigar(lh.cigar()) << endl;
674 cout << rh.ref_id() << ": " << rh.left() << "-" << rh.right() << endl;
675 cout << rh.ref_id2() << ": " << print_cigar(rh.cigar()) << endl;
676 cout << "found: " << (lb == ub ? "no" : "yes") << endl;
677 cout << "unsupport: " << (unsupport ? "yes" : "no") << endl;
678 cout << "fusion_left: " << (fusion_leftSide ? "yes" : "no") << endl;
679 cout << endl;
680 }
681 #endif
682 }
683 else
684 {
685 if (dir == FUSION_FF)
686 {
687 if (lh.antisense_align())
688 {
689 if (rh.right() >= inner_dist)
690 right1 = rh.right() - inner_dist;
691 right2 = right1 + outer_dist;
692
693 left2 = lh.left() + inner_dist;
694 if (left2 > outer_dist)
695 left1 = left2 - outer_dist;
696 }
697 else
698 {
699 if (lh.right() >= inner_dist)
700 left1 = lh.right() - inner_dist;
701 left2 = left1 + outer_dist;
702
703 right2 = rh.left() + inner_dist;
704 if (right2 > outer_dist)
705 right1 = right2 - outer_dist;
706 }
707 }
708 else if (dir == FUSION_FR)
709 {
710 if (lh.right() >= inner_dist)
711 left1 = lh.right() - inner_dist;
712 left2 = left1 + outer_dist;
713
714 if (rh.right() >= inner_dist)
715 right1 = rh.right() - inner_dist;
716 right2 = right1 + outer_dist;
717 }
718 else if (dir == FUSION_RF)
719 {
720 left2 = lh.left() + inner_dist;
721 right2 = rh.left() + inner_dist;
722
723 if (left2 > outer_dist)
724 left1 = left2 - outer_dist;
725
726 if (right2 > outer_dist)
727 right1 = right2 - outer_dist;
728 }
729 else // if (dir == FUSION_RR)
730 {
731 if (lh.antisense_align())
732 {
733 left2 = lh.left() + inner_dist;
734 if (left2 > outer_dist)
735 left1 = left2 - outer_dist;
736
737 if (rh.right() >= inner_dist)
738 right1 = rh.right() - inner_dist;
739 right2 = right1 + outer_dist;
740 }
741 else
742 {
743 if (lh.right() >= inner_dist)
744 left1 = lh.right() - inner_dist;
745 left2 = left1 + outer_dist;
746
747 right2 = rh.left() + inner_dist;
748 if (right2 > outer_dist)
749 right1 = right2 - outer_dist;
750 }
751 }
752
753 // daehwan - debug
754 #if 0
755 if (fusion.left == 6994359 && fusion.right == 17581683)
756 {
757 const char* dir_str = "ff";
758 if (dir == FUSION_FR)
759 dir_str = "fr";
760 else if (dir == FUSION_RF)
761 dir_str = "rf";
762 else if (dir == FUSION_RR)
763 dir_str = "rr";
764
765 cout << "paired-end from two chromosomes" << endl;
766 cout << "insert id: " << lh.insert_id() << endl;
767 cout << dir_str << " : " << (lh.antisense_align() ? "-" : "+") << " " << (rh.antisense_align() ? "-" : "+") << endl;
768 cout << lh.ref_id() << ": " << lh.left() << "-" << lh.right() << endl;
769 cout << lh.ref_id2() << " " << print_cigar(lh.cigar()) << endl;
770 cout << rh.ref_id() << ": " << rh.left() << "-" << rh.right() << endl;
771 cout << rh.ref_id2() << " " << print_cigar(rh.cigar()) << endl;
772 cout << "left: " << left1 << "-" << left2 << endl;
773 cout << "right: " << right1 << "-" << right2 << endl;
774 cout << endl;
775 }
776 #endif
777
778 if (ref_id1 > ref_id2 || (ref_id1 == ref_id2 && lh.left() > rh.left()))
779 {
780 uint32_t temp = ref_id1;
781 ref_id1 = ref_id2;
782 ref_id2 = temp;
783
784 temp = left1;
785 left1 = right1;
786 right1 = temp;
787
788 temp = left2;
789 left2 = right2;
790 right2 = temp;
791 }
792
793 lb = fusions_ref.upper_bound(Fusion(ref_id1, ref_id2, left1, right1));
794 ub = fusions_ref.lower_bound(Fusion(ref_id1, ref_id2, left2, right2));
795
796 // daehwan
797 #if 0
798 static const uint32_t chr_id1 = RefSequenceTable::hash_string("chr2");
799 static const uint32_t chr_id2 = RefSequenceTable::hash_string("chr3");
800
801 if ((lh.ref_id() == chr_id1 && rh.ref_id() == chr_id2) ||
802 (lh.ref_id() == chr_id2 && rh.ref_id() == chr_id1))
803 {
804 // KPL-4 BSG 19:571325-583492:1 NFIX 19:13106584-13209610:1
805 // const uint32_t left1 = 571325, right1 = 583492, left2 = 13106584, right2 = 13209610;
806
807 // SK-BR-3 DHX35 20:37590942-37668366:1 ITCH 20:32951041-33099198:1
808 // const uint32_t left1 = 37590942, right1 = 37668366, left2 = 32951041, right2 = 33099198;
809
810 // SK-BR-3 NFS1 20:34220262-34287287:-1 PREX1 20:47240790-47444420:-1
811 // const uint32_t left1 = 34220262, right1 = 34287287, left2 = 47240790, right2 = 47444420;
812
813 // VCaP TIA1 2:70436576-70475792:-1 DIRC2 3:122513642-122599986:1
814 uint32_t left1 = 70436576, right1 = 70475792, left2 = 122513642, right2 = 122599986;
815 if (lh.ref_id() != chr_id1)
816 {
817 uint32_t temp = left1;
818 left1 = left2;
819 left2 = temp;
820
821 temp = right1;
822 right1 = right2;
823 right2 = temp;
824 }
825
826 if ((lh.left() >= left1 && lh.left() <= right1 && rh.left() >= left2 && rh.left() <= right2) ||
827 (lh.left() >= left2 && lh.left() <= right2 && rh.left() >= left1 && rh.left() <= right1))
828 {
829 for (size_t t = 0; t < left.size(); ++t)
830 {
831 const BowtieHit& lh = left[t];
832 const BowtieHit& rh = right[t];
833
834 const char* dir_str = "ff";
835 if (dir == FUSION_FR)
836 dir_str = "fr";
837 else if (dir == FUSION_RF)
838 dir_str = "rf";
839 else if (dir == FUSION_RR)
840 dir_str = "rr";
841
842 cout << "paired-end from two chromosomes" << endl;
843 cout << "insert id: " << lh.insert_id() << endl;
844 cout << dir_str << " : " << (lh.antisense_align() ? "-" : "+") << " " << (rh.antisense_align() ? "-" : "+") << endl;
845 cout << lh.ref_id() << ": " << lh.left() << "-" << lh.right() << endl;
846 cout << lh.ref_id2() << " " << print_cigar(lh.cigar()) << endl;
847 cout << rh.ref_id() << ": " << rh.left() << "-" << rh.right() << endl;
848 cout << rh.ref_id2() << " " << print_cigar(rh.cigar()) << endl;
849 cout << "left: " << left1 << "-" << left2 << endl;
850 cout << "right: " << right1 << "-" << right2 << endl;
851 cout << endl;
852 }
853 }
854 }
855 #endif
856 }
857
858 while (lb != ub && lb != fusions_ref.end())
859 {
860 if (lb->first.dir == dir &&
861 lb->first.refid1 == ref_id1 &&
862 lb->first.refid2 == ref_id2 &&
863 ((!fusionSpanned && lb->first.right >= right1 && lb->first.right <= right2) || fusionSpanned) &&
864 !lb->second.reversed)
865 {
866 int dist = 0, left_dist = 0, right_dist = 0;
867
868 // daehwan - check this out
869 if (!fusionSpanned || fusion_leftSide)
870 {
871 if (dir == FUSION_RF || dir == FUSION_RR)
872 left_dist = (left2 - inner_dist) - (int)lb->first.left;
873 else
874 left_dist = (int)lb->first.left - (left1 + inner_dist);
875 }
876
877 if (!fusionSpanned || !fusion_leftSide)
878 {
879 if (dir == FUSION_FR || dir == FUSION_RR)
880 right_dist = (int)lb->first.right - (right1 + inner_dist);
881 else
882 right_dist = (right2 - inner_dist) - (int)lb->first.right;
883 }
884
885 dist = abs(left_dist) + abs(right_dist);
886
887 // daehwan - fix this later
888 if (dist < 0 && fusionSpanned)
889 unsupport = true;
890 bool pass = dist < max_dist;
891
892 // daehwan
893 #if 0
894 // if(!fusionSpanned)
895 // if (pass && !unsupport)
896 if (lb->first.left == 6994359 && lb->first.right == 17581683)
897 {
898 const char* dir_str = "ff";
899 if (dir == FUSION_FR)
900 dir_str = "fr";
901 else if (dir == FUSION_RF)
902 dir_str = "rf";
903 else if (dir == FUSION_RR)
904 dir_str = "rr";
905
906 cout << dir_str << endl;
907 cout << "dist: " << dist << endl;
908 cout << lb->first.refid1 << " " << lb->first.refid2 << endl;
909 cout << lb->first.left << "-" << lb->first.right << endl;
910 cout << "unsupport: " << (unsupport ? "yes" : "no") << endl;
911 cout << "pass: " << (pass ? "yes" : "no") << endl;
912 cout << "ids: " << lh.insert_id() << " : " << rh.insert_id() << endl;
913 cout << endl << endl;
914 }
915 #endif
916
917 FusionSet::iterator itr = fusions.find(lb->first);
918 if (itr != fusions.end())
919 {
920 if (unsupport)
921 ++(itr->second.unsupport_count_pair);
922 else if (pass)
923 {
924 if (fusionSpanned)
925 ++(itr->second.pair_count_fusion);
926 else
927 {
928 if (itr->second.vPairSupport.size() < 200)
929 itr->second.vPairSupport.push_back(FusionPairSupport(left_dist, right_dist));
930 ++(itr->second.pair_count);
931 }
932 }
933 }
934 else
935 {
936 FusionStat fusionStat;
937 if (unsupport)
938 fusionStat.unsupport_count_pair = 1;
939 else if (pass)
940 {
941 if (fusionSpanned)
942 fusionStat.pair_count_fusion = 1;
943 else
944 {
945 fusionStat.vPairSupport.push_back(FusionPairSupport(left_dist, right_dist));
946 fusionStat.pair_count = 1;
947 }
948 }
949
950 fusions[lb->first] = fusionStat;
951 }
952 }
953
954 if (fusionSpanned)
955 break;
956
957 ++lb;
958 }
959 }
960 }
961
962 void merge_with(FusionSimpleSet& fusions, const FusionSimpleSet& other_fusions)
963 {
964 for (FusionSimpleSet::const_iterator other_itr = other_fusions.begin(); other_itr != other_fusions.end(); ++other_itr)
965 {
966 FusionSimpleSet::iterator itr = fusions.find(other_itr->first);
967 if (itr != fusions.end())
968 {
969 FusionSimpleStat& curr = itr->second;
970 curr.merge_with(other_itr->second);
971 }
972 else
973 {
974 fusions[other_itr->first] = other_itr->second;
975 }
976 }
977 }
978
979 void merge_with(FusionSet& fusions, const FusionSet& other_fusions)
980 {
981 for (FusionSet::const_iterator other_itr = other_fusions.begin(); other_itr != other_fusions.end(); ++other_itr)
982 {
983 FusionSet::iterator itr = fusions.find(other_itr->first);
984 if (itr != fusions.end())
985 {
986 FusionStat& curr = itr->second;
987 curr.merge_with(other_itr->second);
988 }
989 else
990 {
991 fusions[other_itr->first] = other_itr->second;
992 }
993 }
994 }

Properties

Name Value
svn:executable *