ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/closures.cpp
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (8 years, 8 months ago) by gpertea
File size: 22466 byte(s)
Log Message:
massive update with Daehwan's work

Line User Rev File contents
1 gpertea 29 /*
2     * closures.cpp
3     * TopHat
4     *
5     * Created by Cole Trapnell on 1/15/09.
6     * Copyright 2009 Cole Trapnell. All rights reserved.
7     *
8     */
9    
10     #ifdef HAVE_CONFIG_H
11     #include <config.h>
12     #endif
13    
14     #include <iostream>
15     #include <fstream>
16     #include <getopt.h>
17 gpertea 154 #include <seqan/sequence.h>
18     #include <seqan/file.h>
19    
20 gpertea 29 #include "bwt_map.h"
21     #include "inserts.h"
22     #include "closures.h"
23     #include "reads.h"
24     #include "tokenize.h"
25 gpertea 154 #include "fusions.h"
26 gpertea 29
27     using namespace seqan;
28     using namespace std;
29    
30    
31     bool possible_cotranscript(const BowtieHit& h1, const BowtieHit& h2, bool check_strand = true)
32     {
33     if (h1.insert_id() != h2.insert_id())
34     return false;
35     int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev;
36     if (max_mate_inner_dist == -1)
37     {
38     max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev;
39     }
40    
41     InsertAlignmentGrade grade(h1,h2, min_mate_inner_dist, max_mate_inner_dist);
42     return (!grade.too_far && !grade.too_close && grade.opposite_strands);
43     }
44    
45    
46     void check_mates(const HitList& hits1_in_ref,
47     const HitList& hits2_in_ref,
48     vector<pair<size_t, size_t> >& happy_mates,
49     vector<size_t>& map1_singletons,
50     vector<size_t>& map2_singletons)
51     {
52     std::set<size_t> marked;
53     // TODO: if this shows up on the profile, replace it with a linear
54     // time algorithm. This one is 2*n*lg(n).
55     HitList::const_iterator last_good = hits2_in_ref.begin();
56    
57     for (size_t i = 0; i < hits1_in_ref.size(); ++i)
58     {
59     pair<HitList::const_iterator, HitList::const_iterator> range_pair;
60     range_pair = equal_range(last_good, hits2_in_ref.end(),
61     hits1_in_ref[i], hit_insert_id_lt);
62     bool found_hit = false;
63     if (range_pair.first != range_pair.second)
64     last_good = range_pair.first;
65     for (HitList::const_iterator f = range_pair.first;
66     f != range_pair.second;
67     ++f)
68     {
69     if (possible_cotranscript(hits1_in_ref[i], *f))
70     {
71     happy_mates.push_back(make_pair(i,f - hits2_in_ref.begin()));
72     marked.insert(f - hits2_in_ref.begin());
73     found_hit = true;
74     }
75     }
76     if (!found_hit)
77     map1_singletons.push_back(i);
78     }
79    
80     for (size_t i = 0; i < hits2_in_ref.size(); ++i)
81     {
82     if (marked.find(i) == marked.end())
83     {
84     map2_singletons.push_back(i);
85     }
86     }
87     }
88    
89 gpertea 154 void find_fusion_closure(HitsForRead& left_hits,
90     HitsForRead& right_hits,
91     std::set<Fusion>& fusions)
92     {
93     if (left_hits.hits.size() > 3 || right_hits.hits.size() > 3)
94     return;
95    
96     for (size_t left_index = 0; left_index < left_hits.hits.size(); ++left_index)
97     {
98     for (size_t right_index = 0; right_index < right_hits.hits.size(); ++right_index)
99     {
100     BowtieHit* leftHit = &left_hits.hits[left_index];
101     BowtieHit* rightHit = &right_hits.hits[right_index];
102    
103     uint32_t dir = FUSION_FF;
104     if (leftHit->antisense_align() != rightHit->antisense_align())
105     dir = FUSION_FF;
106     else if (!leftHit->antisense_align())
107     dir = FUSION_FR;
108     else
109     dir = FUSION_RF;
110    
111     if (dir == FUSION_FF && leftHit->antisense_align())
112     {
113     BowtieHit * tmp = leftHit;
114     leftHit = rightHit;
115     rightHit = tmp;
116     }
117    
118     uint32_t left, right;
119     if (dir == FUSION_FF || dir == FUSION_FR)
120     left = leftHit->right() - 4;
121     else
122     left = leftHit->left() + 4;
123    
124     if (dir == FUSION_FF || dir == FUSION_RF)
125     right = rightHit->left() + 4;
126     else
127     right = rightHit->right() - 4;
128    
129     if (leftHit->ref_id() == rightHit->ref_id() && dir == FUSION_FF)
130     {
131     int dist = 0;
132     dist = rightHit->left() - leftHit->right();
133    
134     if (dist >= 0 && dist <= (int)fusion_min_dist)
135     continue;
136     }
137    
138     uint32_t ref_id1 = leftHit->ref_id();
139     uint32_t ref_id2 = rightHit->ref_id();
140    
141     if (dir == FUSION_FR || dir == FUSION_RF)
142     {
143     if ((ref_id2 < ref_id1) || (ref_id1 == ref_id2 && left > right))
144     {
145     uint32_t temp = ref_id1;
146     ref_id1 = ref_id2;
147     ref_id2 = temp;
148    
149     temp = left;
150     left = right;
151     right = temp;
152     }
153     }
154    
155     fusions.insert(Fusion(ref_id1, ref_id2, left, right, dir));
156     }
157     }
158     }
159    
160 gpertea 29 bool prefer_shorter_pairs = true;
161    
162     typedef pair<InsertAlignmentGrade, vector<InsertAlignment> > BestPairingHits;
163    
164     template<typename Visitor>
165     void visit_best_pairing(HitsForRead& left_hit_group,
166     HitsForRead& right_hit_group,
167     Visitor& visitor)
168     {
169     BestPairingHits insert_best;
170    
171     for (size_t i = 0; i < left_hit_group.hits.size(); ++i)
172     {
173     BowtieHit& h1 = left_hit_group.hits[i];
174    
175     for (size_t j = 0; j < right_hit_group.hits.size(); j++)
176     {
177     BowtieHit& h2 = right_hit_group.hits[j];
178     if (h1.ref_id() != h2.ref_id())
179     continue;
180    
181     uint32_t refid = h1.ref_id();
182    
183     int min_mate_inner_dist = inner_dist_mean - inner_dist_std_dev;
184     if (max_mate_inner_dist == -1)
185     {
186     max_mate_inner_dist = inner_dist_mean + inner_dist_std_dev;
187     }
188     InsertAlignmentGrade s(h1, h2, min_mate_inner_dist, max_mate_inner_dist);
189    
190     //pair<InsertAlignmentGrade, vector<InsertAlignment> >& insert_best
191     // = best_status_for_inserts[curr_left_obs_order];
192     InsertAlignmentGrade& current = insert_best.first;
193     // Is the new status better than the current best one?
194     if (current < s)
195     {
196     insert_best.second.clear();
197     current = s;
198     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
199     }
200     else if (! (s < current))
201     {
202     if (prefer_shorter_pairs && current.num_mapped == 2)
203     {
204     pair<int, int> dc = pair_distances(*(insert_best.second[0].left_alignment), *(insert_best.second[0].right_alignment));
205     pair<int, int> ds = pair_distances(h1,h2);
206     if (ds.second < dc.second)
207     {
208     //chucked_for_shorter_pair += insert_best.second.size();
209     insert_best.second.clear();
210     current = s;
211     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
212    
213     }
214     }
215     else
216     {
217     insert_best.second.push_back(InsertAlignment(refid, &h1, &h2));
218     }
219     }
220     }
221     }
222    
223     visitor.visit(insert_best);
224     }
225    
226     class CovMapIntronFinder
227     {
228     std::set<SpliceMotif> _tmp_hits;
229     vector<SpliceMotif> _hits;
230     RefSequenceTable::Sequence* _ref_seq;
231     public:
232     CovMapIntronFinder() : _ref_seq(NULL) {}
233     CovMapIntronFinder(RefSequenceTable::Sequence* ref_seq) :
234     _ref_seq(ref_seq),
235     _lms(vector<bool>(length(*ref_seq), false)),
236     _rms(vector<bool>(length(*ref_seq), false)){}
237    
238     void add_motifs_in_window(int left,
239     int right,
240     const string& left_motif,
241     const string& right_motif)
242     {
243     size_t l_start = max(0,left);
244     for (int i = l_start;
245     i < min(right, (int)length(*_ref_seq) - 2);
246     ++i)
247     {
248     seqan::Infix<RefSequenceTable::Sequence>::Type curr
249     = seqan::infix(*_ref_seq,i, i + 2);
250     if (curr == left_motif)
251     _lms[i] = true;
252     else if (curr == right_motif)
253     _rms[i] = true;
254     }
255     }
256    
257     void finalize()
258     {
259     int pos = 0;
260     for (vector<bool>::iterator itr = _lms.begin();
261     itr != _lms.end();
262     ++itr, ++pos)
263     {
264     if (*itr)
265     _hits.push_back(SpliceMotif(pos, true));
266     }
267    
268     pos = 0;
269     for (vector<bool>::iterator itr = _rms.begin();
270     itr != _rms.end();
271     ++itr, ++pos)
272     {
273     if (*itr)
274     _hits.push_back(SpliceMotif(pos, false));
275     }
276    
277     sort(_hits.begin(), _hits.end());
278     }
279    
280     size_t seq_len() const { return length(*_ref_seq); }
281     const vector<SpliceMotif>& hits() const { return _hits; }
282     vector<bool> _lms;
283     vector<bool> _rms;
284     };
285    
286     typedef CovMapIntronFinder CIF;
287    
288     struct RefCIF
289     {
290     RefCIF() : ref_seq(NULL) {}
291     RefCIF(const CIF& f, const CIF& r, RefSequenceTable::Sequence* s) :
292     fwd_cif(f), rev_cif(r), ref_seq(s) {}
293     CIF fwd_cif;
294     CIF rev_cif;
295     RefSequenceTable::Sequence* ref_seq;
296     };
297    
298     class CoverageMapVisitor
299     {
300     public:
301     map<uint32_t, RefCIF> finders;
302    
303     CoverageMapVisitor(istream& ref_stream,
304     RefSequenceTable& rt)
305     {
306    
307     while(ref_stream.good() &&
308     !ref_stream.eof())
309     {
310     RefSequenceTable::Sequence* ref_str = new RefSequenceTable::Sequence;
311     string name;
312     readMeta(ref_stream, name, Fasta());
313     string::size_type space_pos = name.find_first_of(" \t\r");
314     if (space_pos != string::npos)
315     {
316     name.resize(space_pos);
317     }
318     read(ref_stream, *ref_str, Fasta());
319    
320     uint32_t ref_id = rt.get_id(name, NULL, 0);
321     finders[ref_id] = RefCIF(CIF(ref_str), CIF(ref_str), ref_str);
322     }
323     }
324    
325     void visit(BestPairingHits& pairings)
326     {
327     if (!pairings.first.num_mapped == 2)
328     return;
329    
330     static string fwd_lm("GT");
331     static string fwd_rm("AG");
332     static string rev_lm("CT");
333     static string rev_rm("AC");
334    
335     for (size_t i = 0;
336     i < pairings.second.size();
337     ++i)
338     {
339    
340     InsertAlignment& al = pairings.second[i];
341    
342     BowtieHit& bh_left = *(al.left_alignment);
343     BowtieHit& bh_right = *(al.right_alignment);
344    
345     assert (bh_left.ref_id() == bh_right.ref_id());
346    
347     map<uint32_t, RefCIF >::iterator if_itr = finders.find(bh_left.ref_id());
348     assert (if_itr != finders.end());
349    
350     RefCIF& ref_finders = if_itr->second;
351    
352     pair<int, int> ds = pair_distances(bh_left, bh_right);
353    
354     int minor_hit_start, major_hit_start;
355     int minor_hit_end, major_hit_end;
356     if (bh_left.left() < bh_right.left())
357     {
358     minor_hit_start = (int)bh_left.left();
359     minor_hit_end = (int)bh_left.right();
360     major_hit_start = (int)bh_right.left();
361     major_hit_end = (int)bh_right.right();
362     }
363     else
364     {
365     minor_hit_start = (int)bh_right.left();
366     minor_hit_end = (int)bh_right.right();
367     major_hit_start = (int)bh_left.left();
368     major_hit_end = (int)bh_left.right();
369     }
370    
371     int inner_dist = major_hit_start - minor_hit_end;
372    
373     bool skip_fwd = false;
374     bool skip_rev = false;
375    
376     if (library_type == FR_FIRSTSTRAND)
377     {
378     if (bh_left.antisense_align()) skip_rev = true;
379     else skip_fwd = true;
380     }
381    
382     if (library_type == FR_SECONDSTRAND)
383     {
384     if (bh_left.antisense_align()) skip_fwd = true;
385     else skip_rev = true;
386     }
387    
388     if (inner_dist > inner_dist_mean + 2 * inner_dist_std_dev)
389     {
390     // Forward strand
391     if (!skip_fwd)
392     {
393     ref_finders.fwd_cif.add_motifs_in_window((int)bh_left.left() - 10,
394     bh_left.right() + 10,
395     fwd_lm,
396     fwd_rm);
397    
398     ref_finders.fwd_cif.add_motifs_in_window((int)bh_right.left() - 10,
399     bh_right.right() + 10,
400     fwd_lm,
401     fwd_rm);
402     }
403    
404     // Reverse strand
405     if (!skip_rev)
406     {
407     ref_finders.rev_cif.add_motifs_in_window((int)bh_left.left() - 10,
408     bh_left.right() + 10,
409     rev_lm,
410     rev_rm);
411    
412     ref_finders.rev_cif.add_motifs_in_window((int)bh_right.left() - 10,
413     bh_right.right() + 10,
414     rev_lm,
415     rev_rm);
416     }
417     }
418     }
419     }
420    
421     void finalize()
422     {
423     for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
424     itr != finders.end();
425     ++itr)
426     {
427     itr->second.fwd_cif.finalize();
428     itr->second.rev_cif.finalize();
429     delete itr->second.ref_seq;
430     itr->second.ref_seq = NULL;
431     }
432     }
433     };
434    
435    
436     class JunctionMapVisitor
437     {
438     public:
439     typedef JunctionFinder<RefSequenceTable::Sequence, CIF> JF;
440    
441     struct JunctionTable
442     {
443     JunctionTable() : jf(NULL), possible_splices(NULL) {}
444     JunctionTable(ClosureJunctionSet* ps, JF* _jf, bool as)
445     : jf(_jf), possible_splices(ps), antisense(as) {}
446    
447     JF* jf;
448     ClosureJunctionSet* possible_splices;
449     bool antisense;
450     };
451    
452    
453     map<uint32_t, pair<JunctionTable, JunctionTable> > _finders;
454    
455     JunctionMapVisitor(ClosureJunctionSet& fwd_splices,
456     ClosureJunctionSet& rev_splices,
457     map<uint32_t, RefCIF >& finders)
458     {
459     static const uint32_t bowtie_padding = 5;
460     for (map<uint32_t, RefCIF >::iterator itr = finders.begin();
461     itr != finders.end();
462     ++itr)
463     {
464     JF* fwd_jf = new JF(itr->second.fwd_cif,
465     inner_dist_mean,
466     inner_dist_std_dev,
467     min_closure_intron_length,
468     min_closure_exon_length,
469     bowtie_padding,
470     island_extension);
471     JF* rev_jf = new JF(itr->second.rev_cif,
472     inner_dist_mean,
473     inner_dist_std_dev,
474     min_closure_intron_length,
475     min_closure_exon_length,
476     bowtie_padding,
477     island_extension);
478    
479     _finders[itr->first] = make_pair(JunctionTable(&fwd_splices, fwd_jf,false),
480     JunctionTable(&rev_splices, rev_jf, true));
481     }
482     }
483    
484    
485     void visit(BestPairingHits& pairings)
486     {
487     if (!pairings.first.num_mapped == 2)
488     return;
489     for (size_t i = 0;
490     i < pairings.second.size();
491     ++i)
492     {
493    
494     InsertAlignment& al = pairings.second[i];
495    
496     BowtieHit& bh_left = *(al.left_alignment);
497     BowtieHit& bh_right = *(al.right_alignment);
498    
499     assert (bh_left.ref_id() == bh_right.ref_id());
500    
501     map<uint32_t, pair<JunctionTable, JunctionTable> >::iterator if_itr = _finders.find(bh_left.ref_id());
502     assert (if_itr != _finders.end());
503    
504     JF& fwd_jf = *(if_itr->second.first.jf);
505     fwd_jf.possible_junctions(*(if_itr->second.first.possible_splices), bh_left, bh_right);
506    
507     JF& rev_jf = *(if_itr->second.second.jf);
508     rev_jf.possible_junctions(*(if_itr->second.second.possible_splices), bh_left, bh_right);
509     }
510     }
511     };
512    
513     void closure_driver(vector<FZPipe>& map1,
514     vector<FZPipe>& map2,
515     ifstream& ref_stream,
516 gpertea 154 FILE* juncs_file,
517     FILE* fusions_out)
518 gpertea 29 {
519     typedef RefSequenceTable::Sequence Reference;
520    
521     ReadTable it;
522     RefSequenceTable rt(true);
523 gpertea 154
524     BowtieHitFactory hit_factory(it, rt);
525    
526     std::set<Fusion> fusions;
527 gpertea 29
528     fprintf (stderr, "Finding near-covered motifs...");
529     CoverageMapVisitor cov_map_visitor(ref_stream, rt);
530     uint32_t coverage_attempts = 0;
531    
532     assert(map1.size() == map2.size());
533     for (size_t num = 0; num < map1.size(); ++num)
534     {
535     HitStream left_hs(map1[num].file, &hit_factory, false, true, false);
536     HitStream right_hs(map2[num].file, &hit_factory, false, true, false);
537    
538     HitsForRead curr_left_hit_group;
539     HitsForRead curr_right_hit_group;
540    
541     left_hs.next_read_hits(curr_left_hit_group);
542     right_hs.next_read_hits(curr_right_hit_group);
543    
544     uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
545     uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
546    
547 gpertea 135 while(curr_left_obs_order != VMAXINT32 &&
548     curr_right_obs_order != VMAXINT32)
549 gpertea 29 {
550     while (curr_left_obs_order < curr_right_obs_order&&
551 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
552 gpertea 29 {
553     // Get hit group
554    
555     left_hs.next_read_hits(curr_left_hit_group);
556     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
557     }
558    
559     while (curr_left_obs_order > curr_right_obs_order &&
560 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
561 gpertea 29 {
562     // Get hit group
563    
564     right_hs.next_read_hits(curr_right_hit_group);
565     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
566     }
567    
568     while (curr_left_obs_order == curr_right_obs_order &&
569 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
570 gpertea 154 {
571     if (num == 0)
572     find_fusion_closure(curr_left_hit_group, curr_right_hit_group, fusions);
573    
574     if (coverage_attempts++ % 10000 == 0)
575     fprintf (stderr, "Adding covered motifs from pair %d\n", coverage_attempts);
576    
577 gpertea 29 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, cov_map_visitor);
578    
579     left_hs.next_read_hits(curr_left_hit_group);
580     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
581    
582     right_hs.next_read_hits(curr_right_hit_group);
583     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
584     }
585     }
586     }
587    
588     cov_map_visitor.finalize();
589     fprintf (stderr, "done\n");
590    
591     ClosureJunctionSet fwd_splices;
592     ClosureJunctionSet rev_splices;
593    
594     JunctionMapVisitor junc_map_visitor(fwd_splices, rev_splices, cov_map_visitor.finders);
595     fprintf (stderr, "Searching for closures...");
596     uint32_t closure_attempts = 0;
597    
598     for (size_t num = 0; num < map1.size(); ++num)
599     {
600     map1[num].rewind();
601     map2[num].rewind();
602    
603     HitStream left_hs = HitStream(map1[num].file, &hit_factory, false, true, false);
604     HitStream right_hs = HitStream(map2[num].file, &hit_factory, false, true, false);
605    
606     HitsForRead curr_left_hit_group;
607     HitsForRead curr_right_hit_group;
608    
609     left_hs.next_read_hits(curr_left_hit_group);
610     right_hs.next_read_hits(curr_right_hit_group);
611    
612     uint32_t curr_right_obs_order = it.observation_order(curr_left_hit_group.insert_id);
613     uint32_t curr_left_obs_order = it.observation_order(curr_right_hit_group.insert_id);
614    
615 gpertea 135 while(curr_left_obs_order != VMAXINT32 &&
616     curr_right_obs_order != VMAXINT32)
617 gpertea 29 {
618     while (curr_left_obs_order < curr_right_obs_order &&
619 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
620 gpertea 29 {
621     // Get hit group
622    
623     left_hs.next_read_hits(curr_left_hit_group);
624     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
625     }
626    
627     while (curr_left_obs_order > curr_right_obs_order &&
628 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
629 gpertea 29 {
630     // Get hit group
631    
632     right_hs.next_read_hits(curr_right_hit_group);
633     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
634     }
635    
636     while (curr_left_obs_order == curr_right_obs_order &&
637 gpertea 135 curr_left_obs_order != VMAXINT32 && curr_right_obs_order != VMAXINT32)
638 gpertea 29 {
639 gpertea 154 if (closure_attempts++ % 10000 == 0)
640     fprintf (stderr, "Trying to close pair %d\n", closure_attempts);
641    
642 gpertea 29 visit_best_pairing(curr_left_hit_group, curr_right_hit_group, junc_map_visitor);
643     left_hs.next_read_hits(curr_left_hit_group);
644     curr_left_obs_order = it.observation_order(curr_left_hit_group.insert_id);
645    
646     right_hs.next_read_hits(curr_right_hit_group);
647     curr_right_obs_order = it.observation_order(curr_right_hit_group.insert_id);
648     }
649     }
650     }
651    
652     for (size_t num = 0; num < map1.size(); ++num)
653     {
654     map1[num].close();
655     map2[num].close();
656     }
657    
658     fprintf(stderr, "%lu Forward strand splices\n", fwd_splices.size());
659     fprintf(stderr, "%lu Reverse strand splices\n", rev_splices.size());
660    
661     fprintf (stderr, "done\n");
662     uint32_t num_potential_splices = 0;
663     fprintf (stderr, "Reporting possible junctions...");
664     map<uint32_t, pair<JunctionMapVisitor::JunctionTable, JunctionMapVisitor::JunctionTable> >::iterator f_itr;
665     f_itr = junc_map_visitor._finders.begin();
666    
667     ClosureJunctionSet::iterator j_itr;
668     j_itr = fwd_splices.begin();
669     while (j_itr != fwd_splices.end())
670     {
671     fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
672     rt.get_name(j_itr->refid),
673     j_itr->left,j_itr->right,'+');
674     ++num_potential_splices;
675     ++j_itr;
676     }
677    
678     j_itr = rev_splices.begin();
679     while (j_itr != rev_splices.end())
680     {
681     fprintf (juncs_file,"%s\t%u\t%u\t%c\n",
682     rt.get_name(j_itr->refid),
683     j_itr->left,j_itr->right,'-');
684     ++num_potential_splices;
685     ++j_itr;
686     }
687    
688     //accept_all_best_hits(best_status_for_inserts);
689     fprintf(stderr, "done\n");
690     fprintf(stderr, "Searched for closures between %d pairs\n", searched);
691     fprintf(stderr, "Successfully closed %d pairs\n", closed);
692    
693     fprintf(stderr, "Found %d total possible splices\n", num_potential_splices);
694 gpertea 154
695     // daehwan
696     #if 0
697     fprintf (stderr, "Reporting potential fusions...\n");
698     if(fusions_out){
699     for(std::set<Fusion>::iterator itr = fusions.begin(); itr != fusions.end(); ++itr){
700     const char* ref_name1 = rt.get_name(itr->refid1);
701     const char* ref_name2 = rt.get_name(itr->refid2);
702    
703     const char* dir = "";
704     if (itr->dir == FUSION_FR)
705     dir = "fr";
706     else if(itr->dir == FUSION_RF)
707     dir = "rf";
708     else
709     dir = "ff";
710    
711     fprintf(fusions_out,
712     "%s\t%d\t%s\t%d\t%s\n",
713     ref_name1,
714     itr->left,
715     ref_name2,
716     itr->right,
717     dir);
718     }
719     fclose(fusions_out);
720     }else{
721     fprintf(stderr, "Failed to open fusions file for writing\n");
722     }
723     #endif
724 gpertea 29 }
725    
726     void print_usage()
727     {
728     fprintf(stderr, "Usage: closure_juncs <closure.juncs> <ref.fa> <left_map.bwtout> <right_map.bwtout>\n");
729     }
730    
731     int main(int argc, char** argv)
732     {
733     fprintf(stderr, "closure_juncs v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
734     fprintf(stderr, "---------------------------\n");
735    
736     int parse_ret = parse_options(argc, argv, print_usage);
737     if (parse_ret)
738     return parse_ret;
739    
740     if(optind >= argc)
741     {
742     print_usage();
743     return 1;
744     }
745    
746     string junctions_file_name = argv[optind++];
747    
748     if(optind >= argc)
749     {
750     print_usage();
751     return 1;
752     }
753 gpertea 154
754     string fusions_file_name = argv[optind++];
755 gpertea 29
756 gpertea 154 if(optind >= argc)
757     {
758     print_usage();
759     return 1;
760     }
761    
762 gpertea 29 string ref_fasta = argv[optind++];
763 gpertea 154
764 gpertea 29 if(optind >= argc)
765     {
766     print_usage();
767     return 1;
768     }
769    
770     string left_file_list = argv[optind++];
771     vector<string> left_file_names;
772     vector<FZPipe> left_files;
773     tokenize(left_file_list, ",", left_file_names);
774    
775     string unzcmd = getUnpackCmd(left_file_names[0], false);
776     for (size_t i = 0; i < left_file_names.size(); ++i)
777     {
778     FZPipe seg_file(left_file_names[i], unzcmd);
779     if (seg_file.file == NULL)
780     {
781     fprintf(stderr, "Error: cannot open file %s for reading\n",
782     left_file_names[i].c_str());
783     exit(1);
784     }
785     left_files.push_back(seg_file);
786     }
787    
788     if(optind >= argc)
789     {
790     print_usage();
791     return 1;
792     }
793    
794     string right_file_list = argv[optind++];
795     vector<string> right_file_names;
796     vector<FZPipe> right_files;
797     tokenize(right_file_list, ",", right_file_names);
798     for (size_t i = 0; i < right_file_names.size(); ++i)
799     {
800     FZPipe seg_file(right_file_names[i], unzcmd);
801     if (seg_file.file == NULL)
802     {
803     fprintf(stderr, "Error: cannot open %s for reading\n",
804     right_file_names[i].c_str());
805     exit(1);
806     }
807     right_files.push_back(seg_file);
808     }
809    
810     ifstream ref_stream(ref_fasta.c_str(), ifstream::in);
811    
812     FILE* splice_db = fopen(junctions_file_name.c_str(), "w");
813     if (splice_db == NULL)
814     {
815     fprintf(stderr, "Error: cannot open junctions file %s for writing\n",
816     junctions_file_name.c_str());
817     exit(1);
818     }
819 gpertea 154
820     FILE* fusion_db = fopen(fusions_file_name.c_str(), "w");
821     if (splice_db == NULL)
822     {
823     fprintf(stderr, "Error: cannot open fusions file %s for writing\n",
824     fusions_file_name.c_str());
825     exit(1);
826     }
827    
828 gpertea 29 closure_driver(left_files,
829     right_files,
830     ref_stream,
831 gpertea 154 splice_db,
832     fusion_db);
833 gpertea 29
834     return 0;
835     }