ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/mask_sam.cpp
Revision: 29
Committed: Tue Aug 2 21:24:54 2011 UTC (9 years, 3 months ago) by gpertea
File size: 3598 byte(s)
Log Message:
adding tophat source work

Line User Rev File contents
1 gpertea 29 /*
2     * mask_sam.cpp
3     * TopHat
4     *
5     * Created by Cole Trapnell on 9/10/09.
6     * Copyright 2009 Cole Trapnell. All rights reserved.
7     *
8     */
9    
10     #include <cstdlib>
11     #include <cstdio>
12     #include <getopt.h>
13    
14     #include "common.h"
15     #include "bwt_map.h"
16    
17     void print_usage()
18     {
19     fprintf(stderr, "Usage: mask_sam hits.sam repeatmask.out\n");
20     }
21    
22     struct Repeat
23     {
24     Repeat(uint32_t i, int l, int r) : ref_id(i), left(l), right(r) {}
25     uint32_t ref_id;
26     int left;
27     int right;
28    
29     bool operator<(const Repeat& rhs) const
30     {
31     if (ref_id != rhs.ref_id)
32     return ref_id < rhs.ref_id;
33     if (left != rhs.left)
34     return left < rhs.left;
35     if (right != rhs.right)
36     return right < rhs.right;
37     return false;
38     }
39     };
40    
41     void driver(FILE* map_file, FILE* repeat_file)
42     {
43     RefSequenceTable rt(true, true);
44     ReadTable it;
45    
46     SAMHitFactory factory(it, rt);
47    
48     vector<Repeat > repeats;
49    
50     char buf[1024 * 16];
51     while(!feof(repeat_file) &&
52     fgets(buf, sizeof(buf), repeat_file))
53     {
54     char chrom[1024];
55    
56     int left;
57     int right;
58     int score;
59     float t1,t2,t3;
60     int ret = sscanf(buf, "%d %f %f %f %s %d %d", &score,&t1,&t2,&t3,chrom,&left,&right);
61     if (ret < 7)
62     continue;
63    
64     int id = rt.get_id(chrom, NULL, 0);
65     repeats.push_back(Repeat(id, left, right));
66     }
67    
68    
69     sort(repeats.begin(), repeats.end());
70    
71     vector<Repeat>::iterator curr_repeat = repeats.begin();
72    
73     static int buf_size = 4096;
74     char bwt_buf[buf_size];
75    
76     uint32_t last_ref_id_seen = 0;
77     int last_pos_seen = 0;
78    
79     while (fgets(bwt_buf, 4096, map_file))
80     {
81     // Chomp the newline
82     char* nl = strrchr(bwt_buf, '\n');
83     if (nl) *nl = 0;
84    
85     if (*bwt_buf == 0)
86     continue;
87    
88     string clean_buf = bwt_buf;
89     // Get a new record from the tab-delimited Bowtie map
90     BowtieHit bh;
91    
92     if (factory.get_hit_from_buf(bwt_buf, bh, true))
93     {
94    
95     if (bh.left() < last_pos_seen)
96     {
97     fprintf(stderr, "Error: this SAM file doesn't appear to be correctly sorted!\n");
98     fprintf(stderr, "\tcurrent hit is at %s:%d, last one was at %s:%d\n",
99     rt.get_name(bh.ref_id()),
100     bh.left(),
101     rt.get_name(last_ref_id_seen),
102     last_pos_seen);
103    
104     exit(1);
105     }
106    
107     while (curr_repeat != repeats.end() &&
108     (curr_repeat->ref_id < bh.ref_id() ||
109     curr_repeat->ref_id == bh.ref_id() && curr_repeat->right < bh.left()))
110     {
111     curr_repeat++;
112     }
113    
114     vector<Repeat>::iterator next_repeat = curr_repeat;
115    
116     bool found_mask = false;
117     while (next_repeat != repeats.end() &&
118     next_repeat->left < bh.right())
119     {
120     if (next_repeat->ref_id == bh.ref_id() &&
121     next_repeat->left <= bh.left() && next_repeat->right >= bh.right())
122     {
123     found_mask = true;
124     break;
125     }
126     next_repeat++;
127     }
128     if (!found_mask)
129     {
130     printf("%s\n", clean_buf.c_str());
131     }
132     }
133     }
134     }
135    
136    
137     int main(int argc, char** argv)
138     {
139     int parse_ret = parse_options(argc, argv, print_usage);
140     if (parse_ret)
141     return parse_ret;
142    
143     if(optind >= argc)
144     {
145     print_usage();
146     return 1;
147     }
148    
149     string map_file_name = argv[optind++];
150    
151     if(optind >= argc)
152     {
153     print_usage();
154     return 1;
155     }
156    
157     string repeat_file_name = argv[optind++];
158    
159    
160     FILE* map_file = fopen(map_file_name.c_str(), "r");
161     if (map_file == NULL)
162     {
163     fprintf(stderr, "Error: cannot open SAM map file %s for reading\n",
164     map_file_name.c_str());
165     exit(1);
166     }
167    
168     FILE* repeat_file = fopen(repeat_file_name.c_str(), "r");
169     if (repeat_file == NULL)
170     {
171     fprintf(stderr, "Error: cannot open SAM map file %s for reading\n",
172     repeat_file_name.c_str());
173     exit(1);
174     }
175    
176     driver(map_file, repeat_file);
177    
178     return 0;
179     }