ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/utils.cpp
Revision: 158
Committed: Sun Jan 29 19:42:10 2012 UTC (8 years, 10 months ago) by gpertea
File size: 3276 byte(s)
Log Message:
Line File contents
1 /*
2 * utils.cpp
3 * TopHat
4 *
5 * Created by Daehwan Kim on 12/28/11.
6 * Copyright 2011 Daehwan Kim. All rights reserved.
7 *
8 */
9
10
11 #ifdef HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #include <iostream>
16 #include <fstream>
17 #include <sstream>
18 #include <algorithm>
19
20 #include "utils.h"
21
22 bool calculate_offsets(const vector<string>& fnames,
23 vector<uint64_t>& ids,
24 vector<vector<int64_t> >& offsets)
25 {
26 vector<vector<pair<uint64_t, int64_t> > > index_list(fnames.size());
27 for (size_t i = 0; i < fnames.size(); ++i)
28 {
29 ifstream reads_index_file((fnames[i] + ".index").c_str());
30 string line;
31 while (getline(reads_index_file, line))
32 {
33 istringstream istream(line);
34 uint64_t read_id;
35 int64_t offset;
36
37 istream >> read_id >> offset;
38 index_list[i].push_back(make_pair(read_id, offset));
39 }
40 }
41
42 // too small for blocking
43 if (index_list.front().size() < (size_t)num_threads)
44 return false;
45
46 offsets.resize(num_threads - 1);
47 for (int i = 1; i < num_threads; ++i)
48 {
49 size_t index = index_list.back().size() / num_threads * i;
50 const pair<uint64_t, int64_t>& data = index_list.back()[index];
51 uint64_t id = data.first;
52 int64_t offset = data.second;
53
54 ids.push_back(id);
55 offsets[i-1].push_back(offset);
56
57 // daehwan - print index
58 fprintf(stderr, "%lu)read %lu - offset %ld\n", index_list.size() - 1, id, offset);
59
60 for (int j = index_list.size() - 2; j >= 0; --j)
61 {
62 size_t other_index = index_list[j].size() / num_threads * i;
63 uint64_t other_id = index_list[j][other_index].first;
64
65 while (other_id > id && other_index > 0)
66 {
67 other_index -= 1;
68 other_id = index_list[j][other_index].first;
69 }
70
71 while (other_index + 1 < index_list[j].size() && index_list[j][other_index+1].first < id)
72 {
73 other_index += 1;
74 other_id = index_list[j][other_index].first;
75 }
76
77 int64_t other_offset = index_list[j][other_index].second;
78
79 if (other_id > id)
80 {
81 other_id = 0;
82 other_offset = 0;
83 }
84
85 id = other_id;
86 offsets[i-1].push_back(other_offset);
87
88 // daehwan - print index
89 fprintf(stderr, "\t%d)read %lu - offset %lu\n", j, other_id, other_offset);
90 }
91
92 reverse(offsets[i-1].begin(), offsets[i-1].end());
93 }
94
95 return true;
96 }
97
98 void calculate_offsets_from_ids(const string& fname,
99 const vector<uint64_t>& ids,
100 vector<int64_t>& offsets)
101 {
102 vector<pair<uint64_t, int64_t> > index_list;
103 ifstream reads_index_file((fname + ".index").c_str());
104
105 string line;
106 uint64_t last_id = 0;
107 int64_t last_offset = 0;
108 for (size_t i = 0; i < ids.size(); ++i)
109 {
110 uint64_t ref_id = ids[i];
111 while (getline(reads_index_file, line))
112 {
113 istringstream istream(line);
114 uint64_t read_id;
115 int64_t offset;
116
117 istream >> read_id >> offset;
118 if (read_id > ref_id)
119 {
120 assert(last_id <= ref_id);
121 offsets.push_back(last_offset);
122
123 // daehwan - print index
124 fprintf(stderr, "ref read: %lu\n", ref_id);
125 fprintf(stderr, "\tread %lu - offset %lu\n", last_id, last_offset);
126 }
127
128 last_id = read_id;
129 last_offset = offset;
130
131 if (last_id > ref_id)
132 break;
133 }
134 }
135
136 if (ids.size() != offsets.size())
137 offsets.clear();
138 }