ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.h
Revision: 135
Committed: Mon Dec 12 22:28:38 2011 UTC (9 years, 2 months ago) by gpertea
File size: 5498 byte(s)
Log Message:
wip - SplicedSAMHitFactory() still not implemented

Line File contents
1 #ifndef READS_H
2 #define READS_H
3 /*
4 * reads.h
5 * TopHat
6 *
7 * Created by Cole Trapnell on 9/2/08.
8 * Copyright 2008 Cole Trapnell. All rights reserved.
9 *
10 */
11
12 #include <string>
13 #include <sstream>
14 #include <seqan/sequence.h>
15 #include "common.h"
16 #include <queue>
17
18 using std::string;
19
// Capacity, in characters, that every Read reserves up front for its
// sequence and quality strings. It is only used as a reserve() hint here;
// nothing in this header enforces it as a hard limit on read length.
static const int max_read_bp = 256;

// Note: qualities are not currently used by TopHat
//
// One sequencing read held as four plain strings: name, sequence,
// alternate name and quality string.
struct Read
{
    // Pre-reserves max_read_bp characters for seq and qual.
    Read()
    {
        seq.reserve(max_read_bp);
        qual.reserve(max_read_bp);
    }

    std::string name;
    std::string seq;
    std::string alt_name;
    std::string qual;

    // True when the sequence and quality strings have the same length.
    // Declared const so it can be called through a const Read or const
    // reference; it only inspects the two strings.
    bool lengths_equal() const { return seq.length() == qual.length(); }

    // Empties all four strings.
    void clear()
    {
        name.clear();
        seq.clear();
        qual.clear();
        alt_name.clear();
    }
};
45
46 void reverse_complement(string& seq);
47 string convert_color_to_bp(const string& color);
48 seqan::String<char> convert_color_to_bp(char base, const seqan::String<char>& color);
49
50 string convert_bp_to_color(const string& bp, bool remove_primer = false);
51 seqan::String<char> convert_bp_to_color(const seqan::String<char>& bp, bool remove_primer = false);
52
/*
 Dynamic-programming decoder for a colorspace read, following the method
 described in the BWA paper:

 Heng Li and Richard Durbin
 Fast and accurate short read alignment with Burrows-Wheeler transform

 `color`, `qual` and `ref` are read-only inputs; `decode` is a non-const
 reference and is presumably where the decoded read is written (the
 definition is not part of this header).
*/
void BWA_decode(const std::string& color,
                const std::string& qual,
                const std::string& ref,
                std::string& decode);
60
61
// Converts any streamable value to a std::string by writing it to a string
// stream and reading back the first whitespace-delimited token.
//
// The insertion and the extraction are separate statements on purpose:
// chaining `ss << value >> result` only compiles when operator<< returns the
// concrete stringstream type. For std::string, integers and other standard
// types operator<< returns std::ostream&, which has no operator>>.
//
// Returns an empty string when the value streams as nothing; leading
// whitespace is skipped and the result stops at the first whitespace.
template <class Type>
std::string DnaString_to_string(const Type& dnaString)
{
    std::stringstream buffer(std::stringstream::in | std::stringstream::out);
    buffer << dnaString;

    std::string text;
    buffer >> text;
    return text;
}
70
71 class ReadTable;
72 /*
73 bool get_read_from_stream(uint64_t insert_id,
74 FILE* reads_file,
75 ReadFormat reads_format,
76 bool strip_slash,
77 char read_name [],
78 char read_seq [],
79 char read_alt_name [],
80 char read_qual [],
81 FILE* um_out=NULL); //unmapped reads output
82 */
83 bool get_read_from_stream(uint64_t insert_id,
84 FILE* reads_file,
85 ReadFormat reads_format,
86 bool strip_slash,
87 Read& read,
88 FILE* um_out=NULL, //unmapped reads output
89 bool um_write_found=false);
90
91 class FLineReader { //simple text line reader class, buffering last line read
92 int len;
93 int allocated;
94 char* buf;
95 bool isEOF;
96 FILE* file;
97 bool is_pipe;
98 bool pushed; //pushed back
99 int lcount; //counting all lines read by the object
100 public:
101 char* chars() { return buf; }
102 char* line() { return buf; }
103 int readcount() { return lcount; } //number of lines read
104 int length() { return len; } //length of the last line read
105 bool isEof() {return isEOF; }
106 char* nextLine();
107 FILE* fhandle() { return file; }
108 void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
109 // so the next call will in fact return the same line
110 FLineReader(FILE* stream=NULL) {
111 len=0;
112 isEOF=false;
113 is_pipe=false;
114 allocated=512;
115 buf=(char*)malloc(allocated);
116 lcount=0;
117 buf[0]=0;
118 file=stream;
119 pushed=false;
120 }
121
122 FLineReader(FZPipe& fzpipe) {
123 len=0;
124 isEOF=false;
125 allocated=512;
126 buf=(char*)malloc(allocated);
127 lcount=0;
128 buf[0]=0;
129 file=fzpipe.file;
130 is_pipe=!fzpipe.pipecmd.empty();
131 pushed=false;
132 }
133 void close() {
134 if (file==NULL) return;
135 if (is_pipe) pclose(file);
136 else fclose(file);
137 }
138 ~FLineReader() {
139 free(buf); //does not call close() -- we might reuse the file handle
140 }
141 };
142
143
144 void skip_lines(FLineReader& fr);
145 bool next_fasta_record(FLineReader& fr, string& defline, string& seq, ReadFormat reads_format);
146 bool next_fastq_record(FLineReader& fr, const string& seq, string& alt_name, string& qual, ReadFormat reads_format);
147 bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format=FASTQ,
148 FLineReader* frq=NULL);
149
150
151
152 class ReadStream {
153 protected:
154 struct ReadOrdering
155 {
156 bool operator()(std::pair<uint64_t, Read>& lhs, std::pair<uint64_t, Read>& rhs)
157 {
158 return (lhs.first > rhs.first);
159 }
160 };
161 FZPipe fstream;
162 std::priority_queue< std::pair<uint64_t, Read>,
163 std::vector<std::pair<uint64_t, Read> >,
164 ReadOrdering > read_pq;
165 uint64_t last_id; //keep track of last requested ID, for consistency check
166 bool r_eof;
167 bool next_read(Read& read, ReadFormat read_format); //get top read from the queue
168
169 public:
170 ReadStream():fstream(), read_pq(), last_id(0), r_eof(false) { }
171
172 ReadStream(string& fname):fstream(fname, false),
173 read_pq(), last_id(0), r_eof(false) { }
174
175 void init(string& fname) {
176 fstream.openRead(fname, false);
177 }
178 const char* filename() {
179 return fstream.filename.c_str();
180 }
181 //read_ids must ALWAYS be requested in increasing order
182 bool getRead(uint64_t read_id, Read& read,
183 ReadFormat read_format=FASTQ,
184 bool strip_slash=false,
185 FILE* um_out=NULL, //unmapped reads output
186 bool um_write_found=false);
187
188 void rewind() {
189 fstream.rewind();
190 clear();
191 }
192 FILE* file() {
193 return fstream.file;
194 }
195 void clear() {
196 /* while (read_pq.size()) {
197 const std::pair<uint64_t, Read>& t = read_pq.top();
198 //free(t.second);
199 read_pq.pop();
200 } */
201 read_pq=std::priority_queue< std::pair<uint64_t, Read>,
202 std::vector<std::pair<uint64_t, Read> >,
203 ReadOrdering > ();
204 }
205 void close() {
206 clear();
207 fstream.close();
208 }
209 ~ReadStream() {
210 close();
211 }
212 };
213 #endif