ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.h
Revision: 159
Committed: Mon Jan 30 22:38:08 2012 UTC (7 years, 9 months ago) by gpertea
File size: 5608 byte(s)
Log Message:
wip

Line User Rev File contents
1 gpertea 29 #ifndef READS_H
2     #define READS_H
3     /*
4     * reads.h
5     * TopHat
6     *
7     * Created by Cole Trapnell on 9/2/08.
8     * Copyright 2008 Cole Trapnell. All rights reserved.
9     *
10     */
11    
12     #include <string>
13     #include <sstream>
14 gpertea 154 #include <queue>
15     #include <limits>
16 gpertea 29 #include <seqan/sequence.h>
17     #include "common.h"
18    
19 gpertea 154
20 gpertea 29 using std::string;
21    
22 gpertea 135 static const int max_read_bp = 256;
23 gpertea 29
24     // Note: qualities are not currently used by TopHat
25     struct Read
26     {
27     Read()
28     {
29     seq.reserve(max_read_bp);
30     qual.reserve(max_read_bp);
31     }
32    
33     string name;
34     string seq;
35     string alt_name;
36     string qual;
37    
38     bool lengths_equal() { return seq.length() == qual.length(); }
39     void clear()
40     {
41     name.clear();
42     seq.clear();
43     qual.clear();
44     alt_name.clear();
45     }
46     };
47    
48     void reverse_complement(string& seq);
49     string convert_color_to_bp(const string& color);
50     seqan::String<char> convert_color_to_bp(char base, const seqan::String<char>& color);
51    
52     string convert_bp_to_color(const string& bp, bool remove_primer = false);
53     seqan::String<char> convert_bp_to_color(const seqan::String<char>& bp, bool remove_primer = false);
54    
55     /*
56     This is a dynamic programming to decode a colorspace read, which is from BWA paper.
57    
58     Heng Li and Richard Durbin
59     Fast and accurate short read alignment with Burrows-Wheeler transform
60     */
61     void BWA_decode(const string& color, const string& qual, const string& ref, string& decode);
62    
63    
64     template <class Type>
65     string DnaString_to_string(const Type& dnaString)
66     {
67     std::string result;
68     std::stringstream ss(std::stringstream::in | std::stringstream::out);
69     ss << dnaString >> result;
70     return result;
71     }
72    
73     class ReadTable;
74    
75     class FLineReader { //simple text line reader class, buffering last line read
76     int len;
77     int allocated;
78     char* buf;
79     bool isEOF;
80     FILE* file;
81     bool is_pipe;
82     bool pushed; //pushed back
83     int lcount; //counting all lines read by the object
84 gpertea 154
85     public:
86     // daehwan - this is not a good place to store the last read ...
87     Read last_read;
88     bool pushed_read;
89    
90 gpertea 29 public:
91     char* chars() { return buf; }
92     char* line() { return buf; }
93     int readcount() { return lcount; } //number of lines read
94     int length() { return len; } //length of the last line read
95     bool isEof() {return isEOF; }
96     char* nextLine();
97     FILE* fhandle() { return file; }
98     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
99     // so the next call will in fact return the same line
100 gpertea 154 void pushBack_read() { if(!last_read.name.empty()) pushed_read=true;}
101 gpertea 29 FLineReader(FILE* stream=NULL) {
102     len=0;
103     isEOF=false;
104     is_pipe=false;
105     allocated=512;
106     buf=(char*)malloc(allocated);
107     lcount=0;
108     buf[0]=0;
109     file=stream;
110     pushed=false;
111 gpertea 154 pushed_read=false;
112 gpertea 29 }
113     FLineReader(FZPipe& fzpipe) {
114     len=0;
115     isEOF=false;
116     allocated=512;
117     buf=(char*)malloc(allocated);
118     lcount=0;
119     buf[0]=0;
120     file=fzpipe.file;
121     is_pipe=!fzpipe.pipecmd.empty();
122     pushed=false;
123 gpertea 154 pushed_read=false;
124 gpertea 29 }
125     void close() {
126     if (file==NULL) return;
127     if (is_pipe) pclose(file);
128     else fclose(file);
129     }
130 gpertea 154
131 gpertea 29 ~FLineReader() {
132     free(buf); //does not call close() -- we might reuse the file handle
133     }
134     };
135    
136 gpertea 154 bool get_read_from_stream(uint64_t insert_id,
137     FLineReader& fr,
138     ReadFormat reads_format,
139     bool strip_slash,
140     Read& read,
141     FILE* um_out=NULL, //unmapped reads output
142     bool um_write_found=false);
143 gpertea 135
144 gpertea 29 void skip_lines(FLineReader& fr);
145     bool next_fasta_record(FLineReader& fr, string& defline, string& seq, ReadFormat reads_format);
146     bool next_fastq_record(FLineReader& fr, const string& seq, string& alt_name, string& qual, ReadFormat reads_format);
147 gpertea 135 bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format=FASTQ,
148 gpertea 29 FLineReader* frq=NULL);
149    
150 gpertea 135 class ReadStream {
151     protected:
152     struct ReadOrdering
153     {
154     bool operator()(std::pair<uint64_t, Read>& lhs, std::pair<uint64_t, Read>& rhs)
155     {
156     return (lhs.first > rhs.first);
157     }
158     };
159     FZPipe fstream;
160     std::priority_queue< std::pair<uint64_t, Read>,
161     std::vector<std::pair<uint64_t, Read> >,
162     ReadOrdering > read_pq;
163     uint64_t last_id; //keep track of last requested ID, for consistency check
164     bool r_eof;
165     bool next_read(Read& read, ReadFormat read_format); //get top read from the queue
166    
167     public:
168     ReadStream():fstream(), read_pq(), last_id(0), r_eof(false) { }
169    
170 gpertea 154 ReadStream(const string& fname):fstream(fname, false),
171 gpertea 135 read_pq(), last_id(0), r_eof(false) { }
172    
173     void init(string& fname) {
174     fstream.openRead(fname, false);
175     }
176     const char* filename() {
177     return fstream.filename.c_str();
178     }
179     //read_ids must ALWAYS be requested in increasing order
180     bool getRead(uint64_t read_id, Read& read,
181 gpertea 154 ReadFormat read_format=FASTQ,
182     bool strip_slash=false,
183 gpertea 159 uint64_t begin_id = 0,
184     uint64_t end_id=std::numeric_limits<uint64_t>::max(),
185 gpertea 154 FILE* um_out=NULL, //unmapped reads output
186 gpertea 159 bool um_write_found=false
187     );
188 gpertea 135
189     void rewind() {
190     fstream.rewind();
191     clear();
192     }
193 gpertea 154 void seek(int64_t offset) {
194     clear();
195     fstream.seek(offset);
196     }
197 gpertea 135 FILE* file() {
198     return fstream.file;
199     }
200     void clear() {
201     /* while (read_pq.size()) {
202     const std::pair<uint64_t, Read>& t = read_pq.top();
203     //free(t.second);
204     read_pq.pop();
205     } */
206     read_pq=std::priority_queue< std::pair<uint64_t, Read>,
207     std::vector<std::pair<uint64_t, Read> >,
208     ReadOrdering > ();
209     }
210     void close() {
211     clear();
212     fstream.close();
213     }
214     ~ReadStream() {
215     close();
216     }
217     };
218 gpertea 29 #endif