ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/reads.h
Revision: 135
Committed: Mon Dec 12 22:28:38 2011 UTC (9 years, 2 months ago) by gpertea
File size: 5498 byte(s)
Log Message:
wip - SplicedSAMHitFactory() still not implemented

Line User Rev File contents
1 gpertea 29 #ifndef READS_H
2     #define READS_H
3     /*
4     * reads.h
5     * TopHat
6     *
7     * Created by Cole Trapnell on 9/2/08.
8     * Copyright 2008 Cole Trapnell. All rights reserved.
9     *
10     */
11    
12     #include <string>
13     #include <sstream>
14     #include <seqan/sequence.h>
15     #include "common.h"
16 gpertea 135 #include <queue>
17 gpertea 29
18     using std::string;
19    
// Capacity (in characters) reserved up front for a read's sequence and
// quality strings.
static const int max_read_bp = 256;

// Note: qualities are not currently used by TopHat
/**
 * A single read record: its name, sequence, alternate name and quality string.
 */
struct Read
{
    /// Reserves max_read_bp characters for seq and qual.
    Read()
    {
        seq.reserve(max_read_bp);
        qual.reserve(max_read_bp);
    }

    std::string name;
    std::string seq;
    std::string alt_name;
    std::string qual;

    /// True when seq and qual hold the same number of characters.
    /// Declared const so it can be used through a const Read reference.
    bool lengths_equal() const { return seq.length() == qual.length(); }

    /// Empties all four strings.
    void clear()
    {
        name.clear();
        seq.clear();
        qual.clear();
        alt_name.clear();
    }
};
45    
46     void reverse_complement(string& seq);
47     string convert_color_to_bp(const string& color);
48     seqan::String<char> convert_color_to_bp(char base, const seqan::String<char>& color);
49    
50     string convert_bp_to_color(const string& bp, bool remove_primer = false);
51     seqan::String<char> convert_bp_to_color(const seqan::String<char>& bp, bool remove_primer = false);
52    
53     /*
54     This is a dynamic-programming algorithm for decoding a colorspace read, taken from the BWA paper.
55    
56     Heng Li and Richard Durbin
57     Fast and accurate short read alignment with Burrows-Wheeler transform
58     */
59     void BWA_decode(const string& color, const string& qual, const string& ref, string& decode);
60    
61    
/**
 * Converts any stream-insertable value to a std::string by writing it to a
 * string stream and reading one token back.
 *
 * Because the text is read back with operator>>, leading whitespace is
 * skipped and the result stops at the first whitespace character; an empty
 * rendering yields an empty string.
 */
template <class Type>
std::string DnaString_to_string(const Type& dnaString)
{
    std::stringstream buffer;   // default open mode is in | out
    buffer << dnaString;

    std::string token;
    buffer >> token;
    return token;
}
70    
71     class ReadTable;
72 gpertea 135 /*
73 gpertea 29 bool get_read_from_stream(uint64_t insert_id,
74     FILE* reads_file,
75     ReadFormat reads_format,
76     bool strip_slash,
77     char read_name [],
78     char read_seq [],
79     char read_alt_name [],
80 gpertea 135 char read_qual [],
81     FILE* um_out=NULL); //unmapped reads output
82     */
83     bool get_read_from_stream(uint64_t insert_id,
84     FILE* reads_file,
85     ReadFormat reads_format,
86     bool strip_slash,
87     Read& read,
88     FILE* um_out=NULL, //unmapped reads output
89     bool um_write_found=false);
90 gpertea 29
91     class FLineReader { //simple text line reader class, buffering last line read
92     int len;
93     int allocated;
94     char* buf;
95     bool isEOF;
96     FILE* file;
97     bool is_pipe;
98     bool pushed; //pushed back
99     int lcount; //counting all lines read by the object
100     public:
101     char* chars() { return buf; }
102     char* line() { return buf; }
103     int readcount() { return lcount; } //number of lines read
104     int length() { return len; } //length of the last line read
105     bool isEof() {return isEOF; }
106     char* nextLine();
107     FILE* fhandle() { return file; }
108     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
109     // so the next call will in fact return the same line
110     FLineReader(FILE* stream=NULL) {
111     len=0;
112     isEOF=false;
113     is_pipe=false;
114     allocated=512;
115     buf=(char*)malloc(allocated);
116     lcount=0;
117     buf[0]=0;
118     file=stream;
119     pushed=false;
120     }
121    
122     FLineReader(FZPipe& fzpipe) {
123     len=0;
124     isEOF=false;
125     allocated=512;
126     buf=(char*)malloc(allocated);
127     lcount=0;
128     buf[0]=0;
129     file=fzpipe.file;
130     is_pipe=!fzpipe.pipecmd.empty();
131     pushed=false;
132     }
133     void close() {
134     if (file==NULL) return;
135     if (is_pipe) pclose(file);
136     else fclose(file);
137     }
138     ~FLineReader() {
139     free(buf); //does not call close() -- we might reuse the file handle
140     }
141     };
142    
143 gpertea 135
144 gpertea 29 void skip_lines(FLineReader& fr);
145     bool next_fasta_record(FLineReader& fr, string& defline, string& seq, ReadFormat reads_format);
146     bool next_fastq_record(FLineReader& fr, const string& seq, string& alt_name, string& qual, ReadFormat reads_format);
147 gpertea 135 bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format=FASTQ,
148 gpertea 29 FLineReader* frq=NULL);
149    
150 gpertea 135
151    
152     class ReadStream {
153     protected:
154     struct ReadOrdering
155     {
156     bool operator()(std::pair<uint64_t, Read>& lhs, std::pair<uint64_t, Read>& rhs)
157     {
158     return (lhs.first > rhs.first);
159     }
160     };
161     FZPipe fstream;
162     std::priority_queue< std::pair<uint64_t, Read>,
163     std::vector<std::pair<uint64_t, Read> >,
164     ReadOrdering > read_pq;
165     uint64_t last_id; //keep track of last requested ID, for consistency check
166     bool r_eof;
167     bool next_read(Read& read, ReadFormat read_format); //get top read from the queue
168    
169     public:
170     ReadStream():fstream(), read_pq(), last_id(0), r_eof(false) { }
171    
172     ReadStream(string& fname):fstream(fname, false),
173     read_pq(), last_id(0), r_eof(false) { }
174    
175     void init(string& fname) {
176     fstream.openRead(fname, false);
177     }
178     const char* filename() {
179     return fstream.filename.c_str();
180     }
181     //read_ids must ALWAYS be requested in increasing order
182     bool getRead(uint64_t read_id, Read& read,
183     ReadFormat read_format=FASTQ,
184     bool strip_slash=false,
185     FILE* um_out=NULL, //unmapped reads output
186     bool um_write_found=false);
187    
188     void rewind() {
189     fstream.rewind();
190     clear();
191     }
192     FILE* file() {
193     return fstream.file;
194     }
195     void clear() {
196     /* while (read_pq.size()) {
197     const std::pair<uint64_t, Read>& t = read_pq.top();
198     //free(t.second);
199     read_pq.pop();
200     } */
201     read_pq=std::priority_queue< std::pair<uint64_t, Read>,
202     std::vector<std::pair<uint64_t, Read> >,
203     ReadOrdering > ();
204     }
205     void close() {
206     clear();
207     fstream.close();
208     }
209     ~ReadStream() {
210     close();
211     }
212     };
213 gpertea 29 #endif