ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/cdbfasta/gcdbz.cpp
Revision: 8
Committed: Mon Mar 22 22:11:25 2010 UTC (12 years, 6 months ago) by gpertea
File size: 8904 byte(s)
Log Message:
added cdbfasta source files

Line File contents
1 #include "gcdbz.h"
2
3 GCdbz::GCdbz(FILE* azf, bool uc, int zrsize) {
4 uncompress=uc;
5 zrecsize=-1;
6 zpos=0;
7 defline_cap=1024;
8 begin_defline();
9 GMALLOC(defline, defline_cap);
10 zf=azf;
11 // FULL_FLUSH method instead of finish:
12 if (uncompress)
13 decomp_start(zrsize);
14 else
15 compress_start();
16 }
17
18 GCdbz::~GCdbz() {
19 //if (zf!=NULL && zf!=stdout && zf!=stdin) fclose(zf);
20 // FULL_FLUSH method instead of finish
21 if (uncompress) decomp_end();
22 else
23 if (!zclosed) compress_end();
24 GFREE(defline);
25 }
26
27
28
29 void GCdbz::extend_defline(int ch) {
30 if (defline_len+1 >= defline_cap) {
31 defline_cap+=(defline_cap>>2);
32 GREALLOC(defline, defline_cap);
33 }
34 defline[defline_len]= ch;
35 defline_len++;
36 }
37
38
// Placeholder FASTA-like record; compress_start() deflates it and writes it
// as the first record of every compressed file.
#define DUMMY_ZREC \
  ">AA1234567890 DNA protein\n" \
  "ACGTTGCTAGCT\n" \
  "NRMTPYYHEIEP\n" \
  "RTASNTSPTPNS\n" \
  "IKSAHPAEPPKR\n"
44
45 void GCdbz::compress_start() {
46 //initialize zstream compression
47 zstream.zalloc = (alloc_func)0; //no alloc function to use
48 zstream.zfree = (free_func)0; //no free function to use
49 zstream.opaque = (voidpf)0; //no private object to pass to zalloc/zfree
50
51 int err=deflateInit(&zstream, Z_DEFAULT_COMPRESSION);
52 if (err!=Z_OK)
53 GError("GCdbz error: deflateInit failed!(err=%d)\n",err);
54 zclosed=false;
55 //write a dummy record as the first record,
56 //so we can use random access (FULL_FLUSH style) later
57 char ztag[5];strcpy(ztag, "CDBZ");
58 uint32 zsize=0;
59 zstream.next_in = (Bytef*)sbuf;
60 strcpy(sbuf, DUMMY_ZREC);
61 zstream.avail_in=strlen(sbuf);
62 zstream.next_out = (Bytef*)lbuf;
63 zstream.avail_out = GCDBZ_LBUF_LEN;
64 uLong t_out=zstream.total_out;
65 err = deflate(&zstream, Z_FULL_FLUSH);
66 zsize=zstream.total_out-t_out;
67 if ((err !=Z_OK && err!=Z_STREAM_END) || zsize<=0)
68 GError("GCdbz error: deflate 1st record failed! (err=%d)\n", err);
69 //now write the header and the dummy record
70 //in case this was not done before:
71 gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86;
72 uint32 zfv = gcvt_uint(&zsize);
73 if (fwrite(ztag, 1, 4, zf)<4 ||
74 fwrite(&zfv,1,sizeof(uint32), zf) < sizeof(uint32) ||
75 fwrite(lbuf, 1, zsize, zf) < zsize)
76 GError("Error writing 1st deflated record!\n");
77 zpos+=4+sizeof(uint32)+zsize;
78 }
79
80 void GCdbz::compress_end() {
81 zstream.next_out = (Bytef*)lbuf;
82 zstream.avail_out = GCDBZ_LBUF_LEN;
83 zstream.avail_in = 0;
84 uLong t_out=zstream.total_out;
85 int err = deflate(&zstream, Z_FINISH);
86 if (err != Z_STREAM_END) {
87 GError("GCdbz error: deflate/Z_FINISH() failed! (err=%d) \n", err);
88 }
89 uLong toWrite=zstream.total_out-t_out;
90 if (toWrite>0) {
91 if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
92 GError("Error writing FINISH deflate chunk!\n");
93 //GError("GCdbz error: out data after Z_FINISH (%d bytes)\n",
94 // zstream.total_out-t_out);
95 }
96 err=deflateEnd(&zstream);
97 if (err!=Z_OK)
98 GError("GCdbz error: deflateEnd() failed! (err=%d) \n", err);
99 zclosed=true;
100 }
101
102 char* GCdbz::compress(GReadBuf *readbuf, char* delim) {
103 //compress everything coming from the input stream inf
104 //until \n is encountered followed by delim
105 //returns this->defline or NULL if error encountered
106
107 //-- WARNING: this subrutine assumes that inf file position
108 // is at the beginning of the record, right AFTER the delim
109 // (exactly as left after a previous call)
110 if (zf==NULL || uncompress)
111 GError("GCdbz Error: cannot use compress() method !\n");
112 unsigned int total_out=0;
113 int c=0;
114 bool in_rec=true;
115 int delimlen=strlen(delim);
116 zrecsize=0;
117 if ((c=readbuf->peekCmp(delim, delimlen))!=0) {
118 if (c<-1) return NULL; //end of file reached
119 GError("GCdbZ::compress error: delimiter '%s' expected at record start!\n",
120 delim);
121 }
122 bool bol=false; //beginning of line flag
123 int deflate_flag=0;
124 begin_defline();
125 int rec_pos=0;
126 int err=0;
127 while (in_rec) { // main read loop
128 int bytes_read=0;
129 while ((c=readbuf->getch())>=0) {
130 sbuf[bytes_read++]=c;
131 if (c=='\n' || c=='\r') { //beginning of line
132 bol = true;
133 if (in_defline) end_defline();
134 //look_ahead for record delimiter:
135 if (readbuf->peekCmp(delim, delimlen)==0) {
136 in_rec=false;
137 break;
138 }
139 }
140 else bol = false;
141 if (rec_pos>delimlen-1 && in_defline)
142 extend_defline(c);
143 rec_pos++;
144 if (bytes_read == GCDBZ_SBUF_LEN) break;
145 }//while not EOF or space in buffer
146 /*if (bytes_read==0)
147 return NULL;*/
148 if (c==EOF) {
149 in_rec=false;
150 if (in_defline) end_defline();
151 }
152 zstream.next_in = (Bytef*)sbuf;
153 zstream.avail_in = bytes_read;
154 //deflate_flag = in_rec ? 0 : Z_FINISH;
155 deflate_flag = in_rec ? 0 : Z_FULL_FLUSH;
156 do { //compression loop
157 zstream.next_out = (Bytef*)lbuf;
158 zstream.avail_out = GCDBZ_LBUF_LEN;
159 uLong t_out=zstream.total_out;
160 err = deflate(&zstream, deflate_flag);
161 if (err !=Z_OK && err!=Z_STREAM_END)
162 GError("GCdbz error: deflate failed! (err=%d)\n", err);
163 uLong toWrite=zstream.total_out-t_out;
164 if (toWrite>0) {
165 if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
166 GError("Error writing deflate chunk!\n");
167 total_out+=toWrite;
168 zrecsize+=toWrite;
169 zpos+=toWrite;
170 }
171 } while (err!=Z_STREAM_END && zstream.avail_out==0);//compression loop
172 } //read loop
173 //if (deflate_flag!=Z_FINISH)
174 if (deflate_flag!=Z_FULL_FLUSH)
175 GError("Deflate flag not set to FINISH!\n");
176 return defline;
177 }
178
179
180 void GCdbz::decomp_start(int zrsize) {
181 zstream.zalloc = (alloc_func)0;
182 zstream.zfree = (free_func)0;
183 zstream.opaque = (voidpf)0;
184 zstream.next_in = (Bytef*)sbuf;
185 zstream.avail_in = 0;
186 zstream.next_out = (Bytef*)lbuf;
187 int err = inflateInit(&zstream);
188 if (err!=Z_OK)
189 GMessage("Error at inflateInit()\n");
190 //-- now read and discard the first record, so we can use random access later
191 // (needed by zlib)
192 int bytes_read=fread(sbuf, 1, zrsize, zf);
193 if (bytes_read<zrsize)
194 GError("Error reading 1st record from zrec file\n");
195 zstream.next_in = (Bytef*)sbuf;
196 zstream.avail_in = bytes_read;
197 //decompress first chunk
198 zstream.next_out = (Bytef*)lbuf;
199 zstream.avail_out = GCDBZ_LBUF_LEN;
200 err = inflate(&zstream, Z_SYNC_FLUSH);
201 if (err !=Z_OK && err!=Z_STREAM_END)
202 GError("GCdbz error: 1st record inflate failed! (err=%d)\n",err);
203 }
204
205 void GCdbz::decomp_end() {
206 int err = inflateEnd(&zstream);
207 if (err!=Z_OK)
208 GError("Error at inflateEnd() (err=%d)\n", err);
209
210 }
211
212
213 //record decompress
214 //returns: the number of bytes decompressed
215 int GCdbz::decompress(FILE* outf, int csize, int zfofs) {
216 if (zfofs>=0) {
217 if (fseek(zf, zfofs, 0))
218 GError("GCdbz::decompress: error fseek() to %d\n", zfofs);
219 }
220 else
221 if (feof(zf)) return 0;
222 bool in_rec=true;
223 int err=0;
224 int total_read=0;
225 int total_written=0;
226 while (in_rec) { // main read loop
227 int to_read=0;
228 int bytes_read=0;
229 if (csize<=0) { //read one byte at a time
230 to_read=1;
231 int c;
232 if ((c =fgetc(zf))!=EOF) {
233 bytes_read = 1;
234 sbuf[0]=c;
235 }
236 else {
237 //bytes_read=0;
238 return 0; //eof
239 }
240 total_read+=bytes_read;
241 }
242 else {
243 to_read = csize-total_read>GCDBZ_SBUF_LEN ?
244 GCDBZ_SBUF_LEN : csize-total_read;
245 // check for csize vs bytes_read match:
246 if (to_read==0) return 0;
247 bytes_read=fread(sbuf, 1, to_read, zf);
248 if (bytes_read!=to_read)
249 GError("Error reading from zrec file\n");
250 total_read+=bytes_read;
251 in_rec=(total_read<csize);
252 }
253 if (bytes_read==0) {
254 //GMessage("bytes_read = 0\n");
255 return 0;
256 }
257 if (in_rec && bytes_read<to_read) in_rec=false;
258 zstream.next_in = (Bytef*)sbuf;
259 zstream.avail_in = bytes_read;
260
261 do { //decompression loop
262 zstream.next_out = (Bytef*)lbuf;
263 zstream.avail_out = GCDBZ_LBUF_LEN;
264 uLong t_out=zstream.total_out;
265 err = inflate(&zstream, Z_SYNC_FLUSH);
266 uLong toWrite=zstream.total_out-t_out;
267 if (toWrite>0) {
268 if (fwrite(lbuf, 1, toWrite, outf)<toWrite) {
269 GError("Error writing inflated chunk!\n");
270 }
271 total_written+=toWrite;
272 }
273 if (err==Z_STREAM_END) {
274 in_rec=false;
275 if (total_written==0) {
276 GMessage("Z_STREAM_END found but total_written=0!\n");
277 }
278 break;
279 }
280 else if (err !=Z_OK)
281 GError("GCdbz error: inflate failed! (err=%d)\n",err);
282 } while (zstream.avail_in!=0); //decompression loop
283 } //read loop
284 /*if (err!=Z_STREAM_END) {
285 GError("decompress: Z_STREAM_END not found!\n");
286 }*/
287 return total_written;
288 }