ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/cdbfasta/gcdbz.cpp
Revision: 8
Committed: Mon Mar 22 22:11:25 2010 UTC (12 years, 6 months ago) by gpertea
File size: 8904 byte(s)
Log Message:
added cdbfasta source files

Line User Rev File contents
1 gpertea 8 #include "gcdbz.h"
2    
3     GCdbz::GCdbz(FILE* azf, bool uc, int zrsize) {
4     uncompress=uc;
5     zrecsize=-1;
6     zpos=0;
7     defline_cap=1024;
8     begin_defline();
9     GMALLOC(defline, defline_cap);
10     zf=azf;
11     // FULL_FLUSH method instead of finish:
12     if (uncompress)
13     decomp_start(zrsize);
14     else
15     compress_start();
16     }
17    
18     GCdbz::~GCdbz() {
19     //if (zf!=NULL && zf!=stdout && zf!=stdin) fclose(zf);
20     // FULL_FLUSH method instead of finish
21     if (uncompress) decomp_end();
22     else
23     if (!zclosed) compress_end();
24     GFREE(defline);
25     }
26    
27    
28    
29     void GCdbz::extend_defline(int ch) {
30     if (defline_len+1 >= defline_cap) {
31     defline_cap+=(defline_cap>>2);
32     GREALLOC(defline, defline_cap);
33     }
34     defline[defline_len]= ch;
35     defline_len++;
36     }
37    
38    
// Text of the placeholder record that compress_start() deflates and writes
// as the very first record of the compressed file.
#define DUMMY_ZREC \
  ">AA1234567890 DNA protein\n" \
  "ACGTTGCTAGCT\n" \
  "NRMTPYYHEIEP\n" \
  "RTASNTSPTPNS\n" \
  "IKSAHPAEPPKR\n"
44    
45     void GCdbz::compress_start() {
46     //initialize zstream compression
47     zstream.zalloc = (alloc_func)0; //no alloc function to use
48     zstream.zfree = (free_func)0; //no free function to use
49     zstream.opaque = (voidpf)0; //no private object to pass to zalloc/zfree
50    
51     int err=deflateInit(&zstream, Z_DEFAULT_COMPRESSION);
52     if (err!=Z_OK)
53     GError("GCdbz error: deflateInit failed!(err=%d)\n",err);
54     zclosed=false;
55     //write a dummy record as the first record,
56     //so we can use random access (FULL_FLUSH style) later
57     char ztag[5];strcpy(ztag, "CDBZ");
58     uint32 zsize=0;
59     zstream.next_in = (Bytef*)sbuf;
60     strcpy(sbuf, DUMMY_ZREC);
61     zstream.avail_in=strlen(sbuf);
62     zstream.next_out = (Bytef*)lbuf;
63     zstream.avail_out = GCDBZ_LBUF_LEN;
64     uLong t_out=zstream.total_out;
65     err = deflate(&zstream, Z_FULL_FLUSH);
66     zsize=zstream.total_out-t_out;
67     if ((err !=Z_OK && err!=Z_STREAM_END) || zsize<=0)
68     GError("GCdbz error: deflate 1st record failed! (err=%d)\n", err);
69     //now write the header and the dummy record
70     //in case this was not done before:
71     gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86;
72     uint32 zfv = gcvt_uint(&zsize);
73     if (fwrite(ztag, 1, 4, zf)<4 ||
74     fwrite(&zfv,1,sizeof(uint32), zf) < sizeof(uint32) ||
75     fwrite(lbuf, 1, zsize, zf) < zsize)
76     GError("Error writing 1st deflated record!\n");
77     zpos+=4+sizeof(uint32)+zsize;
78     }
79    
80     void GCdbz::compress_end() {
81     zstream.next_out = (Bytef*)lbuf;
82     zstream.avail_out = GCDBZ_LBUF_LEN;
83     zstream.avail_in = 0;
84     uLong t_out=zstream.total_out;
85     int err = deflate(&zstream, Z_FINISH);
86     if (err != Z_STREAM_END) {
87     GError("GCdbz error: deflate/Z_FINISH() failed! (err=%d) \n", err);
88     }
89     uLong toWrite=zstream.total_out-t_out;
90     if (toWrite>0) {
91     if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
92     GError("Error writing FINISH deflate chunk!\n");
93     //GError("GCdbz error: out data after Z_FINISH (%d bytes)\n",
94     // zstream.total_out-t_out);
95     }
96     err=deflateEnd(&zstream);
97     if (err!=Z_OK)
98     GError("GCdbz error: deflateEnd() failed! (err=%d) \n", err);
99     zclosed=true;
100     }
101    
102     char* GCdbz::compress(GReadBuf *readbuf, char* delim) {
103     //compress everything coming from the input stream inf
104     //until \n is encountered followed by delim
105     //returns this->defline or NULL if error encountered
106    
107     //-- WARNING: this subrutine assumes that inf file position
108     // is at the beginning of the record, right AFTER the delim
109     // (exactly as left after a previous call)
110     if (zf==NULL || uncompress)
111     GError("GCdbz Error: cannot use compress() method !\n");
112     unsigned int total_out=0;
113     int c=0;
114     bool in_rec=true;
115     int delimlen=strlen(delim);
116     zrecsize=0;
117     if ((c=readbuf->peekCmp(delim, delimlen))!=0) {
118     if (c<-1) return NULL; //end of file reached
119     GError("GCdbZ::compress error: delimiter '%s' expected at record start!\n",
120     delim);
121     }
122     bool bol=false; //beginning of line flag
123     int deflate_flag=0;
124     begin_defline();
125     int rec_pos=0;
126     int err=0;
127     while (in_rec) { // main read loop
128     int bytes_read=0;
129     while ((c=readbuf->getch())>=0) {
130     sbuf[bytes_read++]=c;
131     if (c=='\n' || c=='\r') { //beginning of line
132     bol = true;
133     if (in_defline) end_defline();
134     //look_ahead for record delimiter:
135     if (readbuf->peekCmp(delim, delimlen)==0) {
136     in_rec=false;
137     break;
138     }
139     }
140     else bol = false;
141     if (rec_pos>delimlen-1 && in_defline)
142     extend_defline(c);
143     rec_pos++;
144     if (bytes_read == GCDBZ_SBUF_LEN) break;
145     }//while not EOF or space in buffer
146     /*if (bytes_read==0)
147     return NULL;*/
148     if (c==EOF) {
149     in_rec=false;
150     if (in_defline) end_defline();
151     }
152     zstream.next_in = (Bytef*)sbuf;
153     zstream.avail_in = bytes_read;
154     //deflate_flag = in_rec ? 0 : Z_FINISH;
155     deflate_flag = in_rec ? 0 : Z_FULL_FLUSH;
156     do { //compression loop
157     zstream.next_out = (Bytef*)lbuf;
158     zstream.avail_out = GCDBZ_LBUF_LEN;
159     uLong t_out=zstream.total_out;
160     err = deflate(&zstream, deflate_flag);
161     if (err !=Z_OK && err!=Z_STREAM_END)
162     GError("GCdbz error: deflate failed! (err=%d)\n", err);
163     uLong toWrite=zstream.total_out-t_out;
164     if (toWrite>0) {
165     if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
166     GError("Error writing deflate chunk!\n");
167     total_out+=toWrite;
168     zrecsize+=toWrite;
169     zpos+=toWrite;
170     }
171     } while (err!=Z_STREAM_END && zstream.avail_out==0);//compression loop
172     } //read loop
173     //if (deflate_flag!=Z_FINISH)
174     if (deflate_flag!=Z_FULL_FLUSH)
175     GError("Deflate flag not set to FINISH!\n");
176     return defline;
177     }
178    
179    
180     void GCdbz::decomp_start(int zrsize) {
181     zstream.zalloc = (alloc_func)0;
182     zstream.zfree = (free_func)0;
183     zstream.opaque = (voidpf)0;
184     zstream.next_in = (Bytef*)sbuf;
185     zstream.avail_in = 0;
186     zstream.next_out = (Bytef*)lbuf;
187     int err = inflateInit(&zstream);
188     if (err!=Z_OK)
189     GMessage("Error at inflateInit()\n");
190     //-- now read and discard the first record, so we can use random access later
191     // (needed by zlib)
192     int bytes_read=fread(sbuf, 1, zrsize, zf);
193     if (bytes_read<zrsize)
194     GError("Error reading 1st record from zrec file\n");
195     zstream.next_in = (Bytef*)sbuf;
196     zstream.avail_in = bytes_read;
197     //decompress first chunk
198     zstream.next_out = (Bytef*)lbuf;
199     zstream.avail_out = GCDBZ_LBUF_LEN;
200     err = inflate(&zstream, Z_SYNC_FLUSH);
201     if (err !=Z_OK && err!=Z_STREAM_END)
202     GError("GCdbz error: 1st record inflate failed! (err=%d)\n",err);
203     }
204    
205     void GCdbz::decomp_end() {
206     int err = inflateEnd(&zstream);
207     if (err!=Z_OK)
208     GError("Error at inflateEnd() (err=%d)\n", err);
209    
210     }
211    
212    
213     //record decompress
214     //returns: the number of bytes decompressed
215     int GCdbz::decompress(FILE* outf, int csize, int zfofs) {
216     if (zfofs>=0) {
217     if (fseek(zf, zfofs, 0))
218     GError("GCdbz::decompress: error fseek() to %d\n", zfofs);
219     }
220     else
221     if (feof(zf)) return 0;
222     bool in_rec=true;
223     int err=0;
224     int total_read=0;
225     int total_written=0;
226     while (in_rec) { // main read loop
227     int to_read=0;
228     int bytes_read=0;
229     if (csize<=0) { //read one byte at a time
230     to_read=1;
231     int c;
232     if ((c =fgetc(zf))!=EOF) {
233     bytes_read = 1;
234     sbuf[0]=c;
235     }
236     else {
237     //bytes_read=0;
238     return 0; //eof
239     }
240     total_read+=bytes_read;
241     }
242     else {
243     to_read = csize-total_read>GCDBZ_SBUF_LEN ?
244     GCDBZ_SBUF_LEN : csize-total_read;
245     // check for csize vs bytes_read match:
246     if (to_read==0) return 0;
247     bytes_read=fread(sbuf, 1, to_read, zf);
248     if (bytes_read!=to_read)
249     GError("Error reading from zrec file\n");
250     total_read+=bytes_read;
251     in_rec=(total_read<csize);
252     }
253     if (bytes_read==0) {
254     //GMessage("bytes_read = 0\n");
255     return 0;
256     }
257     if (in_rec && bytes_read<to_read) in_rec=false;
258     zstream.next_in = (Bytef*)sbuf;
259     zstream.avail_in = bytes_read;
260    
261     do { //decompression loop
262     zstream.next_out = (Bytef*)lbuf;
263     zstream.avail_out = GCDBZ_LBUF_LEN;
264     uLong t_out=zstream.total_out;
265     err = inflate(&zstream, Z_SYNC_FLUSH);
266     uLong toWrite=zstream.total_out-t_out;
267     if (toWrite>0) {
268     if (fwrite(lbuf, 1, toWrite, outf)<toWrite) {
269     GError("Error writing inflated chunk!\n");
270     }
271     total_written+=toWrite;
272     }
273     if (err==Z_STREAM_END) {
274     in_rec=false;
275     if (total_written==0) {
276     GMessage("Z_STREAM_END found but total_written=0!\n");
277     }
278     break;
279     }
280     else if (err !=Z_OK)
281     GError("GCdbz error: inflate failed! (err=%d)\n",err);
282     } while (zstream.avail_in!=0); //decompression loop
283     } //read loop
284     /*if (err!=Z_STREAM_END) {
285     GError("decompress: Z_STREAM_END not found!\n");
286     }*/
287     return total_written;
288     }