ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/cdbfasta/gcdbz.cpp
Revision: 15
Committed: Mon Jul 18 20:53:45 2011 UTC (11 years, 2 months ago) by gpertea
File size: 8926 byte(s)
Log Message:
sync with local src

Line User Rev File contents
1 gpertea 8 #include "gcdbz.h"
2    
3     GCdbz::GCdbz(FILE* azf, bool uc, int zrsize) {
4     uncompress=uc;
5     zrecsize=-1;
6     zpos=0;
7     defline_cap=1024;
8     begin_defline();
9     GMALLOC(defline, defline_cap);
10     zf=azf;
11     // FULL_FLUSH method instead of finish:
12     if (uncompress)
13     decomp_start(zrsize);
14     else
15     compress_start();
16     }
17    
18     GCdbz::~GCdbz() {
19     //if (zf!=NULL && zf!=stdout && zf!=stdin) fclose(zf);
20     // FULL_FLUSH method instead of finish
21     if (uncompress) decomp_end();
22     else
23     if (!zclosed) compress_end();
24     GFREE(defline);
25     }
26    
27    
28    
29     void GCdbz::extend_defline(int ch) {
30     if (defline_len+1 >= defline_cap) {
31     defline_cap+=(defline_cap>>2);
32     GREALLOC(defline, defline_cap);
33     }
34     defline[defline_len]= ch;
35     defline_len++;
36     }
37    
38    
// Placeholder FASTA-like record (a defline followed by four sequence
// lines). compress_start() copies it into sbuf, deflates it with
// Z_FULL_FLUSH and writes the result as the first record of the output.
// The literal is continued across lines with backslash-newline.
39     #define DUMMY_ZREC ">AA1234567890 DNA protein\n\
40     ACGTTGCTAGCT\n\
41     NRMTPYYHEIEP\n\
42     RTASNTSPTPNS\n\
43     IKSAHPAEPPKR\n"
44    
45     void GCdbz::compress_start() {
46     //initialize zstream compression
47     zstream.zalloc = (alloc_func)0; //no alloc function to use
48     zstream.zfree = (free_func)0; //no free function to use
49     zstream.opaque = (voidpf)0; //no private object to pass to zalloc/zfree
50    
51     int err=deflateInit(&zstream, Z_DEFAULT_COMPRESSION);
52     if (err!=Z_OK)
53     GError("GCdbz error: deflateInit failed!(err=%d)\n",err);
54     zclosed=false;
55     //write a dummy record as the first record,
56     //so we can use random access (FULL_FLUSH style) later
57     char ztag[5];strcpy(ztag, "CDBZ");
58     uint32 zsize=0;
59     zstream.next_in = (Bytef*)sbuf;
60     strcpy(sbuf, DUMMY_ZREC);
61     zstream.avail_in=strlen(sbuf);
62     zstream.next_out = (Bytef*)lbuf;
63     zstream.avail_out = GCDBZ_LBUF_LEN;
64     uLong t_out=zstream.total_out;
65     err = deflate(&zstream, Z_FULL_FLUSH);
66     zsize=zstream.total_out-t_out;
67     if ((err !=Z_OK && err!=Z_STREAM_END) || zsize<=0)
68     GError("GCdbz error: deflate 1st record failed! (err=%d)\n", err);
69     //now write the header and the dummy record
70 gpertea 15 //in case this was not done before:
71     gcvt_endian_setup();
72     //gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86;
73 gpertea 8 uint32 zfv = gcvt_uint(&zsize);
74     if (fwrite(ztag, 1, 4, zf)<4 ||
75     fwrite(&zfv,1,sizeof(uint32), zf) < sizeof(uint32) ||
76     fwrite(lbuf, 1, zsize, zf) < zsize)
77     GError("Error writing 1st deflated record!\n");
78     zpos+=4+sizeof(uint32)+zsize;
79     }
80    
81     void GCdbz::compress_end() {
82     zstream.next_out = (Bytef*)lbuf;
83     zstream.avail_out = GCDBZ_LBUF_LEN;
84     zstream.avail_in = 0;
85     uLong t_out=zstream.total_out;
86     int err = deflate(&zstream, Z_FINISH);
87     if (err != Z_STREAM_END) {
88     GError("GCdbz error: deflate/Z_FINISH() failed! (err=%d) \n", err);
89     }
90     uLong toWrite=zstream.total_out-t_out;
91     if (toWrite>0) {
92     if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
93     GError("Error writing FINISH deflate chunk!\n");
94     //GError("GCdbz error: out data after Z_FINISH (%d bytes)\n",
95     // zstream.total_out-t_out);
96     }
97     err=deflateEnd(&zstream);
98     if (err!=Z_OK)
99     GError("GCdbz error: deflateEnd() failed! (err=%d) \n", err);
100     zclosed=true;
101     }
102    
103     char* GCdbz::compress(GReadBuf *readbuf, char* delim) {
104     //compress everything coming from the input stream inf
105     //until \n is encountered followed by delim
106     //returns this->defline or NULL if error encountered
107    
108     //-- WARNING: this subrutine assumes that inf file position
109     // is at the beginning of the record, right AFTER the delim
110     // (exactly as left after a previous call)
111     if (zf==NULL || uncompress)
112     GError("GCdbz Error: cannot use compress() method !\n");
113     unsigned int total_out=0;
114     int c=0;
115     bool in_rec=true;
116     int delimlen=strlen(delim);
117     zrecsize=0;
118     if ((c=readbuf->peekCmp(delim, delimlen))!=0) {
119     if (c<-1) return NULL; //end of file reached
120     GError("GCdbZ::compress error: delimiter '%s' expected at record start!\n",
121     delim);
122     }
123     bool bol=false; //beginning of line flag
124     int deflate_flag=0;
125     begin_defline();
126     int rec_pos=0;
127     int err=0;
128     while (in_rec) { // main read loop
129     int bytes_read=0;
130     while ((c=readbuf->getch())>=0) {
131     sbuf[bytes_read++]=c;
132     if (c=='\n' || c=='\r') { //beginning of line
133     bol = true;
134     if (in_defline) end_defline();
135     //look_ahead for record delimiter:
136     if (readbuf->peekCmp(delim, delimlen)==0) {
137     in_rec=false;
138     break;
139     }
140     }
141     else bol = false;
142     if (rec_pos>delimlen-1 && in_defline)
143     extend_defline(c);
144     rec_pos++;
145     if (bytes_read == GCDBZ_SBUF_LEN) break;
146     }//while not EOF or space in buffer
147     /*if (bytes_read==0)
148     return NULL;*/
149     if (c==EOF) {
150     in_rec=false;
151     if (in_defline) end_defline();
152     }
153     zstream.next_in = (Bytef*)sbuf;
154     zstream.avail_in = bytes_read;
155     //deflate_flag = in_rec ? 0 : Z_FINISH;
156     deflate_flag = in_rec ? 0 : Z_FULL_FLUSH;
157     do { //compression loop
158     zstream.next_out = (Bytef*)lbuf;
159     zstream.avail_out = GCDBZ_LBUF_LEN;
160     uLong t_out=zstream.total_out;
161     err = deflate(&zstream, deflate_flag);
162     if (err !=Z_OK && err!=Z_STREAM_END)
163     GError("GCdbz error: deflate failed! (err=%d)\n", err);
164     uLong toWrite=zstream.total_out-t_out;
165     if (toWrite>0) {
166     if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
167     GError("Error writing deflate chunk!\n");
168     total_out+=toWrite;
169     zrecsize+=toWrite;
170     zpos+=toWrite;
171     }
172     } while (err!=Z_STREAM_END && zstream.avail_out==0);//compression loop
173     } //read loop
174     //if (deflate_flag!=Z_FINISH)
175     if (deflate_flag!=Z_FULL_FLUSH)
176     GError("Deflate flag not set to FINISH!\n");
177     return defline;
178     }
179    
180    
181     void GCdbz::decomp_start(int zrsize) {
182     zstream.zalloc = (alloc_func)0;
183     zstream.zfree = (free_func)0;
184     zstream.opaque = (voidpf)0;
185     zstream.next_in = (Bytef*)sbuf;
186     zstream.avail_in = 0;
187     zstream.next_out = (Bytef*)lbuf;
188     int err = inflateInit(&zstream);
189     if (err!=Z_OK)
190     GMessage("Error at inflateInit()\n");
191     //-- now read and discard the first record, so we can use random access later
192     // (needed by zlib)
193     int bytes_read=fread(sbuf, 1, zrsize, zf);
194     if (bytes_read<zrsize)
195     GError("Error reading 1st record from zrec file\n");
196     zstream.next_in = (Bytef*)sbuf;
197     zstream.avail_in = bytes_read;
198     //decompress first chunk
199     zstream.next_out = (Bytef*)lbuf;
200     zstream.avail_out = GCDBZ_LBUF_LEN;
201     err = inflate(&zstream, Z_SYNC_FLUSH);
202     if (err !=Z_OK && err!=Z_STREAM_END)
203     GError("GCdbz error: 1st record inflate failed! (err=%d)\n",err);
204     }
205    
206     void GCdbz::decomp_end() {
207     int err = inflateEnd(&zstream);
208     if (err!=Z_OK)
209     GError("Error at inflateEnd() (err=%d)\n", err);
210    
211     }
212    
213    
214     //record decompress
215     //returns: the number of bytes decompressed
216     int GCdbz::decompress(FILE* outf, int csize, int zfofs) {
217     if (zfofs>=0) {
218 gpertea 15 if (fseeko(zf, zfofs, 0))
219     GError("GCdbz::decompress: error fseeko() to %d\n", zfofs);
220 gpertea 8 }
221     else
222     if (feof(zf)) return 0;
223     bool in_rec=true;
224     int err=0;
225     int total_read=0;
226     int total_written=0;
227     while (in_rec) { // main read loop
228     int to_read=0;
229     int bytes_read=0;
230     if (csize<=0) { //read one byte at a time
231     to_read=1;
232     int c;
233     if ((c =fgetc(zf))!=EOF) {
234     bytes_read = 1;
235     sbuf[0]=c;
236     }
237     else {
238     //bytes_read=0;
239     return 0; //eof
240     }
241     total_read+=bytes_read;
242     }
243     else {
244     to_read = csize-total_read>GCDBZ_SBUF_LEN ?
245     GCDBZ_SBUF_LEN : csize-total_read;
246     // check for csize vs bytes_read match:
247     if (to_read==0) return 0;
248     bytes_read=fread(sbuf, 1, to_read, zf);
249     if (bytes_read!=to_read)
250     GError("Error reading from zrec file\n");
251     total_read+=bytes_read;
252     in_rec=(total_read<csize);
253     }
254     if (bytes_read==0) {
255     //GMessage("bytes_read = 0\n");
256     return 0;
257     }
258     if (in_rec && bytes_read<to_read) in_rec=false;
259     zstream.next_in = (Bytef*)sbuf;
260     zstream.avail_in = bytes_read;
261    
262     do { //decompression loop
263     zstream.next_out = (Bytef*)lbuf;
264     zstream.avail_out = GCDBZ_LBUF_LEN;
265     uLong t_out=zstream.total_out;
266     err = inflate(&zstream, Z_SYNC_FLUSH);
267     uLong toWrite=zstream.total_out-t_out;
268     if (toWrite>0) {
269     if (fwrite(lbuf, 1, toWrite, outf)<toWrite) {
270     GError("Error writing inflated chunk!\n");
271     }
272     total_written+=toWrite;
273     }
274     if (err==Z_STREAM_END) {
275     in_rec=false;
276     if (total_written==0) {
277     GMessage("Z_STREAM_END found but total_written=0!\n");
278     }
279     break;
280     }
281     else if (err !=Z_OK)
282     GError("GCdbz error: inflate failed! (err=%d)\n",err);
283     } while (zstream.avail_in!=0); //decompression loop
284     } //read loop
285     /*if (err!=Z_STREAM_END) {
286     GError("decompress: Z_STREAM_END not found!\n");
287     }*/
288     return total_written;
289     }