ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GFastaFile.h
Revision: 16
Committed: Mon Jul 18 20:56:02 2011 UTC (8 years, 2 months ago) by gpertea
File size: 22369 byte(s)
Log Message:
sync with local source

Line User Rev File contents
1 gpertea 2 #ifndef GFASTAFILE_H
2     #define GFASTAFILE_H
3    
4 gpertea 16 #include "GBase.h"
5     //#include "gdna.h" -- for the reverseComplement() method
6    
/* Allocation granularity (bytes) for the id and description buffers. */
#define CAPINC 64
/* Allocation granularity (bytes) for the sequence buffer. */
#define SEQCAPINC 256
/* Default record delimiter: the string that starts a FASTA defline. */
#define DEF_FASTA_DELIM ">"
10    
11     class FastaSeq { /* fasta record storage */
12     public:
13     int id_cap; /* allocated size of the sequence name string*/
14     char *id; /* id only, up to first space */
15     int namelen; // real length of seq name
16     char *descr; /* any comment on the defline, after the first space */
17     int d_cap; /* allocated size of the description */
18     int descrlen; /* real length of the description */
19     //-------actual sequence :
20     int s_cap; /* allocated length of the sequence string */
21     int len; /* the actual string length of seq */
22     char* seq; /* the sequence buffer itself */
23     //----
24 gpertea 16 void init(char* cname, char* cdescr=NULL,
25     char* cseq=NULL, int sbeg=-1, int send=-1) {
26     //Warning: sbeg and send are 0-based!
27     int l=0;
28 gpertea 2 if (cname==NULL) {
29 gpertea 16 GMALLOC(id, CAPINC);
30     id_cap=CAPINC;
31     namelen=0;
32     id[0]='\0';
33     GMALLOC(descr, CAPINC);
34     }
35     else {
36     l=strlen(cname);
37     GMALLOC(id, l+1);strcpy(id,cname);
38     id_cap=l+1;
39     namelen=l;
40     }
41 gpertea 2 if (cdescr==NULL) {
42     GMALLOC(descr, CAPINC);
43     descr[0]='\0';
44     d_cap=CAPINC;
45     descrlen=0;
46     }
47     else {//copy given description
48     l=strlen(cdescr);
49     GMALLOC(descr, l+1);
50     strcpy(descr,cdescr);
51     d_cap=l+1;
52     descrlen=l;
53     }
54     if (cseq==NULL) {
55     GMALLOC(seq, SEQCAPINC);
56     seq[0]='\0';
57     len=0;
58     s_cap=SEQCAPINC;
59     }
60 gpertea 16 else { //sequence given
61     if (sbeg>=0) { //sequence range given
62     if (send<0) send=strlen(cseq)-1;
63     len=send-sbeg+1;
64     if (len>0) {
65     s_cap=len+1;
66     GMALLOC(seq, s_cap);
67     strncpy(seq, cseq+sbeg, len);
68     seq[len]=0;
69     }
70     else { //null range
71     GMALLOC(seq, SEQCAPINC);
72     seq[0]='\0';
73     len=0;
74     s_cap=SEQCAPINC;
75     }
76     }
77     else {// copy whole cseq
78     l=strlen(cseq);
79     GMALLOC(seq, l+1);
80     strcpy(seq,cseq);
81     len=l;
82     s_cap=l+1;
83     }
84 gpertea 2 }
85 gpertea 16 } //init(alldata, range)
86    
87 gpertea 2 void init(int seqalloc=0) {
88     //ntCompTableInit();
89     GMALLOC(id, CAPINC);
90     id_cap=CAPINC;
91     namelen=0;
92     id[0]='\0';
93     GMALLOC(descr, CAPINC);
94     descr[0]='\0';
95     d_cap=CAPINC;
96     descrlen=0;
97     if (seqalloc<=0) {
98     s_cap=SEQCAPINC;
99     GMALLOC(seq, SEQCAPINC);
100     }
101     else {
102     s_cap=seqalloc;
103     GMALLOC(seq, seqalloc);
104     }
105     seq[0]='\0';
106     len=0;
107     }
108 gpertea 16 FastaSeq(char* cname, char* cdescr=NULL, char* cseq=NULL) {
109     init(cname, cdescr, cseq);
110     }
111 gpertea 2 FastaSeq(int seqalloc=0) {
112     init(seqalloc);
113     }
114 gpertea 16
115     //copy constructor:
116     FastaSeq(const FastaSeq& fa,int sbeg=-1,int send=-1) {
117     if (sbeg<0) { sbeg=0; send=fa.len-1; }
118     else if (send<0) send=fa.len-1;
119     if (send>fa.len-1) send=fa.len-1;
120     init(fa.id, fa.descr, fa.seq, sbeg, send);
121     }
122    
123 gpertea 2 void clear() {
124     GFREE(id);id_cap=0;namelen=0;id=NULL;
125     GFREE(descr);d_cap=0;descrlen=0;descr=NULL;
126     GFREE(seq);s_cap=0;len=0;seq=NULL;
127     }
128     ~FastaSeq() {
129     clear();
130     }
131     int getNameLen() { return namelen; }
132     const char* getName() { return (const char*) id; }
133     const char* name() { return (const char*) id; }
134     const char* getSeqName() { return (const char*) id; }
135     const char* getId() { return (const char*) id; }
136     const char* getDescr() { return (const char*) descr; }
137     int getDescrLen() { return descrlen; }
138     const char* getSeq() { return (const char*) seq; }
139     int getSeqLen() { return len; }
140     void extendId(char c) {
141     if (namelen+1 >= id_cap) {
142     id_cap += CAPINC;
143     GREALLOC(id, id_cap);
144     }
145     id[namelen]= c;
146     namelen++;
147     }
148     void extendSeqName(char c) { extendId(c); }
149     void extendName(char c) { extendId(c); }
150     void extendDescr(char c) {
151     if (descrlen+1 >= d_cap) {
152     d_cap += CAPINC;
153     GREALLOC(descr, d_cap);
154     }
155     descr[descrlen]= c;
156     descrlen++;
157     }
158     void endId() { id[namelen]=0; }
159     void endName() { id[namelen]=0; }
160     void endSeqName() { id[namelen]=0; }
161     void endDescr() { descr[descrlen]=0; }
162     void endSeq() { seq[len]=0; }
163     void extendSeq(char c) {
164     if (len+1 >= s_cap) {
165     s_cap += SEQCAPINC;
166     GREALLOC(seq, s_cap);
167     }
168     seq[len]= c;
169     len++;
170     }
171     void compactIdMem() { if (namelen>0) {
172     GREALLOC(id, namelen+1); id_cap=namelen+1;
173     } }
174     void compactDescrMem() { if (descrlen>0) {
175     GREALLOC(descr, descrlen+1); d_cap=descrlen+1; } }
176     void compactSeqMem() { if (len>0) {
177     GREALLOC(seq, len+1); s_cap=len+1; } }
178     void compactMem() {
179     compactIdMem();
180     compactDescrMem();
181     compactSeqMem();
182     }
183     char* detachSeqPtr() { //such that the sequence allocated memory is no longer
184     // freed when the FastaSeq object is destroyed
185     // the returned pointer MUST be deallocated by the the user, later!
186     char* p=seq;
187     GMALLOC(seq, SEQCAPINC);
188     s_cap=SEQCAPINC;
189     len=0;
190     return p;
191     }
192     char* setSeqPtr(char* newseq, int newlen=0, int newcap=0) {
193     if (newlen==0) newlen=strlen(newseq);
194     if (newcap<=newlen) newcap=newlen+1;
195     GFREE(seq);
196     seq=newseq;
197     len=newlen;
198     s_cap=newcap;
199     return seq;
200     }
201     void reset() {// allocated space remains the same!
202     namelen=0;id[0]=0;
203     descrlen=0;descr[0]=0;
204     len=0;seq[0]=0;
205     }
206 gpertea 16 /*
207 gpertea 2 //reverse-complement a nucleotide sequence:
208 gpertea 16 // -- requires gdna.h
209 gpertea 2 void reverseComplement() {
210     if (len==0) return;
211     //ntCompTableInit();
212     reverseChars(seq,len);
213     for (int i=0;i<len;i++) seq[i]=ntComplement(seq[i]);
214     }
215 gpertea 16 */
216 gpertea 2 //printing fasta formatted sequence to a file stream
217     void fprint(FILE* fout, int line_len=60, bool defline=false) {
218     if (defline) {
219     if (descrlen>0) fprintf(fout, "%s %s\n", id, descr);
220     else fprintf(fout, ">%s\n", id);
221     }
222     int l=len;
223     char* p=seq;
224     while (l>0) {
225     int to_write=GMIN(line_len, l);
226     fwrite(p,1,to_write,fout);
227     fprintf(fout,"\n");
228     p+=line_len;
229     l-=line_len;
230     }
231     }
232     //
233 gpertea 16 static void write(FILE *fh, const char* seqid, const char* descr, char* seq,
234     const int linelen=70, const int seqlen=0) {
235     writeFasta(fh, seqid, descr, seq, linelen, seqlen); //from GBase.cpp
236 gpertea 2 }
237    
238    
239     };
240    
241     typedef int charFunc(char c, int pos, FastaSeq* fseq); //char processing function
242     /* passes:
243     c = current sequence character (generally aminoacid or nucleotide)
244     pos = 0-based coordinate of the given character within the sequence
245     fseq = FastaSeq pointer (useful for retrieving sequence defline info)
246     the return value is not used yet
247     */
248    
249    
// Direction in which a GFastaFile stream is used
// (for reading/writing variable length records, etc.)
enum fileMode {
  fmRead = 0,
  fmWrite = 1
};
255    
256     class GFastaFile {
257     char* fname;
258     FILE* fh;
259     fileMode fmode;
260    
261     long int rec_fpos; //the input stream offset of the current record to be read
262     long int cur_fpos; //the input stream offset of the current byte to be read
263     uint seqcoord; //1-based coordinate of the current record's sequence reading position
264     //(updated by getSeqRange() mostly)
265     protected:
266     void bad_fastafmt() {
267     GError("Error parsing file '%s'. Not a Fasta file?\n", fname);
268     }
269     void check_eof(int c) {
270     if (c == EOF) bad_fastafmt();
271     }
272     public:
273     GFastaFile(const char* filename, fileMode filemode=fmRead) {
274     fh=NULL;
275     cur_fpos=0;
276     rec_fpos=0;
277     fmode=filemode;
278     seqcoord=0;
279     const char *mode=(filemode==fmRead) ? "rb" : "wb";
280     if (filename == NULL || filename[0]=='\0') {
281     fh = (filemode == fmRead) ? stdin : stdout;
282     fname=NULL;
283     }
284     else {
285     if ((fh = fopen(filename, mode)) == NULL)
286     GError("Cannot open file '%s'!", filename);
287     fname=Gstrdup(filename);
288     }
289     /*
290     GCALLOC(curseqid, CAPINC);
291     curseqidlen=CAPINC;
292     GCALLOC(curdescr, CAPINC);
293     curdescrlen=CAPINC;*/
294     }
295    
296     //attach a GFastaFile object to an already open handle
297     GFastaFile(FILE* fhandle, fileMode filemode=fmRead, const char* filename=NULL) {
298     fh=fhandle;
299     cur_fpos=ftell(fh);
300     fmode=filemode;
301     rec_fpos=cur_fpos;
302     seqcoord=0;
303     if (filename == NULL || filename[0]=='\0') {
304     fname=NULL;
305     }
306     else
307     fname=Gstrdup(filename);
308     }
309    
310    
311     void reset() {
312     if (fh!=NULL && fh!=stdout && fh!=stdin) {
313 gpertea 16 fseeko(fh,0L, SEEK_SET);
314 gpertea 2 cur_fpos=0;
315     rec_fpos=0;
316     }
317     else GError("Cannot use GFastaFile::reset() on stdin, stdout or NULL handles.\n");
318     }
319    
320     void seek(int pos) {
321     if (fh!=NULL && fh!=stdout && fh!=stdin) {
322 gpertea 16 fseeko(fh, pos, SEEK_SET);
323 gpertea 2 cur_fpos=pos;
324     seqcoord=0; //seqcoord agnostic after a seek
325     }
326     else GError("Cannot use GFastaFile::seek() on stdin, stdout or NULL handles.\n");
327     }
328    
329     ~GFastaFile() {
330     if (fh!=NULL && fh!=stdout && fh!=stdin) fclose(fh);
331     fh=NULL;
332     GFREE(fname);
333     /*GFREE(curseqid);
334     GFREE(curdescr);*/
335     }
336    
337     int getReadPos() { return cur_fpos; } /* returns current read position in the
338     input stream (can be used within callback) */
339     int ReadSeqPos() {return rec_fpos; } /* returns the input stream offset of the last fasta
340     record processed by getFastaSeq*/
341     bool readHeader(FastaSeq& seq) { return (readHeader(&seq)!=NULL); }
342     FastaSeq* readSeq(int seqalloc=0) {
343     //allocate a new FastaSeq, reads the next record and returns it
344     //caller is responsible for deallocating returned FastaSeq memory!
345     FastaSeq* r=readHeader(NULL, seqalloc);
346     int len=0;
347     char before=1; //newline before indicator
348     int c=-1;
349     //load the whole sequence in FastaSeq
350     while ((c = getc(fh)) != EOF && c != '>') {
351     cur_fpos++;
352     //if (isspace(c) || c<31)
353     if (c<=32) {
354     before = (c=='\n' || c=='\r')?1:0;
355     continue; /* skip spaces */
356     }
357     if (len >= r->s_cap-1) {
358     GREALLOC(r->seq, r->s_cap + SEQCAPINC);
359     r->s_cap+=SEQCAPINC;
360     }
361     r->seq[len] = c;
362     before=0;
363     len++;
364     }
365     r->seq[len] = '\0';
366     r->len=len;
367     return r;
368     }
369     FastaSeq* readHeader(FastaSeq* seq=NULL, int seqalloc=0) {
370     /* reads the Fasta sequence header
371     the first character must be '>' for this call, after any spaces,
372     if seq is NULL a new FastaSeq object is allocated and returned,
373     otherwise id and descr are updated */
374     seqcoord=0;
375     int* buflen;
376     int* buflenstr;
377     char** buf;
378     int before;
379     if (feof(fh)) return NULL;
380     int c = getc(fh);
381     if (c==EOF) return NULL;
382     cur_fpos++;
383     while (c!=EOF && c<=32) { c=getc(fh); cur_fpos++; }//skip spaces etc.
384     if (c == EOF) return NULL;
385     if (c != '>')
386     bad_fastafmt();
387     if (seq==NULL) seq=new FastaSeq(seqalloc);
388     else { seq->clear(); seq->init(seqalloc); }
389    
390     int len = 0; //chars accumulated so far
391     buflen=&(seq->id_cap);
392     buf=&(seq->id);
393     buflenstr=&(seq->namelen);
394     before=1;
395     while ((c = getc(fh)) != EOF) {
396     cur_fpos++;
397     if (c=='\n' || c=='\r') break;
398     if (len >= *buflen-1) {
399     GREALLOC(*buf, *buflen + CAPINC);
400     *buflen+=CAPINC;
401     }
402     if (before && (c<=32)) {
403     // space encountered => seq_name finished
404     before=0;
405     (*buf)[len]='\0';
406     *buflenstr=len;
407     buf=&seq->descr;
408     buflen=&seq->d_cap;
409     buflenstr=&seq->descrlen;
410     len=0;
411     if (c!=1) // special case, nrdb concatenation
412     continue; // skip this space
413     }
414     (*buf)[len]=c;
415     len++;
416     }
417     (*buf)[len]='\0'; /* terminate the comment string */
418     *buflenstr = len;
419     check_eof(c); /* it's wrong to have eof here */
420     seqcoord=1;
421     return (seq->namelen==0) ? NULL : seq;
422     }
423    
424     FastaSeq *getFastaSeq(bool& is_last, FastaSeq* seq, charFunc* callbackFn = NULL ) {
425     /* seq must be a pointer to a initialized FastaSeq structure
426     if seq is NULL, the sequence is not actually read,
427     but just skipped and the file pointer set accordingly, while
428     the returned "pointer" will not be a FastaSeq one but just NULL or not NULL
429     (depending if eof was encountered)
430     if callbackFn is NULL, the sequence is read entirely in memory in a FastaSeq.seq field
431     otherwise only the defline is parsed into FastaSeq::id and FastaSeq::descr but actual
432     sequence letters are passed one by one to the callback function
433     and the actual sequence is never stored in memory (unless the callback does it)
434     */
435     int c, len;
436     int before;
437     rec_fpos=cur_fpos;
438     len = 0; //chars accumulated so far
439     if (fh==NULL || feof(fh)) return NULL;
440     // -------- read the defline first
441     if (seq==NULL) { // navigate only! don't read/parse anything but the record delimiter
442     before=1;
443     while ((c = getc(fh)) != EOF && c != '\n' && c !='\r') cur_fpos++; // skip defline
444     if (c==EOF && cur_fpos<=rec_fpos+2) return NULL;
445     check_eof(c); /* it's wrong to have eof here! */
446     cur_fpos++; //to account for the '\n' read
447     /*----- read the sequence now: */
448     before=1; /* "newline before" flag */
449     while ((c = getc(fh)) != EOF && c != '>') {
450     cur_fpos++;
451     before = (c=='\n' || c=='\r') ? 1 : 0;
452     }
453     //we should end up at a '>' character here, or EOF
454     } /* fasta fmt navigation to next sequence, no seq storage */
455     else { // sequence storage:
456     if (!readHeader(seq)) {
457     is_last=true;
458     return NULL;
459     }
460     /*----- read the actual sequence now: */
461     len=0;
462     before=1; //newline before indicator
463     if (callbackFn==NULL) { //load the whole sequence in FastaSeq
464     while ((c = getc(fh)) != EOF && c != '>') {
465     cur_fpos++;
466     //if (isspace(c) || c<31)
467     if (c<=32) {
468     before = (c=='\n' || c=='\r')?1:0;
469     continue; /* skip spaces */
470     }
471     if (len >= seq->s_cap-1) {
472     GREALLOC(seq->seq, seq->s_cap + CAPINC);
473     seq->s_cap+=CAPINC;
474     }
475     seq->seq[len] = c;
476     before=0;
477     len++;
478     }
479     seq->seq[len] = '\0';
480     seq->len=len;
481     } /* sequence storage */
482     else { //use the callback for each letter, do not store the whole sequence in FastaSeq
483     while ((c = getc(fh)) != EOF && c != '>') {
484     cur_fpos++;
485     if (c<=32) {
486     before = (c=='\n' || c=='\r')?1:0;
487     continue; /* skip spaces within sequence*/
488     }
489     (*callbackFn)(c, len, seq); //call the user function for each letter
490     before=0;
491     len++;
492     }
493     seq->len=len;
494     } /* callback sequence reading (no storage)*/
495     } /* sequence parsing */
496     if (c=='>') {
497     if (!before) bad_fastafmt(); /* '>' must only be at start of line,
498     never within the sequence ! */
499     is_last=false; /* FALSE - not the last one */
500     ungetc(c, fh);
501     }
502     else is_last=true; /* TRUE - eof() here */
503     return ((seq==NULL) ? (FastaSeq*)fh : seq); //alwayws return non NULL here!
504     } //getFastaSeq
505    
506     //simplified call to ignore the is_last flag
507     FastaSeq *getFastaSeq(FastaSeq* seq, charFunc* callbackFn = NULL) {
508     bool b;
509     if (fh==NULL || feof(fh)) return NULL;
510     return getFastaSeq(b, seq, callbackFn);
511     }
512    
513    
514     uint seqSkip(uint slen, int& c){
515     //assumes the header was read !
516     //skip exactly slen characters in the actual aa or nt sequence
517     //(spaces are not counted)
518     uint skipacc=0;
519     while (skipacc<slen && ((c=getc(fh))!= EOF && c != '>')) {
520     cur_fpos++;
521     if (c<=32) continue; //skip spaces and other non-ASCII characters
522     seqcoord++;
523     skipacc++;
524     }
525     return skipacc; //may terminate prematurely
526     }
527    
528     /* read a sequence range from the current FASTA record
529     this is much faster when rcoord>=seqcoord (i.e. when sequence
530     ranges are read sequentially)
531     if rcoord>=seqcoord assumes the header has been read already!
532     Returns the actual length of the sequence returned (0 if rcoord>seq_length)
533     and updates seqcoord, cur_fpos accordingly (rec_fpos is unchanged)
534     */
535     uint getSeqRange(FastaSeq& seq, uint rcoord, uint rlen=0) {
536     int c;
537     uint len;
538     int before;
539     rec_fpos=cur_fpos;
540     if (!seqcoord || seqcoord>rcoord) {
541     // slow -- go back to the beginning of the record
542     seek(rec_fpos);
543     readHeader(&seq); //this will also reset seqcoord to 1
544     }
545     if (rcoord!=seqcoord) {
546     seqSkip(rcoord-seqcoord, c);
547     check_eof(c);
548     if (c=='>')
549     GError("Error: '>' character found while skipping through sequence!\n");
550     }
551     len = 0; //chars accumulated so far
552     seq.seq[0]='\0';
553     seq.len=0;
554     //----- read the actual subsequence now:
555     len=0;
556     before=1; //"newline before" flag
557     while ((c = getc(fh)) != EOF && c != '>') {
558     cur_fpos++;
559     if (c<=32) continue; // skip spaces
560     if (len >= (uint) (seq.s_cap-1)) {
561     GREALLOC(seq.seq, seq.s_cap + CAPINC);
562     seq.s_cap+=CAPINC;
563     }
564     seq.seq[len] = c;
565     len++;
566     seqcoord++;
567     if (rlen>0 && len==rlen) break;
568     }
569     seq.seq[len] = '\0';
570     seq.len=len;
571     if (c=='>') bad_fastafmt(); /* '>' must only be at start of line,
572     never within the sequence ! */
573     return len;
574     } //getSeqRange
575    
576     //only for writing
577     void putFastaSeq(FastaSeq *fa, const int linelen=60) {
578     writeFasta(fh, fa->id, fa->descr, fa->seq, linelen);
579     }
580 gpertea 16 /*
581 gpertea 2 static void writeFasta(FILE *fh, char* seqid, char* descr, char* seq, const int linelen=60, const int seqlen=0) {
582     FastaSeq::write(fh, seqid, descr, seq, linelen, seqlen);
583     }
584 gpertea 16 */
585 gpertea 2 };
586    
587     // ------------- FASTA parser/handler ----
588     // REQUIRES the first character processed after init()
589     // to be the first character of the record delimiter
590     // (default: ">")
591    
592    
593     class GFastaCharHandler {
594     protected:
595     char* recdelim;
596     charFunc* seqCallBack;
597     bool in_delim;
598     int delim_pos;
599     bool in_seqname;
600     bool in_descr;
601     bool in_seq;
602     FastaSeq* rec;
603     unsigned int seq_pos;
604     void reset() {
605     in_delim=true;
606     delim_pos=0;
607     in_seqname=false;
608     in_descr=false;
609     in_seq=false;
610     seq_pos=0;
611     }
612     public:
613     GFastaCharHandler(char* recdel=DEF_FASTA_DELIM) {
614     reset();
615     rec=NULL;
616     recdelim=recdel;
617     seqCallBack=NULL;
618     }
619     GFastaCharHandler(charFunc* chrCallBack, FastaSeq* r=NULL, char* recdel=DEF_FASTA_DELIM) {
620     reset();
621     rec=r;
622     recdelim=recdel;
623     seqCallBack=chrCallBack;
624     if (rec!=NULL) rec->reset();
625     }
626     void init() {
627     init(rec, seqCallBack);
628     }
629     void init(charFunc* chrCallBack) {
630     init(rec,chrCallBack);
631     }
632     void init(FastaSeq* r) {
633     init(r,seqCallBack);
634     }
635     void init(FastaSeq* r, charFunc* chrCallBack) {
636     rec=r;
637     seqCallBack=chrCallBack;
638     if (rec==NULL)
639     GError("GFastaCharHandler::init() Error: cannot use NULL FastaSeq!\n");
640     rec->reset();
641     reset();
642     }
643     void done() {
644     if (rec==NULL)
645     GError("GFastaCharHandler::done() Error: cannot use NULL FastaSeq!\n");
646     rec->endId();
647     rec->endDescr();
648     rec->endSeq();
649     }
650    
651     //~GFastaCharHandler();
652    
653     void processChar(char c) {
654     if (in_delim) { //skip record delimiter -- but it must be there!
655     if (recdelim[delim_pos]!=c) {//the only way to detect an Id starting
656     in_seqname=true;
657     in_delim=false;
658     }
659     delim_pos++;
660     }
661     if (in_seqname) {
662     if (rec->namelen>0 && c<=32) {
663     //breaking out of seq_name
664     rec->endId();
665     if (c=='\n' || c=='\r') { //end defline
666     in_seqname=false;
667     in_seq=true;
668     }
669     else { //seqname break, not defline end
670     in_seqname=false;
671     in_descr=true;
672     }
673     } // seqname termination
674     else { //seqname continues
675     if (c>32) rec->extendId(c);
676     }
677     return;
678     } // in_seqname
679     if (in_descr) {
680     if (c=='\n' || c=='\r') { //end defline
681     rec->endDescr();
682     in_descr=false;
683     in_seq=true;
684     }
685     else rec->extendDescr(c);
686     return;
687     } // in_descr
688     if (in_seq && c>32) {
689     seq_pos++; // 1-based sequence position !
690     if (seqCallBack==NULL) rec->extendSeq(c);
691     else (*seqCallBack)(c,seq_pos,rec);
692     }
693     }
694    
695     };
696    
697    
698     #endif