ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
Revision: 150
Committed: Fri Jan 13 17:20:39 2012 UTC (7 years, 5 months ago) by gpertea
File size: 15343 byte(s)
Log Message:
fixed parsing bug for extractAttr(); 
Gstrcmp() and Gstricmp() take a 3rd optional param to make them work like strncmp()

Line User Rev File contents
1 gpertea 2 #ifndef G_BASE_DEFINED
2     #define G_BASE_DEFINED
3 gpertea 16 #ifndef _POSIX_SOURCE
4     //mostly for MinGW
5     #define _POSIX_SOURCE
6     #endif
7     #ifdef HAVE_CONFIG_H
8     #include "config.h"
9     #endif
10 gpertea 2 #include <string.h>
11     #include <stdlib.h>
12     #include <stdio.h>
13     #include <math.h>
14     #include <limits.h>
15     #include <sys/types.h>
16     #include <sys/stat.h>
17 gpertea 16 #include <stdint.h>
18    
// Platform setup. On any Windows target: normalize the detection macro to
// __WIN32__, include the Win32 headers, use '\\' as the path separator and
// remap off_t/fseeko/ftello. On every other target: use '/' and <unistd.h>.
19     #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
20     #ifndef __WIN32__
21     #define __WIN32__
22     #endif
23 gpertea 2 #include <windows.h>
24 gpertea 16 #include <io.h>
25     #define CHPATHSEP '\\'
// off_t is forced to a 64-bit type here, whatever the C runtime declared.
26     #undef off_t
27     #define off_t int64_t
// NOTE(review): #ifdef only detects preprocessor macros. If the C runtime
// provides _fseeki64/_ftelli64 as ordinary functions, these two tests are
// false and fseeko/ftello fall back to fseek/ftell, whose offsets are long
// rather than the int64_t off_t defined above -- confirm large files work.
28     #ifdef _fseeki64
29     #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
30     #else
31     /*
32     #define _DEFINE_WIN32_FSEEKO
33     int fseeko(FILE *stream, off_t offset, int whence);
34     */
35     #define fseeko fseek
36     #endif
37     #ifdef _ftelli64
38     #define ftello(stream) _ftelli64(stream)
39     #else
40     /*
41     #define _DEFINE_WIN32_FTELLO
42     off_t ftello(FILE *stream);
43     */
44     #define ftello ftell
45     #endif
46     #else
47     #define CHPATHSEP '/'
48     #include <unistd.h>
49 gpertea 2 #endif
50    
51 gpertea 36 #ifndef fseeko
52     #define fseeko fseek
53     #endif
54     #ifndef ftello
55     #define ftello ftell
56     #endif
57 gpertea 16
58 gpertea 2 #ifdef DEBUG
59     #undef NDEBUG
60     #endif
61    
62 gpertea 16 typedef int32_t int32;
63     typedef uint32_t uint32;
64    
65 gpertea 2 typedef unsigned char uchar;
66     typedef unsigned char byte;
67    
68     #ifndef MAXUINT
69     #define MAXUINT ((unsigned int)-1)
70     #endif
71    
72 gpertea 16 #ifndef MAXINT
73     #define MAXINT INT_MAX
74 gpertea 2 #endif
75    
76 gpertea 16 #ifndef MAX_UINT
77     #define MAX_UINT ((unsigned int)-1)
78     #endif
79    
80     #ifndef MAX_INT
81     #define MAX_INT INT_MAX
82     #endif
83    
84     typedef int64_t int64;
85     typedef uint64_t uint64;
86    
87 gpertea 2 /****************************************************************************/
88    
89     #ifndef EXIT_FAILURE
90     #define EXIT_FAILURE 1
91     #endif
92    
93     #ifndef EXIT_SUCCESS
94     #define EXIT_SUCCESS 0
95     #endif
96    
97     /****************************************************************************/
98     #define ERR_ALLOC "Error allocating memory.\n"
99    
100     //-------------------
101    
102     // Debug helpers
//   GASSERT(exp): when NDEBUG is not defined, evaluates exp and calls
//                 GAssert(#exp,__FILE__,__LINE__) if it is false; expands to
//                 a no-op when NDEBUG is defined.
//   GTRACE(exp):  expands to `GMessage exp` only when NDEBUG is not defined
//                 and TRACE is defined; otherwise a no-op. The argument is
//                 pasted right after the function name, so it must carry its
//                 own parentheses: GTRACE(("x=%d\n", x));
103     #ifndef NDEBUG
104     #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
105     #ifdef TRACE
106     #define GTRACE(exp) (GMessage exp)
107     #else
108     #define GTRACE(exp) ((void)0)
109     #endif
110     #else
111     #define GASSERT(exp) ((void)0)
112     #define GTRACE(exp) ((void)0)
113     #endif
114
// GERROR(exp) always expands to `GError exp`; as with GTRACE the argument
// must be a parenthesized argument list: GERROR(("bad value %d\n", v));
115     #define GERROR(exp) (GError exp)
116     /********************************** Macros ***********************************/
117     // Absolute value
// NOTE: every macro in this group may evaluate an argument more than once,
// so do not pass expressions with side effects (e.g. GMAX(i++, j)).
118     #define GABS(val) (((val)>=0)?(val):-(val))
119
120     // Min and Max
121     #define GMAX(a,b) (((a)>(b))?(a):(b))
122     #define GMIN(a,b) (((a)>(b))?(b):(a))
123
124     // Min of three
125     #define GMIN3(x,y,z) ((x)<(y)?GMIN(x,z):GMIN(y,z))
126
127     // Max of three
128     #define GMAX3(x,y,z) ((x)>(y)?GMAX(x,z):GMAX(y,z))
129
130     // Store the minimum of a, b into lo and the maximum into hi
// (lo and hi must be assignable; when a==b both receive that value)
131     #define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a)))
132
133     // Clamp value x to range [lo..hi]
// (the lower bound is tested first, so lo wins if lo>hi)
134     #define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
135    
// Shorthand aliases: untyped data pointer and unsigned int.
136     typedef void* pointer;
137     typedef unsigned int uint;
138
// Callback signatures for comparing two items and for releasing one item.
// Because `pointer` is a typedef for void*, `const pointer` means
// `void* const` (the pointer itself is const), not `const void*`.
139     typedef int GCompareProc(const pointer item1, const pointer item2);
140     typedef void GFreeProc(pointer item); //usually just delete,
141     //but may also support structures with embedded dynamic members
142    
// Allocation helper macros.
// GMALLOC/GCALLOC/GREALLOC pass the address of `ptr` (cast to pointer*) and
// `size` to GMalloc/GCalloc/GRealloc and call GError(ERR_ALLOC) when that
// routine returns false. GFREE passes the address of `ptr` to GFree.
//
// Each checking macro is wrapped in do { ... } while (0) so it expands to
// exactly one statement. The previous bare `if (...) GError(...)` expansion
// captured a following `else`, e.g. in
//     if (cond) GMALLOC(p, n); else other();
// the `else` attached to the macro's hidden `if` instead of `if (cond)`.
// `ptr` is parenthesized before its address is taken.
#define GMALLOC(ptr,size) \
  do { if (!GMalloc((pointer*)(&(ptr)),size)) GError(ERR_ALLOC); } while (0)
#define GCALLOC(ptr,size) \
  do { if (!GCalloc((pointer*)(&(ptr)),size)) GError(ERR_ALLOC); } while (0)
#define GREALLOC(ptr,size) \
  do { if (!GRealloc((pointer*)(&(ptr)),size)) GError(ERR_ALLOC); } while (0)
#define GFREE(ptr) GFree((pointer*)(&(ptr)))
150    
// Returns whichever of the two C strings orders first under strcmp().
// When both compare equal, arg2 is returned.
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) {
    return arg1;
  }
  return arg2;
}
154    
// Returns whichever of the two C strings orders last under strcmp().
// When both compare equal, arg2 is returned.
inline char* strMax(char *arg1, char *arg2) {
  const int order = strcmp(arg2, arg1);
  if (order < 0) {
    return arg1;
  }
  return arg2;
}
158    
// Rounds x to the nearest integer by taking floor(x + 0.5), so exact halves
// go toward +infinity (2.5 -> 3, -2.5 -> -2). The result is converted to int.
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
162    
163     /****************************************************************************/
164    
// Three-way comparison of two ints.
// Returns -1 when a<b, 0 when a==b and 1 when a>b.
// The earlier `a-b` form overflowed (undefined behavior for signed int) when
// the operands were far apart, e.g. INT_MAX vs -1, and could then report the
// wrong sign; comparing directly has no such limit.
inline int Gintcmp(int a, int b) {
  return (a > b) - (a < b);
}
169    
170 gpertea 150 int Gstrcmp(const char* a, const char* b, int n=-1);
171 gpertea 2 //same as strcmp but doesn't crash on NULL pointers; the optional 3rd parameter makes it work like strncmp()
172    
173 gpertea 150 int Gstricmp(const char* a, const char* b, int n=-1);
174 gpertea 2
// Generic swap helper: exchanges the values held by lhs and rhs.
// T must be copyable (one copy-initialization and two assignments are made).
template<class T> void Gswap(T& lhs, T& rhs) {
  T held = lhs;
  lhs = rhs;
  rhs = held;
}
182 gpertea 2
/// bitCount_32 - counts the number of set bits in a 32-bit value.
/// Ex. bitCount_32(0xF000F000) == 8
/// Returns 0 if the word is zero.
inline uint bitCount_32(uint32_t Value) {
#if __GNUC__ >= 4
  return __builtin_popcount(Value);
#else
  // Portable fallback: add bits in parallel inside 2-bit, 4-bit and 8-bit
  // fields, then sum the four byte counts with a multiply; the total ends up
  // in the top byte.
  uint32_t pairs = Value - ((Value >> 1) & 0x55555555);
  uint32_t nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
  uint32_t perByte = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;
  return (perByte * 0x1010101) >> 24;
#endif
}
195 gpertea 2
/// bitCount_64 - counts the number of set bits in a 64-bit value.
/// Returns 0 if the word is zero.
inline uint bitCount_64(uint64_t Value) {
#if __GNUC__ >= 4
  return __builtin_popcountll(Value);
#else
  // Same parallel field-summing scheme as the 32-bit version, widened to
  // eight bytes; the total is read from the top byte of the product.
  uint64_t pairs = Value - ((Value >> 1) & 0x5555555555555555ULL);
  uint64_t nibbles = (pairs & 0x3333333333333333ULL)
                   + ((pairs >> 2) & 0x3333333333333333ULL);
  uint64_t perByte = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
  uint64_t summed = perByte * 0x0101010101010101ULL;
  return uint(summed >> 56);
#endif
}
208 gpertea 2
/// bitCountTrailingZeros_32 - counts the zero bits below the lowest set bit
/// of a 32-bit value. Ex. bitCountTrailingZeros_32(0xFF00FF00) == 8.
/// Returns 32 if the word is zero.
/// Uses the compiler builtin when __GNUC__ >= 4, a lookup table otherwise.
inline unsigned bitCountTrailingZeros_32(uint32_t Value) {
#if __GNUC__ >= 4
  if (Value == 0) {
    return 32;
  }
  return __builtin_ctz(Value);
#else
  // (Value & -Value) isolates the lowest set bit (0 when Value is 0); its
  // remainder modulo 37 selects the bit position from the table.
  static const unsigned Mod37BitPosition[] = {
    32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
    4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
    5, 20, 8, 19, 18
  };
  const uint32_t lowestSetBit = Value & -Value;
  return Mod37BitPosition[lowestSetBit % 37];
#endif
}
225 gpertea 2
/// bitCountTrailingZeros_64 - counts the zero bits below the lowest set bit
/// of a 64-bit value.
/// Returns 64 if the word is zero.
/// Uses the compiler builtin when __GNUC__ >= 4, a lookup table otherwise.
inline unsigned bitCountTrailingZeros_64(uint64_t Value) {
#if __GNUC__ >= 4
  if (Value == 0) {
    return 64;
  }
  return __builtin_ctzll(Value);
#else
  // (Value & -Value) isolates the lowest set bit (0 when Value is 0); its
  // remainder modulo 67 selects the bit position from the table.
  static const unsigned Mod67Position[] = {
    64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
    4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55,
    47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27,
    29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56,
    7, 48, 35, 6, 34, 33, 0
  };
  const uint64_t lowestSetBit = Value & -Value;
  return Mod67Position[lowestSetBit % 67];
#endif
}
244 gpertea 2
245     /**************** Memory management ***************************/
246    
247     bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
248     bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
249     bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
250     void GFree(pointer* ptr); // Free memory, resets ptr to NULL
251    
252    
253 gpertea 16 int saprintf(char **retp, const char *fmt, ...);
254    
255 gpertea 2 void GError(const char* format,...); // Error routine (aborts program)
256     void GMessage(const char* format,...);// Log message to stderr
257     // Assert failed routine:- usually not called directly but through GASSERT
258     void GAssert(const char* expression, const char* filename, unsigned int lineno);
259    
260     // ****************** string manipulation *************************
261     char *Gstrdup(const char* str);
262     //duplicate a string by allocating a copy for it and returning it
263     char* Gstrdup(const char* sfrom, const char* sto);
263     //same as Gstrdup(str), but with an early termination (e.g. on delimiter)
265    
266     char* Gsubstr(const char* str, char* from, char* to=NULL);
267     //extracts a substring, allocating it, including boundaries (from/to)
268    
269     int strsplit(char* str, char** fields, int maxfields, const char* delim);
270     int strsplit(char* str, char** fields, int maxfields, const char delim);
271     int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
272    
273     char* replaceStr(char* &str, char* newvalue);
274    
275     //conversion: to Lower/Upper case
276     // creating a new string:
277     char* upCase(const char* str);
278     char* loCase(const char* str);
279     // changing string in place:
280     char* strlower(char * str);
281     char* strupper(char * str);
282    
283     //strstr but for memory zones: scans a memory region
284     //for a substring:
285     void* Gmemscan(void *mem, unsigned int len,
286     void *part, unsigned int partlen);
287    
288     // test if a char is in a string:
289 gpertea 16 bool chrInStr(char c, const char* str);
290 gpertea 2
291     char* rstrchr(char* str, char ch);
292     /* returns a pointer to the rightmost
293     occurrence of ch in str - like rindex for platforms missing it*/
294    
295 gpertea 16 char* strchrs(const char* s, const char* chrs);
296 gpertea 2 //strchr but with a set of chars instead of only one
297    
298 gpertea 90 char* rstrfind(const char* str, const char *substr);
299     // like rindex() but for strings; right side version of strstr()
300 gpertea 2
301 gpertea 90 char* reverseChars(char* str, int slen=0); //in place reversal of string
302    
303 gpertea 16 char* rstrstr(const char* rstart, const char *lend, const char* substr);
304 gpertea 2 /*the reversed, rightside equivalent of strstr: starts searching
305     from right end (rstart), going back to left end (lend) and returns
306     a pointer to the last (right) matching character in str */
307    
308 gpertea 16 char* strifind(const char* str, const char* substr);
309 gpertea 2 // the case insensitive version of strstr -- finding a string within a string
310    
311    
312     //Determines if a string begins with a given prefix
313     //(returns false when any of the params is NULL,
314     // but true when prefix is '' (empty string)!)
315 gpertea 16 bool startsWith(const char* s, const char* prefix);
316 gpertea 2
317 gpertea 16 bool endsWith(const char* s, const char* suffix);
318     //Note: returns true if suffix is empty string, but false if it's NULL
319    
320    
321 gpertea 2 // ELF hash function for strings
322     int strhash(const char* str);
323    
324    
325    
326     //---- generic base GSeg : genomic segment (interval) --
327     // coordinates are considered 1-based (so 0 is invalid)
328     class GSeg {
329     public:
330     uint start; //start<end always!
331     uint end;
332     GSeg(uint s=0,uint e=0) {
333     if (s>e) { start=e;end=s; }
334     else { start=s;end=e; }
335     }
336     //check for overlap with other segment
337     uint len() { return end-start+1; }
338     bool overlap(GSeg* d) {
339 gpertea 16 //return start<d->start ? (d->start<=end) : (start<=d->end);
340     return (start<=d->end && end>=d->start);
341 gpertea 2 }
342    
343     bool overlap(GSeg& d) {
344 gpertea 16 //return start<d.start ? (d.start<=end) : (start<=d.end);
345     return (start<=d.end && end>=d.start);
346 gpertea 2 }
347    
348     bool overlap(GSeg& d, int fuzz) {
349 gpertea 16 //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
350     return (start<=d.end+fuzz && end+fuzz>=d.start);
351 gpertea 2 }
352    
353     bool overlap(uint s, uint e) {
354 gpertea 144 if (s>e) { Gswap(s,e); }
355 gpertea 16 //return start<s ? (s<=end) : (start<=e);
356     return (start<=e && end>=s);
357 gpertea 2 }
358    
359     //return the length of overlap between two segments
360     int overlapLen(GSeg* r) {
361     if (start<r->start) {
362     if (r->start>end) return 0;
363     return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
364     }
365     else { //r->start<=start
366     if (start>r->end) return 0;
367     return (r->end<end)? r->end-start+1 : end-start+1;
368     }
369     }
370     int overlapLen(uint rstart, uint rend) {
371 gpertea 144 if (rstart>rend) { Gswap(rstart,rend); }
372 gpertea 2 if (start<rstart) {
373     if (rstart>end) return 0;
374     return (rend>end) ? end-rstart+1 : rend-rstart+1;
375     }
376     else { //rstart<=start
377     if (start>rend) return 0;
378     return (rend<end)? rend-start+1 : end-start+1;
379     }
380     }
381    
382     //fuzzy coordinate matching:
383     bool coordMatch(GSeg* s, uint fuzz=0) {
384     if (fuzz==0) return (start==s->start && end==s->end);
385     uint sd = (start>s->start) ? start-s->start : s->start-start;
386     uint ed = (end>s->end) ? end-s->end : s->end-end;
387     return (sd<=fuzz && ed<=fuzz);
388     }
389     //comparison operators required for sorting
390     bool operator==(GSeg& d){
391     return (start==d.start && end==d.end);
392     }
393     bool operator<(GSeg& d){
394     return (start==d.start)?(end<d.end):(start<d.start);
395     }
396     };
397    
398    
399    
400     //--------------------------------------------------------
401     // ************** simple line reading class for text files
402    
403     //GLineReader -- text line reading/buffering class
404     class GLineReader {
405 gpertea 16 bool closeFile;
406 gpertea 2 int len;
407     int allocated;
408     char* buf;
409     bool isEOF;
410     FILE* file;
411     off_t filepos; //current position
412     bool pushed; //pushed back
413     int lcount; //line counter (read lines)
414     public:
415     char* chars() { return buf; }
416     char* line() { return buf; }
417     int readcount() { return lcount; } //number of lines read
418 gpertea 16 void setFile(FILE* stream) { file=stream; }
419 gpertea 2 int length() { return len; }
420     int size() { return len; } //same as size();
421     bool isEof() {return isEOF; }
422     bool eof() { return isEOF; }
423     off_t getfpos() { return filepos; }
424     off_t getFpos() { return filepos; }
425     char* nextLine() { return getLine(); }
426     char* getLine() { if (pushed) { pushed=false; return buf; }
427     else return getLine(file); }
428     char* getLine(FILE* stream) {
429     if (pushed) { pushed=false; return buf; }
430     else return getLine(stream, filepos); }
431     char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
432     // the given file position
433     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
434     // so the next call will in fact return the same line
435 gpertea 16 GLineReader(const char* fname) {
436     FILE* f=fopen(fname, "rb");
437     if (f==NULL) GError("Error opening file '%s'!\n",fname);
438     closeFile=true;
439     init(f);
440     }
441 gpertea 2 GLineReader(FILE* stream=NULL, off_t fpos=0) {
442 gpertea 16 closeFile=false;
443     init(stream,fpos);
444     }
445     void init(FILE* stream, off_t fpos=0) {
446 gpertea 2 len=0;
447     isEOF=false;
448     allocated=1024;
449     GMALLOC(buf,allocated);
450     lcount=0;
451     buf[0]=0;
452     file=stream;
453     filepos=fpos;
454     pushed=false;
455     }
456     ~GLineReader() {
457     GFREE(buf);
458 gpertea 16 if (closeFile) fclose(file);
459 gpertea 2 }
460     };
461    
462    
463     /* extended fgets() - to read one full line from a file and
464     update the file position correctly !
465     buf will be reallocated as necessary, to fit the whole line
466     */
467     char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
468    
469 gpertea 16
470     //print int/values nicely formatted in 3-digit groups
471     char* commaprint(uint64 n);
472    
473 gpertea 2 /*********************** File management functions *********************/
474    
475 gpertea 16 // removes the last part (file or directory name) of a full path
476     // WARNING: this is a destructive operation for the given string!
477 gpertea 2 void delFileName(char* filepath);
478    
479 gpertea 16 // returns a pointer to the last file or directory name in a full path
480     const char* getFileName(const char* filepath);
481     // returns a pointer to the file "extension" part in a filename
482     const char* getFileExt(const char* filepath);
483 gpertea 2
484 gpertea 16
485 gpertea 2 int fileExists(const char* fname);
486     //returns 0 if file entry doesn't exist
487     // 1 if it's a directory
488     // 2 if it's a regular file
489     // 3 otherwise (?)
490    
491 gpertea 16 int64 fileSize(const char* fpath);
492 gpertea 2
493 gpertea 16 //write a FASTA-formatted sequence record
494     void writeFasta(FILE *fw, const char* seqid, const char* descr,
495     const char* seq, int linelen=60, int seqlen=0);
496    
497 gpertea 2 //parses the next number found in a string at the current position
498     //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
499     //updates the char* pointer to be after the last digit parsed
500     bool parseNumber(char* &p, double& v);
501     bool parseDouble(char* &p, double& v); //just an alias for parseNumber
502    
503     bool parseInt(char* &p, int& i);
504     bool parseUInt(char* &p, uint& i);
505     bool parseHex(char* &p, uint& i);
506    
507     #endif /* G_BASE_DEFINED */