ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
Revision: 310
Committed: Fri Mar 22 20:06:27 2013 UTC (6 years, 2 months ago) by gpertea
File size: 13372 byte(s)
Log Message:
sync with igm repo

Line User Rev File contents
1 gpertea 2 #ifndef G_BASE_DEFINED
2     #define G_BASE_DEFINED
3 gpertea 16 #ifndef _POSIX_SOURCE
4     //mostly for MinGW
5     #define _POSIX_SOURCE
6     #endif
7     #ifdef HAVE_CONFIG_H
8     #include "config.h"
9     #endif
10 gpertea 2 #include <string.h>
11     #include <stdlib.h>
12     #include <stdio.h>
13     #include <math.h>
14     #include <limits.h>
15     #include <sys/types.h>
16     #include <sys/stat.h>
17 gpertea 16 #include <stdint.h>
18    
// Platform setup: path separator, popen() spelling and file positioning
// functions able to carry the off_t type used by the rest of this header.
#if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
  #ifndef __WIN32__
    #define __WIN32__
  #endif
  #include <windows.h>
  #include <io.h>
  #define CHPATHSEP '\\'
  // file offsets are carried as 64-bit values on Windows
  #undef off_t
  #define off_t int64_t
  #ifndef popen
    #define popen _popen
  #endif
  // _fseeki64()/_ftelli64() are C runtime *functions*, not macros, so a plain
  // #ifdef on their names never succeeds and the 32-bit fseek()/ftell() would
  // always be paired with the 64-bit off_t above. Detect the toolchains that
  // ship them instead (MSVC 2005 or newer, mingw-w64); a macro with that name,
  // if some environment provides one, is still honored.
  #ifndef fseeko
    #if defined(_fseeki64) || (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__MINGW64_VERSION_MAJOR)
      #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
    #else
      // older runtimes: only offsets that fit in a long are supported
      #define fseeko fseek
    #endif
  #endif
  #ifndef ftello
    #if defined(_ftelli64) || (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__MINGW64_VERSION_MAJOR)
      #define ftello(stream) _ftelli64(stream)
    #else
      #define ftello ftell
    #endif
  #endif
#else
  #define CHPATHSEP '/'
  #include <unistd.h>
#endif
57    
// Last-resort mappings: when no fseeko/ftello macro exists at this point,
// route both names to the plain fseek()/ftell() functions.
#if !defined(fseeko)
  #define fseeko fseek
#endif
#if !defined(ftello)
  #define ftello ftell
#endif

// A DEBUG build never runs with NDEBUG defined
#if defined(DEBUG)
  #undef NDEBUG
#endif
68    
// Short aliases for the <stdint.h> fixed-width integer types
typedef int16_t  int16;
typedef uint16_t uint16;
typedef int32_t  int32;
typedef uint32_t uint32;
typedef int64_t  int64;
typedef uint64_t uint64;

// Unsigned 8-bit storage units
typedef unsigned char uchar;
typedef unsigned char byte;

// Largest unsigned int / int values; two spellings of each name are provided
// (MAXUINT and MAX_UINT, MAXINT and MAX_INT), each only if not defined already.
#if !defined(MAXUINT)
  #define MAXUINT ((unsigned int)-1)
#endif
#if !defined(MAX_UINT)
  #define MAX_UINT ((unsigned int)-1)
#endif

#if !defined(MAXINT)
  #define MAXINT INT_MAX
#endif
#if !defined(MAX_INT)
  #define MAX_INT INT_MAX
#endif
95    
96 gpertea 2 /****************************************************************************/
97    
// Process exit codes, supplied only when not defined already
#if !defined(EXIT_FAILURE)
  #define EXIT_FAILURE 1
#endif
#if !defined(EXIT_SUCCESS)
  #define EXIT_SUCCESS 0
#endif

/****************************************************************************/
// Message handed to GError() when an allocation fails
#define ERR_ALLOC "Error allocating memory.\n"

//-------------------

// Debug helpers
//  GASSERT(exp) : when exp is false, calls GAssert() with the expression text,
//                 file name and line number; compiled out when NDEBUG is set
//  GTRACE(exp)  : forwards a parenthesized argument list to GMessage(), e.g.
//                 GTRACE(("value=%d\n", v)); active only when TRACE is defined
//                 and NDEBUG is not
#ifdef NDEBUG
  #define GASSERT(exp) ((void)0)
  #define GTRACE(exp) ((void)0)
#else
  #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
  #ifdef TRACE
    #define GTRACE(exp) (GMessage exp)
  #else
    #define GTRACE(exp) ((void)0)
  #endif
#endif

//  GERROR(exp) : forwards a parenthesized argument list to GError()
#define GERROR(exp) (GError exp)
/********************************** Macros ***********************************/
// These are plain text-substitution macros: an argument can be evaluated
// more than once, so do not pass expressions with side effects.

// Absolute value
#define GABS(val)  (((val)>=0) ? (val) : -(val))

// Maximum and minimum of two values
#define GMAX(a,b)  (((a)>(b)) ? (a) : (b))
#define GMIN(a,b)  (((a)>(b)) ? (b) : (a))

// Minimum of three values
#define GMIN3(x,y,z)  ((x)<(y) ? GMIN(x,z) : GMIN(y,z))

// Maximum of three values
#define GMAX3(x,y,z)  ((x)>(y) ? GMAX(x,z) : GMAX(y,z))

// Store the smaller of a, b into lo and the larger one into hi
#define GMINMAX(lo,hi,a,b)  ((a)<(b) ? ((lo)=(a),(hi)=(b)) : ((lo)=(b),(hi)=(a)))

// Clamp value x to the range [lo..hi]
#define GCLAMP(lo,x,hi)  ((x)<(lo) ? (lo) : ((x)>(hi) ? (hi) : (x)))
144    
// Generic untyped item handle and a short name for unsigned int
typedef unsigned int uint;
typedef void* pointer;

// Callback signatures taking or returning generic items.
// Note that "const pointer" is a constant pointer (void* const),
// not a pointer to constant data.

// releases an item: usually just delete,
// but may also support structures with embedded dynamic members
typedef void GFreeProc(pointer item);

// compares two items
typedef int GCompareProc(const pointer item1, const pointer item2);

typedef long GFStoreProc(const pointer item1, FILE* fstorage); //for serialization
typedef pointer GFLoadProc(FILE* fstorage); //for deserialization
153     //but may also support structures with embedded dynamic members
154    
// Allocation wrappers around GMalloc()/GCalloc()/GRealloc()/GFree().
//  ptr  : a pointer variable (lvalue) of any pointer type; its address is
//         handed to the allocator, which stores the result in it
//  size : requested size, passed through to the allocator
// When the allocator returns false, GError(ERR_ALLOC) is called.
// Each macro expands to a single statement (do { } while (0)), so it can be
// used safely as the body of an if/else without capturing the else branch.
#define GMALLOC(ptr,size) do { \
    if (!GMalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GCALLOC(ptr,size) do { \
    if (!GCalloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
#define GREALLOC(ptr,size) do { \
    if (!GRealloc((pointer*)(&(ptr)),(size))) GError(ERR_ALLOC); \
  } while (0)
// Releases the memory through GFree(), which receives the address of ptr
#define GFREE(ptr) GFree((pointer*)(&(ptr)))
162    
// Returns whichever of the two strings sorts first according to strcmp();
// when they compare equal, arg2 is returned.
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) return arg1;
  return arg2;
}
166    
// Returns whichever of the two strings sorts last according to strcmp();
// when they compare equal, arg2 is returned.
inline char* strMax(char *arg1, char *arg2) {
  if (strcmp(arg2, arg1) < 0) return arg1;
  return arg2;
}
170    
// Rounds x to an int by adding 0.5 and taking the floor,
// so exact halves go towards +infinity (2.5 -> 3, -2.5 -> -2).
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
174    
175     /****************************************************************************/
176    
// Three-way comparison of two ints:
// returns -1 if a<b, 0 if a==b and 1 if a>b.
inline int Gintcmp(int a, int b) {
 // a-b is not used here: it overflows (undefined behavior, typically giving
 // the wrong sign) for operands of opposite sign and large magnitude
 return (a>b) - (a<b);
}
181    
182 gpertea 150 int Gstrcmp(const char* a, const char* b, int n=-1);
183 gpertea 2 //same as strcmp but doesn't crash on NULL pointers
184    
185 gpertea 150 int Gstricmp(const char* a, const char* b, int n=-1);
186 gpertea 2
// Generic swap: exchanges the values of lhs and rhs through a temporary copy
// (T must be copy-constructible and assignable)
template<class T> void Gswap(T& lhs, T& rhs) {
  T saved = lhs;
  lhs = rhs;
  rhs = saved;
}
194 gpertea 2
195    
196     /**************** Memory management ***************************/
197    
198     bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
199     bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
200     bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
201     void GFree(pointer* ptr); // Free memory, resets ptr to NULL
202    
203    
204 gpertea 258 //int saprintf(char **retp, const char *fmt, ...);
205 gpertea 16
206 gpertea 2 void GError(const char* format,...); // Error routine (aborts program)
207     void GMessage(const char* format,...);// Log message to stderr
208     // Assert failed routine:- usually not called directly but through GASSERT
209     void GAssert(const char* expression, const char* filename, unsigned int lineno);
210    
211     // ****************** string manipulation *************************
212     char *Gstrdup(const char* str);
213     //duplicate a string by allocating a copy for it and returning it
214     char* Gstrdup(const char* sfrom, const char* sto);
215     //same as Gstrdup, but with early termination (e.g. on a delimiter)
216    
217     char* Gsubstr(const char* str, char* from, char* to=NULL);
218     //extracts a substring, allocating it, including boundaries (from/to)
219    
220     int strsplit(char* str, char** fields, int maxfields, const char* delim);
221     int strsplit(char* str, char** fields, int maxfields, const char delim);
222     int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
223    
224     char* replaceStr(char* &str, char* newvalue);
225    
226     //conversion: to Lower/Upper case
227     // creating a new string:
228     char* upCase(const char* str);
229     char* loCase(const char* str);
230     // changing string in place:
231     char* strlower(char * str);
232     char* strupper(char * str);
233    
234     //strstr but for memory zones: scans a memory region
235     //for a substring:
236     void* Gmemscan(void *mem, unsigned int len,
237     void *part, unsigned int partlen);
238    
239     // test if a char is in a string:
240 gpertea 16 bool chrInStr(char c, const char* str);
241 gpertea 2
242     char* rstrchr(char* str, char ch);
243     /* returns a pointer to the rightmost
244     occurrence of ch in str - like rindex for platforms missing it*/
245    
246 gpertea 16 char* strchrs(const char* s, const char* chrs);
247 gpertea 2 //strchr but with a set of chars instead of only one
248    
249 gpertea 90 char* rstrfind(const char* str, const char *substr);
250     // like rindex() but for strings; right side version of strstr()
251 gpertea 2
252 gpertea 90 char* reverseChars(char* str, int slen=0); //in place reversal of string
253    
254 gpertea 16 char* rstrstr(const char* rstart, const char *lend, const char* substr);
255 gpertea 2 /*the reversed, rightside equivalent of strstr: starts searching
256     from right end (rstart), going back to left end (lend) and returns
257     a pointer to the last (right) matching character in str */
258    
259 gpertea 16 char* strifind(const char* str, const char* substr);
260 gpertea 2 // the case insensitive version of strstr -- finding a string within a string
261    
262    
263     //Determines if a string begins with a given prefix
264     //(returns false when any of the params is NULL,
265     // but true when prefix is '' (empty string)!)
266 gpertea 16 bool startsWith(const char* s, const char* prefix);
267 gpertea 2
268 gpertea 16 bool endsWith(const char* s, const char* suffix);
269     //Note: returns true if suffix is empty string, but false if it's NULL
270    
271    
272 gpertea 2 // ELF hash function for strings
273     int strhash(const char* str);
274    
275    
276    
277     //---- generic base GSeg : genomic segment (interval) --
278     // coordinates are considered 1-based (so 0 is invalid)
class GSeg {
 public:
  uint start; //start<=end always: the constructor orders the two coordinates
  uint end;

  // Builds the segment [s..e]; coordinates given in reverse order are swapped
  GSeg(uint s=0,uint e=0) {
    if (s>e) { start=e;end=s; }
    else { start=s;end=e; }
  }

  // number of positions covered, both ends included
  uint len() const { return end-start+1; }

  //check for overlap with other segment
  //(closed intervals: sharing a single position counts as overlap)
  bool overlap(GSeg* d) const {
    return (start<=d->end && end>=d->start);
  }

  bool overlap(GSeg& d) const {
    return (start<=d.end && end>=d.start);
  }

  // overlap test with fuzz added to the end coordinate of both segments
  bool overlap(GSeg& d, int fuzz) const {
    // computed in signed 64-bit: in unsigned arithmetic a negative fuzz, or an
    // end coordinate close to the uint maximum, wraps around and gives the
    // wrong answer
    const int64_t slack = fuzz;
    return ((int64_t)start <= (int64_t)d.end + slack &&
            (int64_t)end + slack >= (int64_t)d.start);
  }

  // overlap test against the interval [s..e], given in either order
  bool overlap(uint s, uint e) const {
    if (s>e) { uint t=s; s=e; e=t; }
    return (start<=e && end>=s);
  }

  //return the length of overlap between two segments (0 if they are disjoint)
  int overlapLen(GSeg* r) const {
    if (start<r->start) {
      if (r->start>end) return 0;
      return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
    }
    else { //r->start<=start
      if (start>r->end) return 0;
      return (r->end<end)? r->end-start+1 : end-start+1;
    }
  }

  // same, against the interval [rstart..rend], given in either order
  int overlapLen(uint rstart, uint rend) const {
    if (rstart>rend) { uint t=rstart; rstart=rend; rend=t; }
    if (start<rstart) {
      if (rstart>end) return 0;
      return (rend>end) ? end-rstart+1 : rend-rstart+1;
    }
    else { //rstart<=start
      if (start>rend) return 0;
      return (rend<end)? rend-start+1 : end-start+1;
    }
  }

  //fuzzy coordinate matching: both start and end must be
  //within fuzz positions of those of s (exact match when fuzz is 0)
  bool coordMatch(GSeg* s, uint fuzz=0) const {
    if (fuzz==0) return (start==s->start && end==s->end);
    uint sd = (start>s->start) ? start-s->start : s->start-start;
    uint ed = (end>s->end) ? end-s->end : s->end-end;
    return (sd<=fuzz && ed<=fuzz);
  }

  //comparison operators required for sorting
  //(ordering is by start coordinate, then by end coordinate)
  bool operator==(const GSeg& d) const {
    return (start==d.start && end==d.end);
  }
  bool operator<(const GSeg& d) const {
    return (start==d.start)?(end<d.end):(start<d.start);
  }
};
348    
349    
350    
351     //--------------------------------------------------------
352     // ************** simple line reading class for text files
353    
354     //GLineReader -- text line reading/buffering class
355     class GLineReader {
356 gpertea 16 bool closeFile;
357 gpertea 2 int len;
358     int allocated;
359     char* buf;
360     bool isEOF;
361     FILE* file;
362     off_t filepos; //current position
363     bool pushed; //pushed back
364     int lcount; //line counter (read lines)
365     public:
366     char* chars() { return buf; }
367     char* line() { return buf; }
368     int readcount() { return lcount; } //number of lines read
369 gpertea 16 void setFile(FILE* stream) { file=stream; }
370 gpertea 2 int length() { return len; }
371     int size() { return len; } //same as size();
372     bool isEof() {return isEOF; }
373     bool eof() { return isEOF; }
374     off_t getfpos() { return filepos; }
375     off_t getFpos() { return filepos; }
376     char* nextLine() { return getLine(); }
377     char* getLine() { if (pushed) { pushed=false; return buf; }
378     else return getLine(file); }
379     char* getLine(FILE* stream) {
380     if (pushed) { pushed=false; return buf; }
381     else return getLine(stream, filepos); }
382     char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
383     // the given file position
384     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
385     // so the next call will in fact return the same line
386 gpertea 16 GLineReader(const char* fname) {
387     FILE* f=fopen(fname, "rb");
388     if (f==NULL) GError("Error opening file '%s'!\n",fname);
389     closeFile=true;
390     init(f);
391     }
392 gpertea 2 GLineReader(FILE* stream=NULL, off_t fpos=0) {
393 gpertea 16 closeFile=false;
394     init(stream,fpos);
395     }
396     void init(FILE* stream, off_t fpos=0) {
397 gpertea 2 len=0;
398     isEOF=false;
399     allocated=1024;
400     GMALLOC(buf,allocated);
401     lcount=0;
402     buf[0]=0;
403     file=stream;
404     filepos=fpos;
405     pushed=false;
406     }
407     ~GLineReader() {
408     GFREE(buf);
409 gpertea 16 if (closeFile) fclose(file);
410 gpertea 2 }
411     };
412    
413    
414     /* extended fgets() - to read one full line from a file and
415     update the file position correctly !
416     buf will be reallocated as necessary, to fit the whole line
417     */
418     char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
419    
420 gpertea 16
421     //print int/values nicely formatted in 3-digit groups
422     char* commaprint(uint64 n);
423    
424 gpertea 2 /*********************** File management functions *********************/
425    
426 gpertea 16 // removes the last part (file or directory name) of a full path
427     // WARNING: this is a destructive operation for the given string!
428 gpertea 2 void delFileName(char* filepath);
429    
430 gpertea 16 // returns a pointer to the last file or directory name in a full path
431     const char* getFileName(const char* filepath);
432     // returns a pointer to the file "extension" part in a filename
433     const char* getFileExt(const char* filepath);
434 gpertea 2
435 gpertea 16
436 gpertea 2 int fileExists(const char* fname);
437     //returns 0 if file entry doesn't exist
438     // 1 if it's a directory
439     // 2 if it's a regular file
440     // 3 otherwise (?)
441    
442 gpertea 16 int64 fileSize(const char* fpath);
443 gpertea 2
444 gpertea 16 //write a FASTA-formatted record
445     void writeFasta(FILE *fw, const char* seqid, const char* descr,
446     const char* seq, int linelen=60, int seqlen=0);
447    
448 gpertea 2 //parses the next number found in a string at the current position
449     //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
450     //updates the char* pointer to be after the last digit parsed
451     bool parseNumber(char* &p, double& v);
452     bool parseDouble(char* &p, double& v); //just an alias for parseNumber
453    
454     bool parseInt(char* &p, int& i);
455     bool parseUInt(char* &p, uint& i);
456     bool parseHex(char* &p, uint& i);
457    
458     #endif /* G_BASE_DEFINED */