ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/tophat_cpp/GBase.h
Revision: 154
Committed: Tue Jan 24 02:29:21 2012 UTC (7 years, 8 months ago) by gpertea
File size: 15343 byte(s)
Log Message:
massive update with Daehwan's work

Line File contents
1 #ifndef G_BASE_DEFINED
2 #define G_BASE_DEFINED
3 #ifndef _POSIX_SOURCE
4 //mostly for MinGW
5 #define _POSIX_SOURCE
6 #endif
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 #include <string.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <math.h>
14 #include <limits.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <stdint.h>
18
// Platform layer: selects the path separator character and maps the
// fseeko/ftello/off_t names used by the rest of the library onto what the
// current platform provides.
19 #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
// normalize all the Windows spellings to __WIN32__
20 #ifndef __WIN32__
21 #define __WIN32__
22 #endif
23 #include <windows.h>
24 #include <io.h>
25 #define CHPATHSEP '\\'
// on Windows off_t is replaced by a 64-bit integer type
26 #undef off_t
27 #define off_t int64_t
// NOTE(review): #ifdef only sees preprocessor macros. If the C runtime declares
// _fseeki64 / _ftelli64 as plain functions (not macros) these tests are false
// and the fseek/ftell fallbacks below are selected, so the 64-bit off_t would
// presumably be narrowed to long -- confirm against the target toolchain.
28 #ifdef _fseeki64
29 #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
30 #else
31 /*
32 #define _DEFINE_WIN32_FSEEKO
33 int fseeko(FILE *stream, off_t offset, int whence);
34 */
35 #define fseeko fseek
36 #endif
37 #ifdef _ftelli64
38 #define ftello(stream) _ftelli64(stream)
39 #else
40 /*
41 #define _DEFINE_WIN32_FTELLO
42 off_t ftello(FILE *stream);
43 */
44 #define ftello ftell
45 #endif
46 #else
// every non-Windows platform
47 #define CHPATHSEP '/'
48 #include <unistd.h>
49 #endif
50
// Fallbacks for any platform where fseeko/ftello are not defined as macros.
// NOTE(review): where the C library provides fseeko/ftello as functions rather
// than macros, these #ifndef tests still succeed and redirect the calls to
// fseek/ftell -- verify that this is intended.
51 #ifndef fseeko
52 #define fseeko fseek
53 #endif
54 #ifndef ftello
55 #define ftello ftell
56 #endif
57
58 #ifdef DEBUG
59 #undef NDEBUG
60 #endif
61
// Short aliases for the <stdint.h> 32-bit integer types.
62 typedef int32_t int32;
63 typedef uint32_t uint32;
64
// Two names for an unsigned 8-bit-or-wider character type.
65 typedef unsigned char uchar;
66 typedef unsigned char byte;
67
// Integer limits, each available under two spellings (MAXUINT/MAX_UINT and
// MAXINT/MAX_INT). Every definition is guarded, so a macro of the same name
// that is already defined takes precedence.
// (unsigned int)-1 converts to the largest unsigned int value.
68 #ifndef MAXUINT
69 #define MAXUINT ((unsigned int)-1)
70 #endif
71
72 #ifndef MAXINT
73 #define MAXINT INT_MAX
74 #endif
75
76 #ifndef MAX_UINT
77 #define MAX_UINT ((unsigned int)-1)
78 #endif
79
80 #ifndef MAX_INT
81 #define MAX_INT INT_MAX
82 #endif
83
// Short aliases for the <stdint.h> 64-bit integer types.
84 typedef int64_t int64;
85 typedef uint64_t uint64;
86
87 /****************************************************************************/
88
89 #ifndef EXIT_FAILURE
90 #define EXIT_FAILURE 1
91 #endif
92
93 #ifndef EXIT_SUCCESS
94 #define EXIT_SUCCESS 0
95 #endif
96
97 /****************************************************************************/
98 #define ERR_ALLOC "Error allocating memory.\n"
99
100 //-------------------
101
102 // Debug helpers
// GASSERT(exp): when NDEBUG is not defined, evaluates exp and, if it is false,
//   calls GAssert() with the expression text, source file and line number.
//   When NDEBUG is defined it expands to ((void)0), so exp is NOT evaluated --
//   do not put side effects in it.
// GTRACE(exp): expands to (GMessage exp) only when TRACE is defined and NDEBUG
//   is not; otherwise ((void)0). Because exp is pasted right after the function
//   name it must be a complete parenthesized argument list,
//   e.g. GTRACE(("value=%d\n", v));
103 #ifndef NDEBUG
104 #define GASSERT(exp) ((exp)?((void)0):(void)GAssert(#exp,__FILE__,__LINE__))
105 #ifdef TRACE
106 #define GTRACE(exp) (GMessage exp)
107 #else
108 #define GTRACE(exp) ((void)0)
109 #endif
110 #else
111 #define GASSERT(exp) ((void)0)
112 #define GTRACE(exp) ((void)0)
113 #endif
114
// GERROR(exp): always active; expands to (GError exp), so exp must also be a
// parenthesized argument list, e.g. GERROR(("bad input: %s\n", s));
115 #define GERROR(exp) (GError exp)
116 /********************************** Macros ***********************************/
// NOTE: all of the macros below may evaluate an argument more than once, so
// arguments must be free of side effects (no i++, no function calls that
// change state).
117 // Absolute value
118 #define GABS(val) (((val)>=0)?(val):-(val))
119
120 // Min and Max
121 #define GMAX(a,b) (((a)>(b))?(a):(b))
122 #define GMIN(a,b) (((a)>(b))?(b):(a))
123
124 // Min of three
125 #define GMIN3(x,y,z) ((x)<(y)?GMIN(x,z):GMIN(y,z))
126
127 // Max of three
128 #define GMAX3(x,y,z) ((x)>(y)?GMAX(x,z):GMAX(y,z))
129
130 // Store the minimum of a, b in lo and the maximum in hi
131 #define GMINMAX(lo,hi,a,b) ((a)<(b)?((lo)=(a),(hi)=(b)):((lo)=(b),(hi)=(a)))
132
133 // Clamp value x to range [lo..hi]: yields lo when x<lo, hi when x>hi, else x
134 #define GCLAMP(lo,x,hi) ((x)<(lo)?(lo):((x)>(hi)?(hi):(x)))
135
// Generic untyped pointer and a short name for unsigned int.
136 typedef void* pointer;
137 typedef unsigned int uint;
138
// Signature of an item comparison callback.
// Note that "const pointer" is "void* const" (a constant pointer), not a
// pointer to const data.
// NOTE(review): the return convention is presumably <0 / 0 / >0 as for strcmp;
// confirm against the containers that call it.
139 typedef int GCompareProc(const pointer item1, const pointer item2);
// Signature of an item release callback.
140 typedef void GFreeProc(pointer item); //usually just delete,
141 //but may also support structures with embedded dynamic members
142
// Allocation helpers. Each one passes the address of ptr (cast to pointer*) to
// the matching G* routine and calls GError(ERR_ALLOC) when that routine
// returns false.
//   GMALLOC(ptr,size)  - allocate via GMalloc
//   GCALLOC(ptr,size)  - allocate via GCalloc
//   GREALLOC(ptr,size) - resize via GRealloc
//   GFREE(ptr)         - release via GFree
// ptr must be an lvalue, because its address is taken.
// The bodies are wrapped in do { } while (0) so that each macro behaves as one
// statement: with a bare "if" inside, code such as
//     if (cond) GMALLOC(p,n); else other();
// would silently attach the else to the macro's hidden if.
#define GMALLOC(ptr,size) do { \
    if (!GMalloc((pointer*)(&ptr),size)) GError(ERR_ALLOC); \
  } while (0)
#define GCALLOC(ptr,size) do { \
    if (!GCalloc((pointer*)(&ptr),size)) GError(ERR_ALLOC); \
  } while (0)
#define GREALLOC(ptr,size) do { \
    if (!GRealloc((pointer*)(&ptr),size)) GError(ERR_ALLOC); \
  } while (0)
#define GFREE(ptr) GFree((pointer*)(&ptr))
150
// Returns whichever of the two C strings orders first under strcmp().
// When the strings compare equal, arg2 is returned.
inline char* strMin(char *arg1, char *arg2) {
  if (strcmp(arg1, arg2) < 0) {
    return arg1;
  }
  return arg2;
}
154
// Returns whichever of the two C strings orders last under strcmp().
// When the strings compare equal, arg2 is returned.
inline char* strMax(char *arg1, char *arg2) {
  const int order = strcmp(arg2, arg1);
  if (order < 0) {
    return arg1;
  }
  return arg2;
}
158
// Rounds x to the nearest integer by flooring x+0.5, so halfway cases go
// toward +infinity (2.5 -> 3, -2.5 -> -2). The result is converted to int.
inline int iround(double x) {
  const double shifted = x + 0.5;
  return (int)floor(shifted);
}
162
163 /****************************************************************************/
164
// Three-way integer comparison for use in sort/compare callbacks.
// Returns a negative value when a<b, 0 when a==b and a positive value when
// a>b (specifically -1, 0 or 1).
// The previous "a-b" form overflows when the operands are far apart
// (e.g. INT_MAX and -1), which is undefined behaviour and in practice yields
// the wrong sign; comparing instead of subtracting cannot overflow.
inline int Gintcmp(int a, int b) {
  return (a > b) - (a < b);
}
169
// NULL-tolerant string comparison.
// NOTE(review): n presumably limits the comparison to the first n characters,
// with the default -1 meaning "compare whole strings"; the ordering assigned to
// a NULL argument is not visible here -- confirm both in the implementation.
170 int Gstrcmp(const char* a, const char* b, int n=-1);
171 //same as strcmp but doesn't crash on NULL pointers
172
// NOTE(review): by its name, presumably the case-insensitive counterpart of
// Gstrcmp with the same meaning for n -- confirm in the implementation.
173 int Gstricmp(const char* a, const char* b, int n=-1);
174
// Minimal generic swap: exchanges the values of lhs and rhs through one
// temporary. T must be copy-constructible and copy-assignable.
template<class T> void Gswap(T& lhs, T& rhs) {
  T saved = lhs; // keep the old left-hand value
  lhs = rhs;
  rhs = saved;
}
182
183 /// bitCount_32 - this function counts the number of set bits in a value.
184 /// Ex. bitCount_32(0xF000F000) = 8
185 /// Returns 0 if the word is zero.
186 inline uint bitCount_32(uint32_t Value) {
// GCC 4 and later: use the compiler builtin
187 #if __GNUC__ >= 4
188 return __builtin_popcount(Value);
189 #else
// Portable fallback: parallel bit summation.
// First each 2-bit field receives the count of its own set bits...
190 uint32_t v = Value - ((Value >> 1) & 0x55555555);
// ...then adjacent 2-bit counts are added into 4-bit fields...
191 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
// ...then adjacent 4-bit counts are added into bytes ('+' binds tighter than
// '&', so the mask applies to the sum), and the multiplication accumulates
// all four byte counts in the top byte, which the shift extracts.
192 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
193 #endif
194 }
195
196 /// bitCount_64 - this function counts the number of set bits in a value,
197 /// (64 bit edition.)
198 inline uint bitCount_64(uint64_t Value) {
// GCC 4 and later: use the compiler builtin
199 #if __GNUC__ >= 4
200 return __builtin_popcountll(Value);
201 #else
// Portable fallback: same parallel bit summation as bitCount_32, on 64 bits.
// 2-bit fields receive the count of their own set bits
202 uint64_t v = Value - ((Value >> 1) & 0x5555555555555555ULL);
// 2-bit counts are added pairwise into 4-bit fields
203 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
// 4-bit counts are added pairwise into bytes
204 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
// the multiplication accumulates all eight byte counts in the top byte
205 return uint((uint64_t)(v * 0x0101010101010101ULL) >> 56);
206 #endif
207 }
208
209 /// bitCountTrailingZeros_32 - this function performs the platform optimal form of
210 /// counting the number of zeros from the least significant bit to the first one
211 /// bit. Ex. bitCountTrailingZeros_32(0xFF00FF00) == 8.
212 /// Returns 32 if the word is zero.
213 inline unsigned bitCountTrailingZeros_32(uint32_t Value) {
// GCC 4 and later: use the builtin, with zero handled explicitly here
214 #if __GNUC__ >= 4
215 return Value ? __builtin_ctz(Value) : 32;
216 #else
// Portable fallback: (-Value & Value) keeps only the lowest set bit (0 when
// Value is 0); its remainder modulo 37 indexes the table below, whose entry
// is the position of that bit. Index 0 (Value == 0) yields 32.
217 static const unsigned Mod37BitPosition[] = {
218 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
219 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
220 5, 20, 8, 19, 18
221 };
222 return Mod37BitPosition[(-Value & Value) % 37];
223 #endif
224 }
225
226 /// bitCountTrailingZeros_64 - This function performs the platform optimal form
227 /// of counting the number of zeros from the least significant bit to the first
228 /// one bit (64 bit edition.)
229 /// Returns 64 if the word is zero.
230 inline unsigned bitCountTrailingZeros_64(uint64_t Value) {
// GCC 4 and later: use the builtin, with zero handled explicitly here
231 #if __GNUC__ >= 4
232 return Value ? __builtin_ctzll(Value) : 64;
233 #else
// Portable fallback: (-Value & Value) keeps only the lowest set bit (0 when
// Value is 0); its remainder modulo 67 indexes the table below, whose entry
// is the position of that bit. Index 0 (Value == 0) yields 64.
234 static const unsigned Mod67Position[] = {
235 64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
236 4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55,
237 47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27,
238 29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56,
239 7, 48, 35, 6, 34, 33, 0
240 };
241 return Mod67Position[(-Value & Value) % 67];
242 #endif
243 }
244
245 /**************** Memory management ***************************/
246
// Low-level allocation routines, normally reached through the GMALLOC /
// GCALLOC / GREALLOC / GFREE macros. Each takes the ADDRESS of the caller's
// pointer variable. The macros treat a false return value as failure.
// NOTE(review): size is presumably a byte count and GCalloc presumably
// zero-fills; confirm in the implementation.
247 bool GMalloc(pointer* ptr, unsigned long size); // Allocate memory
248 bool GCalloc(pointer* ptr, unsigned long size); // Allocate and initialize memory
249 bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
250 void GFree(pointer* ptr); // Free memory, resets ptr to NULL
251
252
// printf-style formatting that hands its result back through *retp.
// NOTE(review): by its name this presumably allocates the output string
// (asprintf-like), leaving the caller to release it; the meaning of the int
// result is not visible here -- confirm in the implementation.
253 int saprintf(char **retp, const char *fmt, ...);
254
// printf-style reporting routines (format string followed by its arguments).
255 void GError(const char* format,...); // Error routine (aborts program)
256 void GMessage(const char* format,...);// Log message to stderr
257 // Assert failed routine:- usually not called directly but through GASSERT
// (GASSERT passes the expression text, __FILE__ and __LINE__)
258 void GAssert(const char* expression, const char* filename, unsigned int lineno);
259
260 // ****************** string manipulation *************************
// NOTE(review): for every function below that returns a newly allocated
// string, the allocator (and therefore the matching release call) is not
// visible in this header -- confirm in the implementation.
261 char *Gstrdup(const char* str);
262 //duplicate a string by allocating a copy for it and returning it
263 char* Gstrdup(const char* sfrom, const char* sto);
264 //same as Gstrdup, but with an early termination (e.g. on delimiter)
265
266 char* Gsubstr(const char* str, char* from, char* to=NULL);
267 //extracts a substring, allocating it, including boundaries (from/to)
268
// Split str into fields, storing up to maxfields pointers in fields[].
// The three overloads differ in the delimiter: a string of delimiters, a
// single delimiter character, or (no delimiter argument) tab or space.
// NOTE(review): str is taken as non-const, so it is presumably modified in
// place, and the int result is presumably the number of fields found --
// confirm in the implementation.
269 int strsplit(char* str, char** fields, int maxfields, const char* delim);
270 int strsplit(char* str, char** fields, int maxfields, const char delim);
271 int strsplit(char* str, char** fields, int maxfields); //splits by tab or space
272
// NOTE(review): presumably replaces the string held in str (passed by
// reference) with a copy of newvalue and returns the new pointer -- confirm.
273 char* replaceStr(char* &str, char* newvalue);
274
275 //conversion: to Lower/Upper case
276 // creating a new string:
277 char* upCase(const char* str);
278 char* loCase(const char* str);
279 // changing string in place:
280 char* strlower(char * str);
281 char* strupper(char * str);
282
283 //strstr but for memory zones: scans a memory region
284 //for a substring:
// mem/len describe the region to search, part/partlen the bytes to look for
285 void* Gmemscan(void *mem, unsigned int len,
286 void *part, unsigned int partlen);
287
288 // test if a char is in a string:
289 bool chrInStr(char c, const char* str);
290
291 char* rstrchr(char* str, char ch);
292 /* returns a pointer to the rightmost
293 occurrence of ch in str - like rindex for platforms missing it*/
294
295 char* strchrs(const char* s, const char* chrs);
296 //strchr but with a set of chars instead of only one
297
298 char* rstrfind(const char* str, const char *substr);
299 // like rindex() but for strings; right side version of strstr()
300
// NOTE(review): slen presumably is the length of str, with the default 0
// meaning "measure it with strlen" -- confirm in the implementation.
301 char* reverseChars(char* str, int slen=0); //in place reversal of string
302
303 char* rstrstr(const char* rstart, const char *lend, const char* substr);
304 /*the reversed, rightside equivalent of strstr: starts searching
305 from right end (rstart), going back to left end (lend) and returns
306 a pointer to the last (right) matching character in str */
307
308 char* strifind(const char* str, const char* substr);
309 // the case insensitive version of strstr -- finding a string within a string
310
311
312 //Determines if a string begins with a given prefix
313 //(returns false when any of the params is NULL,
314 // but true when prefix is '' (empty string)!)
315 bool startsWith(const char* s, const char* prefix);
316
317 bool endsWith(const char* s, const char* suffix);
318 //Note: returns true if suffix is empty string, but false if it's NULL
319
320
321 // ELF hash function for strings
322 int strhash(const char* str);
323
324
325
326 //---- generic base GSeg : genomic segment (interval) --
327 // coordinates are considered 1-based (so 0 is invalid)
328 class GSeg {
329 public:
330 uint start; //start<end always!
331 uint end;
332 GSeg(uint s=0,uint e=0) {
333 if (s>e) { start=e;end=s; }
334 else { start=s;end=e; }
335 }
336 //check for overlap with other segment
337 uint len() { return end-start+1; }
338 bool overlap(GSeg* d) {
339 //return start<d->start ? (d->start<=end) : (start<=d->end);
340 return (start<=d->end && end>=d->start);
341 }
342
343 bool overlap(GSeg& d) {
344 //return start<d.start ? (d.start<=end) : (start<=d.end);
345 return (start<=d.end && end>=d.start);
346 }
347
348 bool overlap(GSeg& d, int fuzz) {
349 //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
350 return (start<=d.end+fuzz && end+fuzz>=d.start);
351 }
352
353 bool overlap(uint s, uint e) {
354 if (s>e) { Gswap(s,e); }
355 //return start<s ? (s<=end) : (start<=e);
356 return (start<=e && end>=s);
357 }
358
359 //return the length of overlap between two segments
360 int overlapLen(GSeg* r) {
361 if (start<r->start) {
362 if (r->start>end) return 0;
363 return (r->end>end) ? end-r->start+1 : r->end-r->start+1;
364 }
365 else { //r->start<=start
366 if (start>r->end) return 0;
367 return (r->end<end)? r->end-start+1 : end-start+1;
368 }
369 }
370 int overlapLen(uint rstart, uint rend) {
371 if (rstart>rend) { Gswap(rstart,rend); }
372 if (start<rstart) {
373 if (rstart>end) return 0;
374 return (rend>end) ? end-rstart+1 : rend-rstart+1;
375 }
376 else { //rstart<=start
377 if (start>rend) return 0;
378 return (rend<end)? rend-start+1 : end-start+1;
379 }
380 }
381
382 //fuzzy coordinate matching:
383 bool coordMatch(GSeg* s, uint fuzz=0) {
384 if (fuzz==0) return (start==s->start && end==s->end);
385 uint sd = (start>s->start) ? start-s->start : s->start-start;
386 uint ed = (end>s->end) ? end-s->end : s->end-end;
387 return (sd<=fuzz && ed<=fuzz);
388 }
389 //comparison operators required for sorting
390 bool operator==(GSeg& d){
391 return (start==d.start && end==d.end);
392 }
393 bool operator<(GSeg& d){
394 return (start==d.start)?(end<d.end):(start<d.start);
395 }
396 };
397
398
399
400 //--------------------------------------------------------
401 // ************** simple line reading class for text files
402
403 //GLineReader -- text line reading/buffering class
404 class GLineReader {
405 bool closeFile;
406 int len;
407 int allocated;
408 char* buf;
409 bool isEOF;
410 FILE* file;
411 off_t filepos; //current position
412 bool pushed; //pushed back
413 int lcount; //line counter (read lines)
414 public:
415 char* chars() { return buf; }
416 char* line() { return buf; }
417 int readcount() { return lcount; } //number of lines read
418 void setFile(FILE* stream) { file=stream; }
419 int length() { return len; }
420 int size() { return len; } //same as size();
421 bool isEof() {return isEOF; }
422 bool eof() { return isEOF; }
423 off_t getfpos() { return filepos; }
424 off_t getFpos() { return filepos; }
425 char* nextLine() { return getLine(); }
426 char* getLine() { if (pushed) { pushed=false; return buf; }
427 else return getLine(file); }
428 char* getLine(FILE* stream) {
429 if (pushed) { pushed=false; return buf; }
430 else return getLine(stream, filepos); }
431 char* getLine(FILE* stream, off_t& f_pos); //read a line from a stream and update
432 // the given file position
433 void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
434 // so the next call will in fact return the same line
435 GLineReader(const char* fname) {
436 FILE* f=fopen(fname, "rb");
437 if (f==NULL) GError("Error opening file '%s'!\n",fname);
438 closeFile=true;
439 init(f);
440 }
441 GLineReader(FILE* stream=NULL, off_t fpos=0) {
442 closeFile=false;
443 init(stream,fpos);
444 }
445 void init(FILE* stream, off_t fpos=0) {
446 len=0;
447 isEOF=false;
448 allocated=1024;
449 GMALLOC(buf,allocated);
450 lcount=0;
451 buf[0]=0;
452 file=stream;
453 filepos=fpos;
454 pushed=false;
455 }
456 ~GLineReader() {
457 GFREE(buf);
458 if (closeFile) fclose(file);
459 }
460 };
461
462
463 /* extended fgets() - to read one full line from a file and
464 update the file position correctly !
465 buf will be reallocated as necessary, to fit the whole line
466 */
// buf and buflen are passed by reference so the (possibly reallocated) buffer
// and its size can be handed back; f_pos and linelen are optional outputs.
// NOTE(review): the return value at end of file, and whether the line
// terminator is kept in buf, are not visible here -- confirm in the
// implementation.
467 char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
468
469
470 //print int/values nicely formatted in 3-digit groups
// NOTE(review): who owns the returned string (static buffer or caller-freed
// allocation) is not visible here -- confirm before storing the result.
471 char* commaprint(uint64 n);
472
473 /*********************** File management functions *********************/
474
475 // removes the last part (file or directory name) of a full path
476 // WARNING: this is a destructive operation for the given string!
477 void delFileName(char* filepath);
478
479 // returns a pointer to the last file or directory name in a full path
480 const char* getFileName(const char* filepath);
481 // returns a pointer to the file "extension" part in a filename
482 const char* getFileExt(const char* filepath);
483
484
485 int fileExists(const char* fname);
486 //returns 0 if file entry doesn't exist
487 // 1 if it's a directory
488 // 2 if it's a regular file
489 // 3 otherwise (?)
490
// NOTE(review): presumably returns the size of the file in bytes; the value
// returned when the file cannot be examined is not visible here -- confirm.
491 int64 fileSize(const char* fpath);
492
493 //write a fasta formatted record to fw
// NOTE(review): linelen is presumably the number of sequence characters per
// output line, and seqlen=0 presumably means "use the length of seq" --
// confirm in the implementation.
494 void writeFasta(FILE *fw, const char* seqid, const char* descr,
495 const char* seq, int linelen=60, int seqlen=0);
496
497 //parses the next number found in a string at the current position
498 //until a non-digit (and not a '.', 'e','E','-','+') is encountered;
499 //updates the char* pointer to be after the last digit parsed
// NOTE(review): the bool result presumably reports whether a number was
// parsed -- confirm in the implementation.
500 bool parseNumber(char* &p, double& v);
501 bool parseDouble(char* &p, double& v); //just an alias for parseNumber
502
// Integer variants: p is passed by reference like in parseNumber, and the
// parsed value is stored in i.
// NOTE(review): overflow handling and the exact syntax accepted by parseHex
// (e.g. an optional "0x" prefix) are not visible here -- confirm.
503 bool parseInt(char* &p, int& i);
504 bool parseUInt(char* &p, uint& i);
505 bool parseHex(char* &p, uint& i);
506
507 #endif /* G_BASE_DEFINED */