ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
(Generate patch)
# Line 1 | Line 1
1   #ifndef G_BASE_DEFINED
2   #define G_BASE_DEFINED
3 <
3 > #ifndef _POSIX_SOURCE
4 > //mostly for MinGW
5 > #define _POSIX_SOURCE
6 > #endif
7 > #ifdef HAVE_CONFIG_H
8 > #include "config.h"
9 > #endif
10   #include <string.h>
11   #include <stdlib.h>
12   #include <stdio.h>
# Line 8 | Line 14
14   #include <limits.h>
15   #include <sys/types.h>
16   #include <sys/stat.h>
17 < #if defined __WIN32__ || defined _WIN32
17 > #include <stdint.h>
18 >
19 > #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
20 >  #ifndef __WIN32__
21 >    #define __WIN32__
22 >  #endif
23    #include <windows.h>
24 +  #include <io.h>
25 +  #define CHPATHSEP '\\'
26 +  #undef off_t
27 +  #define off_t int64_t
28 +  #ifdef _fseeki64
29 +    #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
30 +  #else
31 +    /*
32 +    #define _DEFINE_WIN32_FSEEKO
33 +    int fseeko(FILE *stream, off_t offset, int whence);
34 +    */
35 +    #define fseeko fseek
36 +  #endif
37 +  #ifdef _ftelli64
38 +    #define ftello(stream) _ftelli64(stream)
39 +  #else
40 +    /*
41 +    #define _DEFINE_WIN32_FTELLO
42 +    off_t ftello(FILE *stream);
43 +    */
44 +    #define ftello ftell
45 +  #endif
46 + #else
47 +  #define CHPATHSEP '/'
48 +  #include <unistd.h>
49 + #endif
50 +
51 + #ifndef fseeko
52 + #define fseeko fseek
53 + #endif
54 + #ifndef ftello
55 + #define ftello ftell
56   #endif
57  
58   #ifdef DEBUG
59   #undef NDEBUG
60   #endif
61  
62 < typedef unsigned int uint32;
63 < typedef int int32;
62 > typedef int32_t int32;
63 > typedef uint32_t uint32;
64 > typedef int16_t int16;
65 > typedef uint16_t uint16;
66 >
67   typedef unsigned char uchar;
68   typedef unsigned char byte;
69  
24 // If long is natively 64 bit, use the regular fseek and ftell
25 #ifdef _NATIVE_64
26 #define ftello ftell
27 #define fseeko fseek
28 #endif
29
70   #ifndef MAXUINT
71   #define MAXUINT ((unsigned int)-1)
72   #endif
73  
74 < #if defined(_NATIVE_64) || defined(_LP64) || defined(__LP64__)
75 < typedef long int64;
76 < typedef unsigned long uint64;
77 < #else
78 < //assume 32bit environment with long long for int64 stuff
79 < typedef long long int64;
40 < typedef unsigned long long uint64;
74 > #ifndef MAXINT
75 > #define MAXINT INT_MAX
76 > #endif
77 >
78 > #ifndef MAX_UINT
79 > #define MAX_UINT ((unsigned int)-1)
80   #endif
81  
82 + #ifndef MAX_INT
83 + #define MAX_INT INT_MAX
84 + #endif
85 +
86 + typedef int64_t int64;
87 + typedef uint64_t uint64;
88 +
89   /****************************************************************************/
90  
91   #ifndef EXIT_FAILURE
# Line 52 | Line 98
98  
99   /****************************************************************************/
100   #define ERR_ALLOC "Error allocating memory.\n"
55 #if defined (__WIN32__) || defined (WIN32)
56  #define CHPATHSEP '\\'
57  #include <io.h>
58  #define ftello ftell
59  #define fseeko fseek
60 #else
61  #define CHPATHSEP '/'
62  #include <unistd.h>
63 #endif
101  
102   //-------------------
103  
# Line 113 | Line 150
150                                       GError(ERR_ALLOC)
151   #define GFREE(ptr)       GFree((pointer*)(&ptr))
152  
153 < inline char* min(char *arg1, char *arg2) {
153 > inline char* strMin(char *arg1, char *arg2) {
154      return (strcmp(arg1, arg2) < 0)? arg1 : arg2;
155   }
156  
157 + inline char* strMax(char *arg1, char *arg2) {
158 +    return (strcmp(arg2, arg1) < 0)? arg1 : arg2;
159 + }
160 +
161   inline int iround(double x) {
162     return (int)floor(x + 0.5);
163   }
164  
124
165   /****************************************************************************/
166  
127 inline char* max(char *arg1, char *arg2) {
128    return (strcmp(arg2, arg1) < 0)? arg1 : arg2;
129 }
130
167   inline int Gintcmp(int a, int b) {
168   //return (a>b)? 1 : ((a==b)?0:-1);
169    return a-b;
170   }
171  
172 < int Gstrcmp(char* a, char* b);
172 > int Gstrcmp(const char* a, const char* b, int n=-1);
173   //same as strcmp but doesn't crash on NULL pointers
174  
175 < int Gstricmp(const char* a, const char* b);
140 <
141 < inline void swap(int &arg1, int &arg2){
142 < arg1 ^= arg2 ^= arg1 ^= arg2;
143 < }
144 <
145 < inline void swap(char* &arg1, char* &arg2){
146 < register char* swp=arg1;
147 < arg1=arg2; arg2=swp;
148 < }
149 <
150 < inline void swap(unsigned int &arg1, unsigned int &arg2)
151 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
152 <
153 < inline void swap(short &arg1, short &arg2)
154 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
155 <
156 < inline void swap(unsigned short &arg1, unsigned short &arg2)
157 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
175 > int Gstricmp(const char* a, const char* b, int n=-1);
176  
177 < inline void swap(long &arg1, long &arg2)
178 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
179 <
180 < inline void swap(unsigned long &arg1, unsigned long &arg2)
181 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
177 > //basic swap template function
178 > template<class T> void Gswap(T& lhs, T& rhs) {
179 > //register T tmp=lhs;
180 > T tmp=lhs; //requires copy operator
181 > lhs=rhs;
182 > rhs=tmp;
183 > }
184  
185 < inline void swap(char &arg1, char &arg2)
186 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
185 > /// bitCount_32 - this function counts the number of set bits in a value.
186 > /// Ex. bitCount_32(0xF000F000) = 8
187 > /// Returns 0 if the word is zero.
188 > inline uint bitCount_32(uint32_t Value) {
189 > #if __GNUC__ >= 4
190 >    return __builtin_popcount(Value);
191 > #else
192 >    uint32_t v = Value - ((Value >> 1) & 0x55555555);
193 >    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
194 >    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
195 > #endif
196 >  }
197  
198 < inline void swap(unsigned char &arg1, unsigned char &arg2)
199 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
198 > /// bitCount_64 - this function counts the number of set bits in a value,
199 > /// (64 bit edition.)
200 > inline uint bitCount_64(uint64_t Value) {
201 > #if __GNUC__ >= 4
202 >    return __builtin_popcountll(Value);
203 > #else
204 >    uint64_t v = Value - ((Value >> 1) & 0x5555555555555555ULL);
205 >    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
206 >    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
207 >    return uint((uint64_t)(v * 0x0101010101010101ULL) >> 56);
208 > #endif
209 >  }
210  
211 < inline void swap(bool &arg1, bool &arg2)
212 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
211 > /// bitCountTrailingZeros_32 - this function performs the platform optimal form of
212 > /// counting the number of zeros from the least significant bit to the first one
213 > /// bit.  Ex. bitCountTrailingZeros_32(0xFF00FF00) == 8.
214 > /// Returns 32 if the word is zero.
215 > inline unsigned bitCountTrailingZeros_32(uint32_t Value) {
216 > #if __GNUC__ >= 4
217 >  return Value ? __builtin_ctz(Value) : 32;
218 > #else
219 >  static const unsigned Mod37BitPosition[] = {
220 >    32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
221 >    4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
222 >    5, 20, 8, 19, 18
223 >  };
224 >  return Mod37BitPosition[(-Value & Value) % 37];
225 > #endif
226 > }
227  
228 + /// bitCountTrailingZeros_64 - this function performs the platform optimal form
229 + /// of counting the number of zeros from the least significant bit to the first
230 + /// one bit (64 bit edition.)
231 + /// Returns 64 if the word is zero.
232 + inline unsigned bitCountTrailingZeros_64(uint64_t Value) {
233 + #if __GNUC__ >= 4
234 +  return Value ? __builtin_ctzll(Value) : 64;
235 + #else
236 +  static const unsigned Mod67Position[] = {
237 +    64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
238 +    4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55,
239 +    47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27,
240 +    29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56,
241 +    7, 48, 35, 6, 34, 33, 0
242 +  };
243 +  return Mod67Position[(-Value & Value) % 67];
244 + #endif
245 + }
246  
247   /**************** Memory management ***************************/
248  
# Line 179 | Line 251
251   bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
252   void GFree(pointer* ptr); // Free memory, resets ptr to NULL
253  
254 < /********************* debug functions *********************/
254 >
255 > int saprintf(char **retp, const char *fmt, ...);
256  
257   void GError(const char* format,...); // Error routine (aborts program)
258   void GMessage(const char* format,...);// Log message to stderr
259   // Assert failed routine:- usually not called directly but through GASSERT
260   void GAssert(const char* expression, const char* filename, unsigned int lineno);
261  
189
262   // ****************** string manipulation *************************
263   char *Gstrdup(const char* str);
264   //duplicate a string by allocating a copy for it and returning it
# Line 216 | Line 288
288                    void *part, unsigned int partlen);
289  
290   // test if a char is in a string:
291 < bool chrInStr(char c, char* str);
291 > bool chrInStr(char c, const char* str);
292  
293   char* rstrchr(char* str, char ch);
294   /* returns a pointer to the rightmost
295    occurrence of ch in str - like rindex for platforms missing it*/
296  
297 < char* strchrs(char* s, const char* chrs);
297 > char* strchrs(const char* s, const char* chrs);
298   //strchr but with a set of chars instead of only one
299  
300 < char* rstrfind(char* str, const char *substr); /* like rindex() but for strings
301 < or like the right side version of strstr()
230 < */
231 < //reverse character string or
232 < char* reverseChars(char* str, int slen=0);
300 > char* rstrfind(const char* str, const char *substr);
301 > // like rindex() but for strings;  right side version of strstr()
302  
303 < char* rstrstr(char* rstart, char *lend, char* substr);
303 > char* reverseChars(char* str, int slen=0); //in place reversal of string
304 >
305 > char* rstrstr(const char* rstart, const char *lend, const char* substr);
306   /*the reversed, rightside equivalent of strstr: starts searching
307   from right end (rstart), going back to left end (lend) and returns
308   a pointer to the last (right) matching character in str */
309  
310 < char* strifind(char* str,  const char* substr);
310 > char* strifind(const char* str,  const char* substr);
311   // the case insensitive version of strstr -- finding a string within a string
312  
313  
314   //Determines if a string begins with a given prefix
315   //(returns false when any of the params is NULL,
316   // but true when prefix is '' (empty string)!)
317 < bool startsWith(char* s, const char* prefix);
317 > bool startsWith(const char* s, const char* prefix);
318 >
319 > bool endsWith(const char* s, const char* suffix);
320 > //Note: returns true if suffix is empty string, but false if it's NULL
321 >
322  
323   // ELF hash function for strings
324   int strhash(const char* str);
# Line 263 | Line 338
338    //check for overlap with other segment
339    uint len() { return end-start+1; }
340    bool overlap(GSeg* d) {
341 <     return start<d->start ? (d->start<=end) : (start<=d->end);
341 >     //return start<d->start ? (d->start<=end) : (start<=d->end);
342 >     return (start<=d->end && end>=d->start);
343       }
344  
345    bool overlap(GSeg& d) {
346 <     return start<d.start ? (d.start<=end) : (start<=d.end);
346 >     //return start<d.start ? (d.start<=end) : (start<=d.end);
347 >     return (start<=d.end && end>=d.start);
348       }
349  
350    bool overlap(GSeg& d, int fuzz) {
351 <     return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
351 >     //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
352 >     return (start<=d.end+fuzz && end+fuzz>=d.start);
353       }
354  
355    bool overlap(uint s, uint e) {
356 <    if (s>e) { swap(s,e); }
357 <     return start<s ? (s<=end) : (start<=e);
356 >     if (s>e) { Gswap(s,e); }
357 >     //return start<s ? (s<=end) : (start<=e);
358 >     return (start<=e && end>=s);
359       }
360  
361    //return the length of overlap between two segments
# Line 291 | Line 370
370          }
371       }
372    int overlapLen(uint rstart, uint rend) {
373 <     if (rstart>rend) { swap(rstart,rend); }
373 >     if (rstart>rend) { Gswap(rstart,rend); }
374       if (start<rstart) {
375          if (rstart>end) return 0;
376          return (rend>end) ? end-rstart+1 : rend-rstart+1;
# Line 313 | Line 392
392    bool operator==(GSeg& d){
393        return (start==d.start && end==d.end);
394        }
316  bool operator>(GSeg& d){
317     return (start==d.start)?(end>d.end):(start>d.start);
318     }
395    bool operator<(GSeg& d){
396       return (start==d.start)?(end<d.end):(start<d.start);
397       }
# Line 328 | Line 404
404  
405   //GLineReader -- text line reading/buffering class
406   class GLineReader {
407 +   bool closeFile;
408     int len;
409     int allocated;
410     char* buf;
# Line 340 | Line 417
417     char* chars() { return buf; }
418     char* line() { return buf; }
419     int readcount() { return lcount; } //number of lines read
420 +   void setFile(FILE* stream) { file=stream; }
421     int length() { return len; }
422     int size() { return len; } //same as length();
423     bool isEof() {return isEOF; }
# Line 356 | Line 434
434                             // the given file position
435     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
436              // so the next call will in fact return the same line
437 +   GLineReader(const char* fname) {
438 +      FILE* f=fopen(fname, "rb");
439 +      if (f==NULL) GError("Error opening file '%s'!\n",fname);
440 +      closeFile=true;
441 +      init(f);
442 +      }
443     GLineReader(FILE* stream=NULL, off_t fpos=0) {
444 +     closeFile=false;
445 +     init(stream,fpos);
446 +     }
447 +   void init(FILE* stream, off_t fpos=0) {
448       len=0;
449       isEOF=false;
450       allocated=1024;
# Line 369 | Line 457
457       }
458     ~GLineReader() {
459       GFREE(buf);
460 +     if (closeFile) fclose(file);
461       }
462   };
463  
# Line 379 | Line 468
468    */
469   char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
470  
471 +
472 + //print int/values nicely formatted in 3-digit groups
473 + char* commaprint(uint64 n);
474 +
475   /*********************** File management functions *********************/
476  
477 < // removes the directory part from a full-path file name
478 < // this is a destructive operation for the given string!
477 > // removes the last part (file or directory name) of a full path
478 > // WARNING: this is a destructive operation for the given string!
479   void delFileName(char* filepath);
480  
481 < // returns a pointer to the file name part in a full-path filename
482 < char* getFileName(char* filepath);
481 > // returns a pointer to the last file or directory name in a full path
482 > const char* getFileName(const char* filepath);
483 > // returns a pointer to the file "extension" part in a filename
484 > const char* getFileExt(const char* filepath);
485 >
486  
487   int fileExists(const char* fname);
488   //returns 0 if file entry doesn't exist
# Line 394 | Line 490
490   //        2 if it's a regular file
491   //        3 otherwise (?)
492  
493 < off_t fileSize(const char* fpath);
493 > int64 fileSize(const char* fpath);
494 >
495 > //write a FASTA-formatted record
496 > void writeFasta(FILE *fw, const char* seqid, const char* descr,
497 >        const char* seq, int linelen=60, int seqlen=0);
498  
499   //parses the next number found in a string at the current position
500   //until a non-digit (and not a '.', 'e','E','-','+') is encountered;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines