ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/gclib/gclib/GBase.h
(Generate patch)
# Line 1 | Line 1
1   #ifndef G_BASE_DEFINED
2   #define G_BASE_DEFINED
3 <
3 > #ifndef _POSIX_SOURCE
4 > //mostly for MinGW
5 > #define _POSIX_SOURCE
6 > #endif
7 > #ifdef HAVE_CONFIG_H
8 > #include "config.h"
9 > #endif
10   #include <string.h>
11   #include <stdlib.h>
12   #include <stdio.h>
# Line 8 | Line 14
14   #include <limits.h>
15   #include <sys/types.h>
16   #include <sys/stat.h>
17 < #if defined __WIN32__ || defined _WIN32
17 > #include <stdint.h>
18 >
19 > #if defined __WIN32__ || defined WIN32 || defined _WIN32 || defined _WIN32_
20 >  #ifndef __WIN32__
21 >    #define __WIN32__
22 >  #endif
23    #include <windows.h>
24 +  #include <io.h>
25 +  #define CHPATHSEP '\\'
26 +  #undef off_t
27 +  #define off_t int64_t
28 +  #ifdef _fseeki64
29 +    #define fseeko(stream, offset, origin) _fseeki64(stream, offset, origin)
30 +  #else
31 +    /*
32 +    #define _DEFINE_WIN32_FSEEKO
33 +    int fseeko(FILE *stream, off_t offset, int whence);
34 +    */
35 +    #define fseeko fseek
36 +  #endif
37 +  #ifdef _ftelli64
38 +    #define ftello(stream) _ftelli64(stream)
39 +  #else
40 +    /*
41 +    #define _DEFINE_WIN32_FTELLO
42 +    off_t ftello(FILE *stream);
43 +    */
44 +    #define ftello ftell
45 +  #endif
46 + #else
47 +  #define CHPATHSEP '/'
48 +  #include <unistd.h>
49 + #endif
50 +
51 + #ifndef fseeko
52 + #define fseeko fseek
53 + #endif
54 + #ifndef ftello
55 + #define ftello ftell
56   #endif
57  
58   #ifdef DEBUG
59   #undef NDEBUG
60   #endif
61  
62 < typedef unsigned int uint32;
63 < typedef int int32;
62 > typedef int32_t int32;
63 > typedef uint32_t uint32;
64 >
65   typedef unsigned char uchar;
66   typedef unsigned char byte;
67  
24 // If long is natively 64 bit, use the regular fseek and ftell
25 #ifdef _NATIVE_64
26 #define ftello ftell
27 #define fseeko fseek
28 #endif
29
68   #ifndef MAXUINT
69   #define MAXUINT ((unsigned int)-1)
70   #endif
71  
72 < #if defined(_NATIVE_64) || defined(_LP64) || defined(__LP64__)
73 < typedef long int64;
74 < typedef unsigned long uint64;
75 < #else
76 < //assume 32bit environment with long long for int64 stuff
77 < typedef long long int64;
40 < typedef unsigned long long uint64;
72 > #ifndef MAXINT
73 > #define MAXINT INT_MAX
74 > #endif
75 >
76 > #ifndef MAX_UINT
77 > #define MAX_UINT ((unsigned int)-1)
78   #endif
79  
80 + #ifndef MAX_INT
81 + #define MAX_INT INT_MAX
82 + #endif
83 +
84 + typedef int64_t int64;
85 + typedef uint64_t uint64;
86 +
87   /****************************************************************************/
88  
89   #ifndef EXIT_FAILURE
# Line 52 | Line 96
96  
97   /****************************************************************************/
98   #define ERR_ALLOC "Error allocating memory.\n"
55 #if defined (__WIN32__) || defined (WIN32)
56  #define CHPATHSEP '\\'
57  #include <io.h>
58  #define ftello ftell
59  #define fseeko fseek
60 #else
61  #define CHPATHSEP '/'
62  #include <unistd.h>
63 #endif
99  
100   //-------------------
101  
# Line 113 | Line 148
148                                       GError(ERR_ALLOC)
149   #define GFREE(ptr)       GFree((pointer*)(&ptr))
150  
151 < inline char* min(char *arg1, char *arg2) {
151 > inline char* strMin(char *arg1, char *arg2) {
152      return (strcmp(arg1, arg2) < 0)? arg1 : arg2;
153   }
154  
155 + inline char* strMax(char *arg1, char *arg2) {
156 +    return (strcmp(arg2, arg1) < 0)? arg1 : arg2;
157 + }
158 +
159   inline int iround(double x) {
160     return (int)floor(x + 0.5);
161   }
162  
124
163   /****************************************************************************/
164  
127 inline char* max(char *arg1, char *arg2) {
128    return (strcmp(arg2, arg1) < 0)? arg1 : arg2;
129 }
130
165   inline int Gintcmp(int a, int b) {
166   //return (a>b)? 1 : ((a==b)?0:-1);
167    return a-b;
# Line 138 | Line 172
172  
173   int Gstricmp(const char* a, const char* b);
174  
175 < inline void swap(int &arg1, int &arg2){
176 < arg1 ^= arg2 ^= arg1 ^= arg2;
177 < }
178 <
179 < inline void swap(char* &arg1, char* &arg2){
180 < register char* swp=arg1;
181 < arg1=arg2; arg2=swp;
148 < }
149 <
150 < inline void swap(unsigned int &arg1, unsigned int &arg2)
151 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
152 <
153 < inline void swap(short &arg1, short &arg2)
154 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
155 <
156 < inline void swap(unsigned short &arg1, unsigned short &arg2)
157 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
158 <
159 < inline void swap(long &arg1, long &arg2)
160 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
161 <
162 < inline void swap(unsigned long &arg1, unsigned long &arg2)
163 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
175 > //basic swap template function
176 > template<class T> void Gswap(T& lhs, T& rhs) {
177 > //register T tmp=lhs;
178 > T tmp=lhs; //requires copy operator
179 > lhs=rhs;
180 > rhs=tmp;
181 > }
182  
183 < inline void swap(char &arg1, char &arg2)
184 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
183 > /// bitCount_32 - this function counts the number of set bits in a value.
184 > /// Ex. bitCount_32(0xF000F000) = 8
185 > /// Returns 0 if the word is zero.
186 > inline uint bitCount_32(uint32_t Value) {
187 > #if __GNUC__ >= 4
188 >    return __builtin_popcount(Value);
189 > #else
190 >    uint32_t v = Value - ((Value >> 1) & 0x55555555);
191 >    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
192 >    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
193 > #endif
194 >  }
195  
196 < inline void swap(unsigned char &arg1, unsigned char &arg2)
197 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
196 > /// bitCount_64 - this function counts the number of set bits in a value,
197 > /// (64 bit edition.)
198 > inline uint bitCount_64(uint64_t Value) {
199 > #if __GNUC__ >= 4
200 >    return __builtin_popcountll(Value);
201 > #else
202 >    uint64_t v = Value - ((Value >> 1) & 0x5555555555555555ULL);
203 >    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
204 >    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
205 >    return uint((uint64_t)(v * 0x0101010101010101ULL) >> 56);
206 > #endif
207 >  }
208  
209 < inline void swap(bool &arg1, bool &arg2)
210 < { arg1 ^= arg2 ^= arg1 ^= arg2; }
209 > /// bitCountTrailingZeros_32 - this function performs the platform optimal form of
210 > /// counting the number of zeros from the least significant bit to the first one
211 > /// bit.  Ex. bitCountTrailingZeros_32(0xFF00FF00) == 8.
212 > /// Returns 32 if the word is zero.
213 > inline unsigned bitCountTrailingZeros_32(uint32_t Value) {
214 > #if __GNUC__ >= 4
215 >  return Value ? __builtin_ctz(Value) : 32;
216 > #else
217 >  static const unsigned Mod37BitPosition[] = {
218 >    32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13,
219 >    4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9,
220 >    5, 20, 8, 19, 18
221 >  };
222 >  return Mod37BitPosition[(-Value & Value) % 37];
223 > #endif
224 > }
225  
226 + /// bitCountTrailingZeros_64 - this function performs the platform optimal form
227 + /// of counting the number of zeros from the least significant bit to the first
228 + /// one bit (64 bit edition.)
229 + /// Returns 64 if the word is zero.
230 + inline unsigned bitCountTrailingZeros_64(uint64_t Value) {
231 + #if __GNUC__ >= 4
232 +  return Value ? __builtin_ctzll(Value) : 64;
233 + #else
234 +  static const unsigned Mod67Position[] = {
235 +    64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54,
236 +    4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55,
237 +    47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27,
238 +    29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56,
239 +    7, 48, 35, 6, 34, 33, 0
240 +  };
241 +  return Mod67Position[(-Value & Value) % 67];
242 + #endif
243 + }
244  
245   /**************** Memory management ***************************/
246  
# Line 179 | Line 249
249   bool GRealloc(pointer* ptr,unsigned long size); // Resize memory
250   void GFree(pointer* ptr); // Free memory, resets ptr to NULL
251  
252 < /********************* debug functions *********************/
252 >
253 > int saprintf(char **retp, const char *fmt, ...);
254  
255   void GError(const char* format,...); // Error routine (aborts program)
256   void GMessage(const char* format,...);// Log message to stderr
257   // Assert failed routine:- usually not called directly but through GASSERT
258   void GAssert(const char* expression, const char* filename, unsigned int lineno);
259  
189
260   // ****************** string manipulation *************************
261   char *Gstrdup(const char* str);
262   //duplicate a string by allocating a copy for it and returning it
# Line 216 | Line 286
286                    void *part, unsigned int partlen);
287  
288   // test if a char is in a string:
289 < bool chrInStr(char c, char* str);
289 > bool chrInStr(char c, const char* str);
290  
291   char* rstrchr(char* str, char ch);
292   /* returns a pointer to the rightmost
293    occurrence of ch in str - like rindex for platforms missing it*/
294  
295 < char* strchrs(char* s, const char* chrs);
295 > char* strchrs(const char* s, const char* chrs);
296   //strchr but with a set of chars instead of only one
297  
298 < char* rstrfind(char* str, const char *substr); /* like rindex() but for strings
299 < or like the right side version of strstr()
230 < */
231 < //reverse character string or
232 < char* reverseChars(char* str, int slen=0);
298 > char* rstrfind(const char* str, const char *substr);
299 > // like rindex() but for strings;  right side version of strstr()
300  
301 < char* rstrstr(char* rstart, char *lend, char* substr);
301 > char* reverseChars(char* str, int slen=0); //in place reversal of string
302 >
303 > char* rstrstr(const char* rstart, const char *lend, const char* substr);
304   /*the reversed, rightside equivalent of strstr: starts searching
305   from right end (rstart), going back to left end (lend) and returns
306   a pointer to the last (right) matching character in str */
307  
308 < char* strifind(char* str,  const char* substr);
308 > char* strifind(const char* str,  const char* substr);
309   // the case insensitive version of strstr -- finding a string within a string
310  
311  
312   //Determines if a string begins with a given prefix
313   //(returns false when any of the params is NULL,
314   // but true when prefix is '' (empty string)!)
315 < bool startsWith(char* s, const char* prefix);
315 > bool startsWith(const char* s, const char* prefix);
316 >
317 > bool endsWith(const char* s, const char* suffix);
318 > //Note: returns true if suffix is empty string, but false if it's NULL
319 >
320  
321   // ELF hash function for strings
322   int strhash(const char* str);
# Line 263 | Line 336
336    //check for overlap with other segment
337    uint len() { return end-start+1; }
338    bool overlap(GSeg* d) {
339 <     return start<d->start ? (d->start<=end) : (start<=d->end);
339 >     //return start<d->start ? (d->start<=end) : (start<=d->end);
340 >     return (start<=d->end && end>=d->start);
341       }
342  
343    bool overlap(GSeg& d) {
344 <     return start<d.start ? (d.start<=end) : (start<=d.end);
344 >     //return start<d.start ? (d.start<=end) : (start<=d.end);
345 >     return (start<=d.end && end>=d.start);
346       }
347  
348    bool overlap(GSeg& d, int fuzz) {
349 <     return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
349 >     //return start<d.start ? (d.start<=end+fuzz) : (start<=d.end+fuzz);
350 >     return (start<=d.end+fuzz && end+fuzz>=d.start);
351       }
352  
353    bool overlap(uint s, uint e) {
354 <    if (s>e) { swap(s,e); }
355 <     return start<s ? (s<=end) : (start<=e);
354 >     if (s>e) { Gswap(s,e); }
355 >     //return start<s ? (s<=end) : (start<=e);
356 >     return (start<=e && end>=s);
357       }
358  
359    //return the length of overlap between two segments
# Line 291 | Line 368
368          }
369       }
370    int overlapLen(uint rstart, uint rend) {
371 <     if (rstart>rend) { swap(rstart,rend); }
371 >     if (rstart>rend) { Gswap(rstart,rend); }
372       if (start<rstart) {
373          if (rstart>end) return 0;
374          return (rend>end) ? end-rstart+1 : rend-rstart+1;
# Line 313 | Line 390
390    bool operator==(GSeg& d){
391        return (start==d.start && end==d.end);
392        }
316  bool operator>(GSeg& d){
317     return (start==d.start)?(end>d.end):(start>d.start);
318     }
393    bool operator<(GSeg& d){
394       return (start==d.start)?(end<d.end):(start<d.start);
395       }
# Line 328 | Line 402
402  
403   //GLineReader -- text line reading/buffering class
404   class GLineReader {
405 +   bool closeFile;
406     int len;
407     int allocated;
408     char* buf;
# Line 340 | Line 415
415     char* chars() { return buf; }
416     char* line() { return buf; }
417     int readcount() { return lcount; } //number of lines read
418 +   void setFile(FILE* stream) { file=stream; }
419     int length() { return len; }
420     int size() { return len; } //same as length()
421     bool isEof() {return isEOF; }
# Line 356 | Line 432
432                             // the given file position
433     void pushBack() { if (lcount>0) pushed=true; } // "undo" the last getLine request
434              // so the next call will in fact return the same line
435 +   GLineReader(const char* fname) {
436 +      FILE* f=fopen(fname, "rb");
437 +      if (f==NULL) GError("Error opening file '%s'!\n",fname);
438 +      closeFile=true;
439 +      init(f);
440 +      }
441     GLineReader(FILE* stream=NULL, off_t fpos=0) {
442 +     closeFile=false;
443 +     init(stream,fpos);
444 +     }
445 +   void init(FILE* stream, off_t fpos=0) {
446       len=0;
447       isEOF=false;
448       allocated=1024;
# Line 369 | Line 455
455       }
456     ~GLineReader() {
457       GFREE(buf);
458 +     if (closeFile) fclose(file);
459       }
460   };
461  
# Line 379 | Line 466
466    */
467   char* fgetline(char* & buf, int& buflen, FILE* stream, off_t* f_pos=NULL, int* linelen=NULL);
468  
469 +
470 + //print int/values nicely formatted in 3-digit groups
471 + char* commaprint(uint64 n);
472 +
473   /*********************** File management functions *********************/
474  
475 < // removes the directory part from a full-path file name
476 < // this is a destructive operation for the given string!
475 > // removes the last part (file or directory name) of a full path
476 > // WARNING: this is a destructive operation for the given string!
477   void delFileName(char* filepath);
478  
479 < // returns a pointer to the file name part in a full-path filename
480 < char* getFileName(char* filepath);
479 > // returns a pointer to the last file or directory name in a full path
480 > const char* getFileName(const char* filepath);
481 > // returns a pointer to the file "extension" part in a filename
482 > const char* getFileExt(const char* filepath);
483 >
484  
485   int fileExists(const char* fname);
486   //returns 0 if file entry doesn't exist
# Line 394 | Line 488
488   //        2 if it's a regular file
489   //        3 otherwise (?)
490  
491 < off_t fileSize(const char* fpath);
491 > int64 fileSize(const char* fpath);
492 >
493 > //write a FASTA-formatted sequence record to fw
494 > void writeFasta(FILE *fw, const char* seqid, const char* descr,
495 >        const char* seq, int linelen=60, int seqlen=0);
496  
497   //parses the next number found in a string at the current position
498   //until a non-digit (and not a '.', 'e','E','-','+') is encountered;

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines