1 |
/* |
2 |
This file contains the implementation of the BIOSEG PostgreSQL type. The |
3 |
implementation is a copy of the SEG type from the contrib directory in the |
4 |
PostgreSQL source, which has been changed to represent ranges of integers. |
5 |
The anticipated use of this type is to represent contiguous intervals of a |
6 |
biological sequence. |
7 |
|
8 |
SEG code written Gene Selkov, Jr. |
9 |
|
10 |
Changes for BIOSEG written by Kim Rutherford <kmr@xenu.org.uk>. |
11 |
*/ |
12 |
|
13 |
#include "postgres.h" |
14 |
|
15 |
#include <stdlib.h> |
16 |
#include <errno.h> |
17 |
#include <limits.h> |
18 |
|
19 |
#include "access/gist.h" |
20 |
#include "access/skey.h" |
21 |
#include "utils/builtins.h" |
22 |
|
23 |
#include "biosegdata.h" |
24 |
|
25 |
#ifdef INTERBASE_COORDS |
26 |
#define BIOSEG bioseg0 |
27 |
#define BIOSEG_PREFIX(x) bioseg0 ## x |
28 |
#else |
29 |
#define BIOSEG bioseg |
30 |
#define BIOSEG_PREFIX(x) bioseg ## x |
31 |
#endif |
32 |
|
33 |
PG_MODULE_MAGIC; |
34 |
|
35 |
extern int BIOSEG_PREFIX(_yyparse)(); |
36 |
extern void BIOSEG_PREFIX(_yyerror)(const char *message); |
37 |
extern void BIOSEG_PREFIX(_scanner_init)(const char *str); |
38 |
extern void BIOSEG_PREFIX(_scanner_finish)(void); |
39 |
|
40 |
/* |
41 |
** Input/Output routines |
42 |
*/ |
43 |
SEG *BIOSEG_PREFIX(_in)(char *str); |
44 |
char *BIOSEG_PREFIX(_out)(SEG * seg); |
45 |
SEG *BIOSEG_PREFIX(_create)(int32 lower, int32 upper); |
46 |
int32 BIOSEG_PREFIX(_lower)(SEG * seg); |
47 |
int32 BIOSEG_PREFIX(_upper)(SEG * seg); |
48 |
int32 BIOSEG_PREFIX(_center)(SEG * seg); |
49 |
|
50 |
/* |
51 |
** GiST support methods |
52 |
*/ |
53 |
bool BIOSEG_PREFIX(_gist_consistent)(GISTENTRY *entry, SEG * query, StrategyNumber strategy); |
54 |
GISTENTRY *BIOSEG_PREFIX(_gist_compress)(GISTENTRY *entry); |
55 |
GISTENTRY *BIOSEG_PREFIX(_gist_decompress)(GISTENTRY *entry); |
56 |
float *BIOSEG_PREFIX(_gist_penalty)(GISTENTRY *origentry, GISTENTRY *newentry, float *result); |
57 |
GIST_SPLITVEC *BIOSEG_PREFIX(_gist_picksplit)(GistEntryVector *entryvec, GIST_SPLITVEC *v); |
58 |
bool BIOSEG_PREFIX(_gist_leaf_consistent)(SEG * key, SEG * query, StrategyNumber strategy); |
59 |
bool BIOSEG_PREFIX(_gist_internal_consistent)(SEG * key, SEG * query, StrategyNumber strategy); |
60 |
SEG *BIOSEG_PREFIX(_gist_union)(GistEntryVector *entryvec, int *sizep); |
61 |
SEG *BIOSEG_PREFIX(_gist_binary_union)(SEG * r1, SEG * r2, int *sizep); |
62 |
bool *BIOSEG_PREFIX(_gist_same)(SEG * b1, SEG * b2, bool *result); |
63 |
|
64 |
|
65 |
/* |
66 |
** R-tree support functions |
67 |
*/ |
68 |
bool BIOSEG_PREFIX(_same)(SEG * a, SEG * b); |
69 |
bool BIOSEG_PREFIX(_contains_int)(SEG * a, int *b); |
70 |
bool BIOSEG_PREFIX(_contains)(SEG * a, SEG * b); |
71 |
bool BIOSEG_PREFIX(_contained)(SEG * a, SEG * b); |
72 |
bool BIOSEG_PREFIX(_overlap)(SEG * a, SEG * b); |
73 |
bool BIOSEG_PREFIX(_left)(SEG * a, SEG * b); |
74 |
bool BIOSEG_PREFIX(_over_left)(SEG * a, SEG * b); |
75 |
bool BIOSEG_PREFIX(_right)(SEG * a, SEG * b); |
76 |
bool BIOSEG_PREFIX(_over_right)(SEG * a, SEG * b); |
77 |
void BIOSEG_PREFIX(_rt_size)(SEG * a, int32 *sz); |
78 |
int32 BIOSEG_PREFIX(_size)(SEG * a); |
79 |
|
80 |
static SEG *BIOSEG_PREFIX(_inter)(SEG * a, SEG * b); |
81 |
static SEG *BIOSEG_PREFIX(_union)(SEG * a, SEG * b); |
82 |
|
83 |
/* |
84 |
** Various operators |
85 |
*/ |
86 |
int32 BIOSEG_PREFIX(_cmp)(SEG * a, SEG * b); |
87 |
bool BIOSEG_PREFIX(_lt)(SEG * a, SEG * b); |
88 |
bool BIOSEG_PREFIX(_le)(SEG * a, SEG * b); |
89 |
bool BIOSEG_PREFIX(_gt)(SEG * a, SEG * b); |
90 |
bool BIOSEG_PREFIX(_ge)(SEG * a, SEG * b); |
91 |
bool BIOSEG_PREFIX(_different)(SEG * a, SEG * b); |
92 |
Datum BIOSEG_PREFIX(_joinsel)(PG_FUNCTION_ARGS); |
93 |
Datum BIOSEG_PREFIX(_sel)(PG_FUNCTION_ARGS); |
94 |
Datum BIOSEG_PREFIX(_contsel)(PG_FUNCTION_ARGS); |
95 |
Datum BIOSEG_PREFIX(_contjoinsel)(PG_FUNCTION_ARGS); |
96 |
|
97 |
|
98 |
static int get_dots(char **str); |
99 |
static int get_int(char **str, int32 *result); |
100 |
|
101 |
|
102 |
/***************************************************************************** |
103 |
* Input/Output functions |
104 |
*****************************************************************************/ |
105 |
|
106 |
int MAX_DIGITS = 10; |
107 |
|
108 |
SEG * |
109 |
BIOSEG_PREFIX(_in)(char *str) |
110 |
{ |
111 |
int32 lower; |
112 |
int32 upper; |
113 |
|
114 |
if (!get_int(&str, &lower)) { |
115 |
return NULL; |
116 |
} |
117 |
|
118 |
if (str[0] == 0) { |
119 |
upper = lower; |
120 |
} else { |
121 |
if (!get_dots(&str)) { |
122 |
ereport(ERROR, |
123 |
(errcode(ERRCODE_SYNTAX_ERROR), |
124 |
errmsg("bad bioseg representation"), |
125 |
errdetail("number followed by something other than ..: %s", str))); |
126 |
return NULL; |
127 |
} |
128 |
|
129 |
if (!get_int(&str, &upper)) { |
130 |
ereport(ERROR, |
131 |
(errcode(ERRCODE_SYNTAX_ERROR), |
132 |
errmsg("bad bioseg representation"), |
133 |
errdetail("number\"..\" followed by something other than a number: %s", str))); |
134 |
return NULL; |
135 |
} |
136 |
|
137 |
if (lower > upper) { |
138 |
ereport(ERROR, |
139 |
(errcode(ERRCODE_SYNTAX_ERROR), |
140 |
errmsg("bad bioseg representation"), |
141 |
errdetail("lower limit of range greater than upper"))); |
142 |
return NULL; |
143 |
} |
144 |
|
145 |
if (str[0] != 0) { |
146 |
ereport(ERROR, |
147 |
(errcode(ERRCODE_SYNTAX_ERROR), |
148 |
errmsg("bad bioseg representation"), |
149 |
errdetail("garbage at end of string: %s", str))); |
150 |
return NULL; |
151 |
} |
152 |
} |
153 |
|
154 |
{ |
155 |
SEG *result = palloc(sizeof(SEG)); |
156 |
|
157 |
result->lower = lower; |
158 |
result->upper = upper; |
159 |
return result; |
160 |
} |
161 |
} |
162 |
|
163 |
#ifdef INTERBASE_COORDS |
164 |
#define MIN_LOWER INT_MIN |
165 |
#else |
166 |
#define MIN_LOWER 1l |
167 |
#endif |
168 |
|
169 |
int get_int(char **strp, int32 *result) { |
170 |
char *return_pos; |
171 |
long int long_result = -1; |
172 |
|
173 |
if (!(*strp)[0]) { |
174 |
ereport(ERROR, |
175 |
(errcode(ERRCODE_SYNTAX_ERROR), |
176 |
errmsg("bad bioseg representation"), |
177 |
errdetail("end of string found when expecting an integer"))); |
178 |
return 0; |
179 |
} |
180 |
|
181 |
errno = 0; |
182 |
|
183 |
long_result = strtol(*strp, &return_pos, 0); |
184 |
|
185 |
if (errno == ERANGE && (long_result == LONG_MAX || long_result == LONG_MIN)) { |
186 |
ereport(ERROR, |
187 |
(errcode(ERRCODE_SYNTAX_ERROR), |
188 |
errmsg("bad bioseg representation"), |
189 |
errdetail("integer at: %s is out of range", *strp))); |
190 |
return 0; |
191 |
} |
192 |
|
193 |
if (errno != 0 && long_result == 0) { |
194 |
ereport(ERROR, |
195 |
(errcode(ERRCODE_SYNTAX_ERROR), |
196 |
errmsg("bad bioseg representation"), |
197 |
errdetail("unable to read an integer: %s", strerror(errno)))); |
198 |
return 0; |
199 |
} |
200 |
|
201 |
if (*strp == return_pos) { |
202 |
ereport(ERROR, |
203 |
(errcode(ERRCODE_SYNTAX_ERROR), |
204 |
errmsg("bad bioseg representation"), |
205 |
errdetail("no integer found at: %s", *strp))); |
206 |
return 0; |
207 |
} |
208 |
|
209 |
if (long_result < MIN_LOWER) { |
210 |
// when INTERBASE_COORDS is set, on machines where |
211 |
// sizeof(long int) == sizeof(int32) this block won't be executed because |
212 |
// strtol() will return ERANGE |
213 |
ereport(ERROR, |
214 |
(errcode(ERRCODE_SYNTAX_ERROR), |
215 |
errmsg("bad bioseg representation"), |
216 |
errdetail("integer %ld at: %s is out of range - must be >= %d", |
217 |
long_result, *strp, MIN_LOWER))); |
218 |
return 0; |
219 |
} |
220 |
|
221 |
if (long_result > INT_MAX) { |
222 |
// on machines where sizeof(long int) == sizeof(int32) this won't happen |
223 |
// because strtol() will return ERANGE |
224 |
ereport(ERROR, |
225 |
(errcode(ERRCODE_SYNTAX_ERROR), |
226 |
errmsg("bad bioseg representation"), |
227 |
errdetail("integer %ld at: %s is out of range - must be <= %d", |
228 |
long_result, *strp, INT_MAX))); |
229 |
return 0; |
230 |
} |
231 |
|
232 |
*strp = return_pos; |
233 |
*result = long_result; |
234 |
|
235 |
return 1; |
236 |
} |
237 |
|
238 |
int |
239 |
get_dots(char **strp) |
240 |
{ |
241 |
if ((*strp)[0] == '.') { |
242 |
(*strp)++; |
243 |
if ((*strp)[0] == '.') { |
244 |
(*strp)++; |
245 |
if ((*strp)[0] == '.') { |
246 |
// allow for "10...20" |
247 |
(*strp)++; |
248 |
} |
249 |
return 1; |
250 |
} else { |
251 |
return 0; |
252 |
} |
253 |
} else { |
254 |
return 0; |
255 |
} |
256 |
} |
257 |
|
258 |
char * |
259 |
BIOSEG_PREFIX(_out)(SEG * bioseg) |
260 |
{ |
261 |
char *result; |
262 |
char *p; |
263 |
|
264 |
if (bioseg == NULL) |
265 |
return (NULL); |
266 |
|
267 |
p = result = (char *) palloc(40); |
268 |
|
269 |
if (bioseg->lower == bioseg->upper) |
270 |
{ |
271 |
/* |
272 |
* indicates that this interval was built by bioseg_in off a single point |
273 |
*/ |
274 |
sprintf(p, "%d", bioseg->lower); |
275 |
} |
276 |
else |
277 |
{ |
278 |
sprintf(p, "%d..%d", bioseg->lower, bioseg->upper); |
279 |
} |
280 |
|
281 |
return (result); |
282 |
} |
283 |
|
284 |
SEG * |
285 |
BIOSEG_PREFIX(_create)(int32 lower, int32 upper) |
286 |
{ |
287 |
SEG *result = palloc(sizeof(SEG)); |
288 |
|
289 |
if (lower > upper) { |
290 |
ereport(ERROR, |
291 |
(errcode(ERRCODE_SYNTAX_ERROR), |
292 |
errmsg("bad bioseg representation"), |
293 |
errdetail("lower limit of range greater than upper"))); |
294 |
return NULL; |
295 |
} |
296 |
|
297 |
if (lower < MIN_LOWER) { |
298 |
ereport(ERROR, |
299 |
(errcode(ERRCODE_SYNTAX_ERROR), |
300 |
errmsg("bad bioseg representation"), |
301 |
errdetail("lower bound (%ld) must be >= %ld", |
302 |
long_result, MIN_LOWER))); |
303 |
return 0; |
304 |
} |
305 |
|
306 |
result->lower = lower; |
307 |
result->upper = upper; |
308 |
|
309 |
return result; |
310 |
} |
311 |
|
312 |
|
313 |
|
314 |
int32 |
315 |
BIOSEG_PREFIX(_center)(SEG * bioseg) |
316 |
{ |
317 |
return ((int32) bioseg->lower + (int32) bioseg->upper) / 2; |
318 |
} |
319 |
|
320 |
int32 |
321 |
BIOSEG_PREFIX(_lower)(SEG * bioseg) |
322 |
{ |
323 |
return bioseg->lower; |
324 |
} |
325 |
|
326 |
int32 |
327 |
BIOSEG_PREFIX(_upper)(SEG * bioseg) |
328 |
{ |
329 |
return bioseg->upper; |
330 |
} |
331 |
|
332 |
|
333 |
/***************************************************************************** |
334 |
* GiST functions |
335 |
*****************************************************************************/ |
336 |
|
337 |
/* |
338 |
** The GiST Consistent method for biosegments |
339 |
** Should return false if for all data items x below entry, |
340 |
** the predicate x op query == FALSE, where op is the oper |
341 |
** corresponding to strategy in the pg_amop table. |
342 |
*/ |
343 |
bool |
344 |
BIOSEG_PREFIX(_gist_consistent)(GISTENTRY *entry, |
345 |
SEG * query, |
346 |
StrategyNumber strategy) |
347 |
{ |
348 |
/* |
349 |
* if entry is not leaf, use bioseg_gist_internal_consistent, else use |
350 |
* bioseg_gist_leaf_consistent |
351 |
*/ |
352 |
if (GIST_LEAF(entry)) |
353 |
return (BIOSEG_PREFIX(_gist_leaf_consistent)((SEG *) DatumGetPointer(entry->key), query, strategy)); |
354 |
else |
355 |
return (BIOSEG_PREFIX(_gist_internal_consistent)((SEG *) DatumGetPointer(entry->key), query, strategy)); |
356 |
} |
357 |
|
358 |
/* |
359 |
** The GiST Union method for biosegments |
360 |
** returns the minimal bounding bioseg that encloses all the entries in entryvec |
361 |
*/ |
362 |
SEG * |
363 |
BIOSEG_PREFIX(_gist_union)(GistEntryVector *entryvec, int *sizep) |
364 |
{ |
365 |
int numranges; |
366 |
int i; |
367 |
SEG *out = (SEG *) NULL; |
368 |
SEG *tmp; |
369 |
|
370 |
#ifdef GIST_DEBUG |
371 |
fprintf(stderr, "union\n"); |
372 |
#endif |
373 |
|
374 |
numranges = entryvec->n; |
375 |
tmp = (SEG *) DatumGetPointer(entryvec->vector[0].key); |
376 |
*sizep = sizeof(SEG); |
377 |
|
378 |
for (i = 1; i < numranges; i++) |
379 |
{ |
380 |
out = BIOSEG_PREFIX(_gist_binary_union)(tmp, (SEG *) |
381 |
DatumGetPointer(entryvec->vector[i].key), |
382 |
sizep); |
383 |
tmp = out; |
384 |
} |
385 |
|
386 |
return (out); |
387 |
} |
388 |
|
389 |
/* |
390 |
** GiST Compress and Decompress methods for biosegments |
391 |
** do not do anything. |
392 |
*/ |
393 |
GISTENTRY * |
394 |
BIOSEG_PREFIX(_gist_compress)(GISTENTRY *entry) |
395 |
{ |
396 |
return (entry); |
397 |
} |
398 |
|
399 |
GISTENTRY * |
400 |
BIOSEG_PREFIX(_gist_decompress)(GISTENTRY *entry) |
401 |
{ |
402 |
return (entry); |
403 |
} |
404 |
|
405 |
/* |
406 |
** The GiST Penalty method for biosegments |
407 |
** As in the R-tree paper, we use change in area as our penalty metric |
408 |
*/ |
409 |
float * |
410 |
BIOSEG_PREFIX(_gist_penalty)(GISTENTRY *origentry, GISTENTRY *newentry, float *result) |
411 |
{ |
412 |
SEG *ud; |
413 |
int32 tmp1; |
414 |
int32 tmp2; |
415 |
|
416 |
ud = BIOSEG_PREFIX(_union)((SEG *) DatumGetPointer(origentry->key), |
417 |
(SEG *) DatumGetPointer(newentry->key)); |
418 |
BIOSEG_PREFIX(_rt_size)(ud, &tmp1); |
419 |
BIOSEG_PREFIX(_rt_size)((SEG *) DatumGetPointer(origentry->key), &tmp2); |
420 |
*result = tmp1 - tmp2; |
421 |
|
422 |
#ifdef GIST_DEBUG |
423 |
fprintf(stderr, "penalty\n"); |
424 |
fprintf(stderr, "\t%g\n", *result); |
425 |
#endif |
426 |
|
427 |
return (result); |
428 |
} |
429 |
|
430 |
|
431 |
/* |
432 |
** The GiST PickSplit method for segments |
433 |
** We use Guttman's poly time split algorithm |
434 |
*/ |
435 |
GIST_SPLITVEC * |
436 |
BIOSEG_PREFIX(_gist_picksplit)(GistEntryVector *entryvec, |
437 |
GIST_SPLITVEC *v) |
438 |
{ |
439 |
OffsetNumber i; |
440 |
OffsetNumber j; |
441 |
SEG *datum_alpha; |
442 |
SEG *datum_beta; |
443 |
SEG *datum_l; |
444 |
SEG *datum_r; |
445 |
SEG *union_d; |
446 |
SEG *union_dl; |
447 |
SEG *union_dr; |
448 |
SEG *inter_d; |
449 |
bool firsttime; |
450 |
int32 size_alpha; |
451 |
int32 size_beta; |
452 |
int32 size_union; |
453 |
int32 size_inter; |
454 |
int32 size_waste; |
455 |
int32 waste; |
456 |
int32 size_l; |
457 |
int32 size_r; |
458 |
int nbytes; |
459 |
OffsetNumber seed_1 = 1; |
460 |
OffsetNumber seed_2 = 2; |
461 |
OffsetNumber *left; |
462 |
OffsetNumber *right; |
463 |
OffsetNumber maxoff; |
464 |
|
465 |
#ifdef GIST_DEBUG |
466 |
fprintf(stderr, "picksplit\n"); |
467 |
#endif |
468 |
|
469 |
maxoff = entryvec->n - 2; |
470 |
nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
471 |
v->spl_left = (OffsetNumber *) palloc(nbytes); |
472 |
v->spl_right = (OffsetNumber *) palloc(nbytes); |
473 |
|
474 |
firsttime = true; |
475 |
waste = 0.0; |
476 |
|
477 |
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) |
478 |
{ |
479 |
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key); |
480 |
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) |
481 |
{ |
482 |
datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key); |
483 |
|
484 |
/* compute the wasted space by unioning these guys */ |
485 |
/* size_waste = size_union - size_inter; */ |
486 |
union_d = BIOSEG_PREFIX(_union)(datum_alpha, datum_beta); |
487 |
BIOSEG_PREFIX(_rt_size)(union_d, &size_union); |
488 |
inter_d = BIOSEG_PREFIX(_inter)(datum_alpha, datum_beta); |
489 |
BIOSEG_PREFIX(_rt_size)(inter_d, &size_inter); |
490 |
size_waste = size_union - size_inter; |
491 |
|
492 |
/* |
493 |
* are these a more promising split that what we've already seen? |
494 |
*/ |
495 |
if (size_waste > waste || firsttime) |
496 |
{ |
497 |
waste = size_waste; |
498 |
seed_1 = i; |
499 |
seed_2 = j; |
500 |
firsttime = false; |
501 |
} |
502 |
} |
503 |
} |
504 |
|
505 |
left = v->spl_left; |
506 |
v->spl_nleft = 0; |
507 |
right = v->spl_right; |
508 |
v->spl_nright = 0; |
509 |
|
510 |
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key); |
511 |
datum_l = BIOSEG_PREFIX(_union)(datum_alpha, datum_alpha); |
512 |
BIOSEG_PREFIX(_rt_size)(datum_l, &size_l); |
513 |
datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key); |
514 |
datum_r = BIOSEG_PREFIX(_union)(datum_beta, datum_beta); |
515 |
BIOSEG_PREFIX(_rt_size)(datum_r, &size_r); |
516 |
|
517 |
/* |
518 |
* Now split up the regions between the two seeds. An important property |
519 |
* of this split algorithm is that the split vector v has the indices of |
520 |
* items to be split in order in its left and right vectors. We exploit |
521 |
* this property by doing a merge in the code that actually splits the |
522 |
* page. |
523 |
* |
524 |
* For efficiency, we also place the new index tuple in this loop. This is |
525 |
* handled at the very end, when we have placed all the existing tuples |
526 |
* and i == maxoff + 1. |
527 |
*/ |
528 |
|
529 |
maxoff = OffsetNumberNext(maxoff); |
530 |
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) |
531 |
{ |
532 |
/* |
533 |
* If we've already decided where to place this item, just put it on |
534 |
* the right list. Otherwise, we need to figure out which page needs |
535 |
* the least enlargement in order to store the item. |
536 |
*/ |
537 |
|
538 |
if (i == seed_1) |
539 |
{ |
540 |
*left++ = i; |
541 |
v->spl_nleft++; |
542 |
continue; |
543 |
} |
544 |
else if (i == seed_2) |
545 |
{ |
546 |
*right++ = i; |
547 |
v->spl_nright++; |
548 |
continue; |
549 |
} |
550 |
|
551 |
/* okay, which page needs least enlargement? */ |
552 |
datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key); |
553 |
union_dl = BIOSEG_PREFIX(_union)(datum_l, datum_alpha); |
554 |
union_dr = BIOSEG_PREFIX(_union)(datum_r, datum_alpha); |
555 |
BIOSEG_PREFIX(_rt_size)(union_dl, &size_alpha); |
556 |
BIOSEG_PREFIX(_rt_size)(union_dr, &size_beta); |
557 |
|
558 |
/* pick which page to add it to */ |
559 |
if (size_alpha - size_l < size_beta - size_r) |
560 |
{ |
561 |
datum_l = union_dl; |
562 |
size_l = size_alpha; |
563 |
*left++ = i; |
564 |
v->spl_nleft++; |
565 |
} |
566 |
else |
567 |
{ |
568 |
datum_r = union_dr; |
569 |
size_r = size_alpha; |
570 |
*right++ = i; |
571 |
v->spl_nright++; |
572 |
} |
573 |
} |
574 |
*left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */ |
575 |
|
576 |
v->spl_ldatum = PointerGetDatum(datum_l); |
577 |
v->spl_rdatum = PointerGetDatum(datum_r); |
578 |
|
579 |
return v; |
580 |
} |
581 |
|
582 |
/* |
583 |
** Equality methods |
584 |
*/ |
585 |
bool * |
586 |
BIOSEG_PREFIX(_gist_same)(SEG * b1, SEG * b2, bool *result) |
587 |
{ |
588 |
if (BIOSEG_PREFIX(_same)(b1, b2)) |
589 |
*result = TRUE; |
590 |
else |
591 |
*result = FALSE; |
592 |
|
593 |
#ifdef GIST_DEBUG |
594 |
fprintf(stderr, "same: %s\n", (*result ? "TRUE" : "FALSE")); |
595 |
#endif |
596 |
|
597 |
return (result); |
598 |
} |
599 |
|
600 |
/* |
601 |
** SUPPORT ROUTINES |
602 |
*/ |
603 |
bool |
604 |
BIOSEG_PREFIX(_gist_leaf_consistent)(SEG * key, |
605 |
SEG * query, |
606 |
StrategyNumber strategy) |
607 |
{ |
608 |
bool retval; |
609 |
|
610 |
#ifdef GIST_QUERY_DEBUG |
611 |
fprintf(stderr, "leaf_consistent, %d\n", strategy); |
612 |
#endif |
613 |
|
614 |
switch (strategy) |
615 |
{ |
616 |
case RTLeftStrategyNumber: |
617 |
retval = (bool) BIOSEG_PREFIX(_left)(key, query); |
618 |
break; |
619 |
case RTOverLeftStrategyNumber: |
620 |
retval = (bool) BIOSEG_PREFIX(_over_left)(key, query); |
621 |
break; |
622 |
case RTOverlapStrategyNumber: |
623 |
retval = (bool) BIOSEG_PREFIX(_overlap)(key, query); |
624 |
break; |
625 |
case RTOverRightStrategyNumber: |
626 |
retval = (bool) BIOSEG_PREFIX(_over_right)(key, query); |
627 |
break; |
628 |
case RTRightStrategyNumber: |
629 |
retval = (bool) BIOSEG_PREFIX(_right)(key, query); |
630 |
break; |
631 |
case RTSameStrategyNumber: |
632 |
retval = (bool) BIOSEG_PREFIX(_same)(key, query); |
633 |
break; |
634 |
case RTContainsStrategyNumber: |
635 |
case RTOldContainsStrategyNumber: |
636 |
retval = (bool) BIOSEG_PREFIX(_contains)(key, query); |
637 |
break; |
638 |
case RTContainedByStrategyNumber: |
639 |
case RTOldContainedByStrategyNumber: |
640 |
retval = (bool) BIOSEG_PREFIX(_contained)(key, query); |
641 |
break; |
642 |
default: |
643 |
retval = FALSE; |
644 |
} |
645 |
return (retval); |
646 |
} |
647 |
|
648 |
bool |
649 |
BIOSEG_PREFIX(_gist_internal_consistent)(SEG * key, |
650 |
SEG * query, |
651 |
StrategyNumber strategy) |
652 |
{ |
653 |
bool retval; |
654 |
|
655 |
#ifdef GIST_QUERY_DEBUG |
656 |
fprintf(stderr, "internal_consistent, %d\n", strategy); |
657 |
#endif |
658 |
|
659 |
switch (strategy) |
660 |
{ |
661 |
case RTLeftStrategyNumber: |
662 |
retval = (bool) !BIOSEG_PREFIX(_over_right)(key, query); |
663 |
break; |
664 |
case RTOverLeftStrategyNumber: |
665 |
retval = (bool) !BIOSEG_PREFIX(_right)(key, query); |
666 |
break; |
667 |
case RTOverlapStrategyNumber: |
668 |
retval = (bool) BIOSEG_PREFIX(_overlap)(key, query); |
669 |
break; |
670 |
case RTOverRightStrategyNumber: |
671 |
retval = (bool) !BIOSEG_PREFIX(_left)(key, query); |
672 |
break; |
673 |
case RTRightStrategyNumber: |
674 |
retval = (bool) !BIOSEG_PREFIX(_over_left)(key, query); |
675 |
break; |
676 |
case RTSameStrategyNumber: |
677 |
case RTContainsStrategyNumber: |
678 |
case RTOldContainsStrategyNumber: |
679 |
retval = (bool) BIOSEG_PREFIX(_contains)(key, query); |
680 |
break; |
681 |
case RTContainedByStrategyNumber: |
682 |
case RTOldContainedByStrategyNumber: |
683 |
retval = (bool) BIOSEG_PREFIX(_overlap)(key, query); |
684 |
break; |
685 |
default: |
686 |
retval = FALSE; |
687 |
} |
688 |
return (retval); |
689 |
} |
690 |
|
691 |
SEG * |
692 |
BIOSEG_PREFIX(_gist_binary_union)(SEG * r1, SEG * r2, int *sizep) |
693 |
{ |
694 |
SEG *retval; |
695 |
|
696 |
retval = BIOSEG_PREFIX(_union)(r1, r2); |
697 |
*sizep = sizeof(SEG); |
698 |
|
699 |
return (retval); |
700 |
} |
701 |
|
702 |
|
703 |
bool |
704 |
BIOSEG_PREFIX(_contains)(SEG * a, SEG * b) |
705 |
{ |
706 |
#ifdef INTERBASE_COORDS |
707 |
return ((a->lower <= b->lower) && (a->upper >= b->upper) && |
708 |
a->lower != b->upper && a->upper != b->lower); |
709 |
#else |
710 |
return ((a->lower <= b->lower) && (a->upper >= b->upper)); |
711 |
#endif |
712 |
} |
713 |
|
714 |
bool |
715 |
BIOSEG_PREFIX(_contained)(SEG * a, SEG * b) |
716 |
{ |
717 |
return (BIOSEG_PREFIX(_contains)(b, a)); |
718 |
} |
719 |
|
720 |
/***************************************************************************** |
721 |
* Operator class for R-tree indexing |
722 |
*****************************************************************************/ |
723 |
|
724 |
bool |
725 |
BIOSEG_PREFIX(_same)(SEG * a, SEG * b) |
726 |
{ |
727 |
return BIOSEG_PREFIX(_cmp)(a, b) == 0; |
728 |
} |
729 |
|
730 |
/* bioseg_overlap -- does a overlap b? |
731 |
*/ |
732 |
bool |
733 |
BIOSEG_PREFIX(_overlap)(SEG * a, SEG * b) |
734 |
{ |
735 |
#ifdef INTERBASE_COORDS |
736 |
return ( |
737 |
((a->upper >= b->upper) && (a->lower < b->upper) && |
738 |
b->lower != a->upper) |
739 |
|| |
740 |
((b->upper >= a->upper) && (b->lower < a->upper) && |
741 |
a->lower != b->upper) |
742 |
); |
743 |
#else |
744 |
return |
745 |
((a->upper >= b->upper) && (a->lower <= b->upper)) |
746 |
|| |
747 |
((b->upper >= a->upper) && (b->lower <= a->upper)); |
748 |
#endif |
749 |
} |
750 |
|
751 |
/* bioseg_overleft -- is the right edge of (a) located at or left of the right edge of (b)? |
752 |
*/ |
753 |
bool |
754 |
BIOSEG_PREFIX(_over_left)(SEG * a, SEG * b) |
755 |
{ |
756 |
return (a->upper <= b->upper); |
757 |
} |
758 |
|
759 |
/* bioseg_left -- is (a) entirely on the left of (b)? |
760 |
*/ |
761 |
bool |
762 |
BIOSEG_PREFIX(_left)(SEG * a, SEG * b) |
763 |
{ |
764 |
return (a->upper < b->lower); |
765 |
} |
766 |
|
767 |
/* bioseg_right -- is (a) entirely on the right of (b)? |
768 |
*/ |
769 |
bool |
770 |
BIOSEG_PREFIX(_right)(SEG * a, SEG * b) |
771 |
{ |
772 |
return (a->lower > b->upper); |
773 |
} |
774 |
|
775 |
/* bioseg_overright -- is the left edge of (a) located at or right of the left edge of (b)? |
776 |
*/ |
777 |
bool |
778 |
BIOSEG_PREFIX(_over_right)(SEG * a, SEG * b) |
779 |
{ |
780 |
return (a->lower >= b->lower); |
781 |
} |
782 |
|
783 |
|
784 |
static SEG * |
785 |
BIOSEG_PREFIX(_union)(SEG * a, SEG * b) |
786 |
{ |
787 |
SEG *n; |
788 |
|
789 |
n = (SEG *) palloc(sizeof(*n)); |
790 |
|
791 |
/* take max of upper endpoints */ |
792 |
if (a->upper > b->upper) |
793 |
{ |
794 |
n->upper = a->upper; |
795 |
} |
796 |
else |
797 |
{ |
798 |
n->upper = b->upper; |
799 |
} |
800 |
|
801 |
/* take min of lower endpoints */ |
802 |
if (a->lower < b->lower) |
803 |
{ |
804 |
n->lower = a->lower; |
805 |
} |
806 |
else |
807 |
{ |
808 |
n->lower = b->lower; |
809 |
} |
810 |
|
811 |
return (n); |
812 |
} |
813 |
|
814 |
|
815 |
static SEG * |
816 |
BIOSEG_PREFIX(_inter)(SEG * a, SEG * b) |
817 |
{ |
818 |
SEG *n; |
819 |
|
820 |
n = (SEG *) palloc(sizeof(*n)); |
821 |
|
822 |
/* take min of upper endpoints */ |
823 |
if (a->upper < b->upper) |
824 |
{ |
825 |
n->upper = a->upper; |
826 |
} |
827 |
else |
828 |
{ |
829 |
n->upper = b->upper; |
830 |
} |
831 |
|
832 |
/* take max of lower endpoints */ |
833 |
if (a->lower > b->lower) |
834 |
{ |
835 |
n->lower = a->lower; |
836 |
} |
837 |
else |
838 |
{ |
839 |
n->lower = b->lower; |
840 |
} |
841 |
|
842 |
return (n); |
843 |
} |
844 |
|
845 |
void |
846 |
BIOSEG_PREFIX(_rt_size)(SEG * a, int32 *size) |
847 |
{ |
848 |
if (a == (SEG *) NULL) |
849 |
*size = 0; |
850 |
else |
851 |
#ifdef INTERBASE_COORDS |
852 |
*size = (int32) (a->upper - a->lower); |
853 |
#else |
854 |
*size = (int32) (a->upper - a->lower + 1); |
855 |
#endif |
856 |
} |
857 |
|
858 |
int32 |
859 |
BIOSEG_PREFIX(_size)(SEG * a) |
860 |
{ |
861 |
#ifdef INTERBASE_COORDS |
862 |
return a->upper - a->lower; |
863 |
#else |
864 |
return a->upper - a->lower + 1; |
865 |
#endif |
866 |
} |
867 |
|
868 |
|
869 |
/***************************************************************************** |
870 |
* Miscellaneous operators |
871 |
*****************************************************************************/ |
872 |
int32 |
873 |
BIOSEG_PREFIX(_cmp)(SEG * a, SEG * b) |
874 |
{ |
875 |
if (a->lower < b->lower) |
876 |
return -1; |
877 |
if (a->lower > b->lower) |
878 |
return 1; |
879 |
|
880 |
if (a->upper < b->upper) |
881 |
return -1; |
882 |
if (a->upper > b->upper) |
883 |
return 1; |
884 |
|
885 |
return 0; |
886 |
} |
887 |
|
888 |
bool |
889 |
BIOSEG_PREFIX(_lt)(SEG * a, SEG * b) |
890 |
{ |
891 |
return BIOSEG_PREFIX(_cmp)(a, b) < 0; |
892 |
} |
893 |
|
894 |
bool |
895 |
BIOSEG_PREFIX(_le)(SEG * a, SEG * b) |
896 |
{ |
897 |
return BIOSEG_PREFIX(_cmp)(a, b) <= 0; |
898 |
} |
899 |
|
900 |
bool |
901 |
BIOSEG_PREFIX(_gt)(SEG * a, SEG * b) |
902 |
{ |
903 |
return BIOSEG_PREFIX(_cmp)(a, b) > 0; |
904 |
} |
905 |
|
906 |
bool |
907 |
BIOSEG_PREFIX(_ge)(SEG * a, SEG * b) |
908 |
{ |
909 |
return BIOSEG_PREFIX(_cmp)(a, b) >= 0; |
910 |
} |
911 |
|
912 |
bool |
913 |
BIOSEG_PREFIX(_different)(SEG * a, SEG * b) |
914 |
{ |
915 |
return BIOSEG_PREFIX(_cmp)(a, b) != 0; |
916 |
} |
917 |
|
918 |
bool |
919 |
BIOSEG_PREFIX(_contains_int)(SEG * a, int *b) |
920 |
{ |
921 |
#ifdef INTERBASE_COORDS |
922 |
return ((a->lower < *b) && (a->upper > *b)); |
923 |
#else |
924 |
return ((a->lower <= *b) && (a->upper >= *b)); |
925 |
#endif |
926 |
} |
927 |
|
928 |
/* |
929 |
** These values lower than those in geo_selfuncs.c to encourage the planner to |
930 |
** choose to use the GIST index - found by trail and error. |
931 |
*/ |
932 |
Datum |
933 |
BIOSEG_PREFIX(_sel)(PG_FUNCTION_ARGS) |
934 |
{ |
935 |
PG_RETURN_FLOAT8(2.0e-4); |
936 |
} |
937 |
|
938 |
Datum |
939 |
BIOSEG_PREFIX(_joinsel)(PG_FUNCTION_ARGS) |
940 |
{ |
941 |
PG_RETURN_FLOAT8(1.0e-5); |
942 |
} |
943 |
|
944 |
Datum |
945 |
BIOSEG_PREFIX(_contsel)(PG_FUNCTION_ARGS) |
946 |
{ |
947 |
PG_RETURN_FLOAT8(1.0e-4); |
948 |
} |
949 |
|
950 |
Datum |
951 |
BIOSEG_PREFIX(_contjoinsel)(PG_FUNCTION_ARGS) |
952 |
{ |
953 |
PG_RETURN_FLOAT8(5e-6); |
954 |
} |