ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/bioseg/trunk/bioseg.c
(Generate patch)
# Line 77 | Line 77
77   void   BIOSEG_PREFIX(_rt_size)(SEG * a, int32 *sz);
78   int32  BIOSEG_PREFIX(_size)(SEG * a);
79  
80 static SEG *BIOSEG_PREFIX(_inter)(SEG * a, SEG * b);
80   static SEG *BIOSEG_PREFIX(_union)(SEG * a, SEG * b);
81  
82   /*
# Line 302 | Line 301
301                         (long int) lower, MIN_LOWER)));
302      return 0;
303    }
304 <  
304 >
305    result->lower = lower;
306    result->upper = upper;
307  
# Line 410 | Line 409
409    BIOSEG_PREFIX(_gist_penalty)(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
410   {
411    SEG   *ud;
412 +  SEG   *origrange;
413 +  SEG   *newrange;
414    int32 tmp1;
415    int32 tmp2;
416  
417 <  ud = BIOSEG_PREFIX(_union)((SEG *) DatumGetPointer(origentry->key),
418 <                    (SEG *) DatumGetPointer(newentry->key));
417 >  origrange = (SEG *) DatumGetPointer(origentry->key);
418 >  newrange = (SEG *) DatumGetPointer(newentry->key);
419 >  ud = BIOSEG_PREFIX(_union)(origrange, newrange);
420    BIOSEG_PREFIX(_rt_size)(ud, &tmp1);
421 <  BIOSEG_PREFIX(_rt_size)((SEG *) DatumGetPointer(origentry->key), &tmp2);
422 <  *result = tmp1 - tmp2;
421 >  BIOSEG_PREFIX(_rt_size)(origrange, &tmp2);
422 >  if (tmp1 == tmp2)
423 >    {
424 >      BIOSEG_PREFIX(_rt_size)(newrange, &tmp1);
425 >      *result = ((tmp2 - tmp1) * 100000.0) / tmp2;
426 >    }
427 >  else
428 >    {
429 >      *result = tmp1 - tmp2 + 100000.0;
430 >    }
431  
432   #ifdef GIST_DEBUG
433 <  fprintf(stderr, "penalty\n");
424 <  fprintf(stderr, "\t%g\n", *result);
433 >  fprintf(stderr, "Penalty of putting %d..%d into %d..%d is %g\n", newrange->lower, newrange->upper, origrange->lower, origrange->upper, *result);
434   #endif
435  
436    return (result);
# Line 430 | Line 439
439  
440   /*
441   ** The GiST PickSplit method for segments
433 ** We use Guttman's poly time split algorithm
442   */
443   GIST_SPLITVEC *
444    BIOSEG_PREFIX(_gist_picksplit)(GistEntryVector *entryvec,
445                      GIST_SPLITVEC *v)
446   {
447    OffsetNumber  i;
440  OffsetNumber  j;
448    SEG          *datum_alpha;
442  SEG          *datum_beta;
449    SEG          *datum_l;
450    SEG          *datum_r;
451 <  SEG          *union_d;
452 <  SEG          *union_dl;
453 <  SEG          *union_dr;
454 <  SEG          *inter_d;
449 <  bool          firsttime;
450 <  int32         size_alpha;
451 <  int32         size_beta;
452 <  int32         size_union;
453 <  int32         size_inter;
454 <  int32         size_waste;
455 <  int32         waste;
456 <  int32         size_l;
457 <  int32         size_r;
451 >  int32         lowest;
452 >  int32         highest;
453 >  int32         midpoint;
454 >  int32         split;
455    int           nbytes;
456 <  OffsetNumber  seed_1 = 1;
457 <  OffsetNumber  seed_2 = 2;
456 >  int           firsttime;
457 >  long long int midpointsum;
458 >  int           sumcount;
459    OffsetNumber *left;
460    OffsetNumber *right;
461    OffsetNumber  maxoff;
462  
463 < #ifdef GIST_DEBUG
466 <  fprintf(stderr, "picksplit\n");
467 < #endif
468 <
469 <  maxoff = entryvec->n - 2;
463 >  maxoff = entryvec->n - 1;
464    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
465    v->spl_left = (OffsetNumber *) palloc(nbytes);
466    v->spl_right = (OffsetNumber *) palloc(nbytes);
467  
468 +  midpointsum = 0;
469 +  sumcount = 0;
470    firsttime = true;
471 <  waste = 0.0;
471 >  lowest = 0;
472 >  highest = 0;
473  
474 <  for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
474 >  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
475      {
476        datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
477 <      for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
477 >      midpoint = datum_alpha->lower + ((datum_alpha->upper - datum_alpha->lower) / 2);
478 >      midpointsum += midpoint;
479 >      sumcount++;
480 >      if (firsttime || (midpoint < lowest))
481          {
482 <          datum_beta = (SEG *) DatumGetPointer(entryvec->vector[j].key);
483 <
484 <          /* compute the wasted space by unioning these guys */
485 <          /* size_waste = size_union - size_inter; */
486 <          union_d = BIOSEG_PREFIX(_union)(datum_alpha, datum_beta);
487 <          BIOSEG_PREFIX(_rt_size)(union_d, &size_union);
488 <          inter_d = BIOSEG_PREFIX(_inter)(datum_alpha, datum_beta);
489 <          BIOSEG_PREFIX(_rt_size)(inter_d, &size_inter);
490 <          size_waste = size_union - size_inter;
491 <
492 <          /*
493 <           * are these a more promising split that what we've already seen?
494 <           */
495 <          if (size_waste > waste || firsttime)
496 <            {
497 <              waste = size_waste;
498 <              seed_1 = i;
499 <              seed_2 = j;
500 <              firsttime = false;
501 <            }
482 >          lowest = midpoint;
483          }
484 +      if (firsttime || (midpoint > highest))
485 +        {
486 +          highest = midpoint;
487 +        }
488 +      firsttime = false;
489      }
490  
491 +  split = midpointsum / sumcount;
492    left = v->spl_left;
493    v->spl_nleft = 0;
494    right = v->spl_right;
495    v->spl_nright = 0;
496  
497 <  datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[seed_1].key);
498 <  datum_l = BIOSEG_PREFIX(_union)(datum_alpha, datum_alpha);
499 <  BIOSEG_PREFIX(_rt_size)(datum_l, &size_l);
500 <  datum_beta = (SEG *) DatumGetPointer(entryvec->vector[seed_2].key);
501 <  datum_r = BIOSEG_PREFIX(_union)(datum_beta, datum_beta);
502 <  BIOSEG_PREFIX(_rt_size)(datum_r, &size_r);
497 >  datum_l = (SEG *) palloc(sizeof(*datum_l));
498 >  datum_l->lower = lowest;
499 >  datum_l->upper = lowest;
500 >  datum_r = (SEG *) palloc(sizeof(*datum_r));
501 >  datum_r->lower = highest;
502 >  datum_r->upper = highest;
503  
504    /*
505 <   * Now split up the regions between the two seeds.      An important property
519 <   * of this split algorithm is that the split vector v has the indices of
520 <   * items to be split in order in its left and right vectors.  We exploit
521 <   * this property by doing a merge in the code that actually splits the
522 <   * page.
523 <   *
524 <   * For efficiency, we also place the new index tuple in this loop. This is
525 <   * handled at the very end, when we have placed all the existing tuples
526 <   * and i == maxoff + 1.
505 >   * Now split up the regions between the two seeds.
506     */
507  
529  maxoff = OffsetNumberNext(maxoff);
508    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
509      {
532      /*
533       * If we've already decided where to place this item, just put it on
534       * the right list.      Otherwise, we need to figure out which page needs
535       * the least enlargement in order to store the item.
536       */
537
538      if (i == seed_1)
539        {
540          *left++ = i;
541          v->spl_nleft++;
542          continue;
543        }
544      else if (i == seed_2)
545        {
546          *right++ = i;
547          v->spl_nright++;
548          continue;
549        }
550
551      /* okay, which page needs least enlargement? */
510        datum_alpha = (SEG *) DatumGetPointer(entryvec->vector[i].key);
511 <      union_dl = BIOSEG_PREFIX(_union)(datum_l, datum_alpha);
554 <      union_dr = BIOSEG_PREFIX(_union)(datum_r, datum_alpha);
555 <      BIOSEG_PREFIX(_rt_size)(union_dl, &size_alpha);
556 <      BIOSEG_PREFIX(_rt_size)(union_dr, &size_beta);
511 >      midpoint = datum_alpha->lower + ((datum_alpha->upper - datum_alpha->lower) / 2);
512  
513        /* pick which page to add it to */
514 <      if (size_alpha - size_l < size_beta - size_r)
514 >      if (midpoint <= split)
515          {
516 <          datum_l = union_dl;
562 <          size_l = size_alpha;
516 >          datum_l = BIOSEG_PREFIX(_union)(datum_l, datum_alpha);
517            *left++ = i;
518            v->spl_nleft++;
519          }
520        else
521          {
522 <          datum_r = union_dr;
569 <          size_r = size_alpha;
522 >          datum_r = BIOSEG_PREFIX(_union)(datum_r, datum_alpha);
523            *right++ = i;
524            v->spl_nright++;
525          }
# Line 576 | Line 529
529    v->spl_ldatum = PointerGetDatum(datum_l);
530    v->spl_rdatum = PointerGetDatum(datum_r);
531  
532 + #ifdef GIST_DEBUG
533 +  fprintf(stderr, "Picksplit summary to %d..%d (%d locations) and %d..%d (%d locations), with pivot %d\n", datum_l->lower, datum_l->upper, v->spl_nleft,
534 + datum_r->lower, datum_r->upper, v->spl_nright, split);
535 + #endif
536 +
537    return v;
538   }
539  
# Line 607 | Line 565
565   {
566    bool retval;
567  
610 #ifdef GIST_QUERY_DEBUG
611  fprintf(stderr, "leaf_consistent, %d\n", strategy);
612 #endif
613
568    switch (strategy)
569      {
570      case RTLeftStrategyNumber:
# Line 642 | Line 596
596      default:
597        retval = FALSE;
598      }
599 +
600 + #ifdef GIST_QUERY_DEBUG
601 +  if (retval)
602 +    {
603 +      fprintf(stderr, "leaf_consistent, %d..%d %d %d..%d matches\n", query->lower, query->upper, strategy, key->lower, key->upper);
604 +    }
605 +  else
606 +    {
607 +      fprintf(stderr, "leaf_consistent, %d..%d %d %d..%d\n", query->lower, query->upper, strategy, key->lower, key->upper);
608 +    }
609 + #endif
610 +
611    return (retval);
612   }
613  
# Line 652 | Line 618
618   {
619    bool retval;
620  
655 #ifdef GIST_QUERY_DEBUG
656  fprintf(stderr, "internal_consistent, %d\n", strategy);
657 #endif
658
621    switch (strategy)
622      {
623      case RTLeftStrategyNumber:
# Line 685 | Line 647
647      default:
648        retval = FALSE;
649      }
650 +
651 + #ifdef GIST_QUERY_DEBUG
652 +  if (retval)
653 +    {
654 +      fprintf(stderr, "internal_consistent, %d..%d %d %d..%d matches\n", query->lower, query->upper, strategy, key->lower, key->upper);
655 +    }
656 +  else
657 +    {
658 +      fprintf(stderr, "internal_consistent, %d..%d %d %d..%d\n", query->lower, query->upper, strategy, key->lower, key->upper);
659 +    }
660 + #endif
661 +
662    return (retval);
663   }
664  
# Line 811 | Line 785
785    return (n);
786   }
787  
814
815 static SEG *
816  BIOSEG_PREFIX(_inter)(SEG * a, SEG * b)
817 {
818  SEG *n;
819
820  n = (SEG *) palloc(sizeof(*n));
821
822  /* take min of upper endpoints */
823  if (a->upper < b->upper)
824    {
825      n->upper = a->upper;
826    }
827  else
828    {
829      n->upper = b->upper;
830    }
831
832  /* take max of lower endpoints */
833  if (a->lower > b->lower)
834    {
835      n->lower = a->lower;
836    }
837  else
838    {
839      n->lower = b->lower;
840    }
841
842  return (n);
843 }
844
788   void
789    BIOSEG_PREFIX(_rt_size)(SEG * a, int32 *size)
790   {
# Line 926 | Line 869
869   }
870  
871   /*
872 < ** These values lower than those in geo_selfuncs.c to encourage the planner to
930 < ** choose to use the GIST index - found by trail and error.
872 > ** These are lower than those in geo_selfuncs.c - found by trail and error.
873   */
874   Datum
875    BIOSEG_PREFIX(_sel)(PG_FUNCTION_ARGS)
876   {
877 <  PG_RETURN_FLOAT8(2.0e-5);
877 >  PG_RETURN_FLOAT8(2.0e-4);
878   }
879  
880   Datum
881    BIOSEG_PREFIX(_joinsel)(PG_FUNCTION_ARGS)
882   {
883 <  PG_RETURN_FLOAT8(1.0e-6);
883 >  PG_RETURN_FLOAT8(1.0e-5);
884   }
885  
886   Datum
887    BIOSEG_PREFIX(_contsel)(PG_FUNCTION_ARGS)
888   {
889 <  PG_RETURN_FLOAT8(1.0e-5);
889 >  PG_RETURN_FLOAT8(1.0e-4);
890   }
891  
892   Datum
893    BIOSEG_PREFIX(_contjoinsel)(PG_FUNCTION_ARGS)
894   {
895 <  PG_RETURN_FLOAT8(5e-7);
895 >  PG_RETURN_FLOAT8(5e-6);
896   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines