Mailing List Archive

r3673 - in trunk: c_src/KinoSearch/Highlight c_src/KinoSearch/Search perl/lib/KinoSearch/Highlight perl/t
Author: creamyg
Date: 2008-07-29 20:20:39 -0700 (Tue, 29 Jul 2008)
New Revision: 3673

Modified:
trunk/c_src/KinoSearch/Highlight/HeatMap.bp
trunk/c_src/KinoSearch/Highlight/HeatMap.c
trunk/c_src/KinoSearch/Highlight/HighlightSpan.bp
trunk/c_src/KinoSearch/Highlight/HighlightSpan.c
trunk/c_src/KinoSearch/Highlight/Highlighter.c
trunk/c_src/KinoSearch/Search/PhraseQuery.c
trunk/c_src/KinoSearch/Search/TermQuery.c
trunk/perl/lib/KinoSearch/Highlight/HighlightSpan.pm
trunk/perl/t/303-highlighter.t
trunk/perl/t/309-highlight_span.t
trunk/perl/t/310-heat_map.t
Log:
Refactor HighlightSpan so that it uses a length rather than an end offset.


Modified: trunk/c_src/KinoSearch/Highlight/HeatMap.bp
===================================================================
--- trunk/c_src/KinoSearch/Highlight/HeatMap.bp 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/c_src/KinoSearch/Highlight/HeatMap.bp 2008-07-30 03:20:39 UTC (rev 3673)
@@ -17,7 +17,7 @@
init(HeatMap *self, VArray *spans, u32_t window = 133);

/** Compare two (pointers-to) HighlightSpans, first by start_offset, then
- * by end_offset.
+ * by length.
*/
static int
compare_spans(const void *va, const void *vb);
@@ -34,7 +34,7 @@
* Span 3: positions 20-30, score .5
*
* @param spans An array of HighlightSpans. The spans must be sorted by
- * start_offset then end_offset.
+ * start_offset then length.
*/
incremented VArray*
Flatten_Spans(HeatMap *self, VArray *spans);
@@ -51,7 +51,7 @@
* each pair that yields a non-zero proximity boost.
*
* @param spans An array of HighlightSpans. The spans must be sorted by
- * start_offset then end_offset.
+ * start_offset then length.
*/
incremented VArray*
Generate_Proximity_Boosts(HeatMap *self, VArray *spans);

Modified: trunk/c_src/KinoSearch/Highlight/HeatMap.c
===================================================================
--- trunk/c_src/KinoSearch/Highlight/HeatMap.c 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/c_src/KinoSearch/Highlight/HeatMap.c 2008-07-30 03:20:39 UTC (rev 3673)
@@ -43,7 +43,7 @@
HighlightSpan *a = *(HighlightSpan**)va;
HighlightSpan *b = *(HighlightSpan**)vb;
int comparison = a->start_offset - b->start_offset;
- if (comparison == 0) comparison = a->end_offset - b->end_offset;
+ if (comparison == 0) comparison = a->length - b->length;
return comparison;
}

@@ -65,11 +65,11 @@
u32_t i;
u32_t last;

- /* Assemble a list of all unique start_offset/end_offset boundaries. */
+ /* Assemble a list of all unique start/end boundaries. */
for (i = 0; i < num_spans; i++) {
HighlightSpan *span = (HighlightSpan*)VA_Fetch(spans, i);
bounds[i] = span->start_offset;
- bounds[i + num_spans] = span->end_offset;
+ bounds[i + num_spans] = span->start_offset + span->length;
}
qsort(bounds, num_spans * 2, sizeof(u32_t), compare_u32);
for (i = 0, num_bounds = 0, last = U32_MAX; i < num_spans * 2; i++) {
@@ -82,7 +82,9 @@
/* Create one HighlightSpan for each zone between two bounds. */
flattened = VA_new(num_bounds - 1);
for (i = 0; i < num_bounds - 1; i++) {
- HighlightSpan *span = HLSpan_new(bounds[i], bounds[i + 1], 0.0f);
+ u32_t start = bounds[i];
+ u32_t length = bounds[i + 1] - start;
+ HighlightSpan *span = HLSpan_new(start, length, 0.0f);
VA_Push(flattened, (Obj*)span);
REFCOUNT_DEC(span);
}
@@ -109,6 +111,7 @@
for (i = 0; i < spans->size; i++) {
HighlightSpan *source_span = (HighlightSpan*)VA_Fetch(spans, i);
u32_t j;
+ u32_t source_span_end = source_span->start_offset + source_span->length;

/* Get the location of the flattened span that shares the source
* span's start_offset. */
@@ -123,7 +126,7 @@
for (j = dest_tick; j < flattened->size; j++) {
HighlightSpan *dest_span
= (HighlightSpan*)VA_Fetch(flattened, j);
- if (dest_span->start_offset == source_span->end_offset)
+ if (dest_span->start_offset == source_span_end)
break;
else {
dest_span->weight += source_span->weight;
@@ -154,7 +157,8 @@
int comparison = HeatMap_compare_spans(&span1, &span2);
HighlightSpan *lower = comparison <= 0 ? span1 : span2;
HighlightSpan *upper = comparison >= 0 ? span1 : span2;
- i32_t distance = (i32_t)upper->start_offset - lower->end_offset;
+ i32_t lower_end_offset = lower->start_offset + lower->length;
+ i32_t distance = (i32_t)upper->start_offset - lower_end_offset;

/* If spans overlap, set distance to 0. */
if (distance < 0) distance = 0;
@@ -189,8 +193,10 @@
break;
}
else {
+ u32_t length = (span2->start_offset - span1->start_offset)
+ + span2->length;
HighlightSpan *boost_span = HLSpan_new(span1->start_offset,
- span2->end_offset, prox_score);
+ length, prox_score);
VA_Push(boosts, (Obj*)boost_span);
REFCOUNT_DEC(boost_span);
}

Modified: trunk/c_src/KinoSearch/Highlight/HighlightSpan.bp
===================================================================
--- trunk/c_src/KinoSearch/Highlight/HighlightSpan.bp 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/c_src/KinoSearch/Highlight/HighlightSpan.bp 2008-07-30 03:20:39 UTC (rev 3673)
@@ -4,14 +4,14 @@
extends KinoSearch::Obj {

u32_t start_offset;
- u32_t end_offset;
+ u32_t length;
float weight;

static incremented HighlightSpan*
- new(u32_t start_offset, u32_t end_offset, float weight);
+ new(u32_t start_offset, u32_t length, float weight);

static HighlightSpan*
- init(HighlightSpan *self, u32_t start_offset, u32_t end_offset,
+ init(HighlightSpan *self, u32_t start_offset, u32_t length,
float weight);
}


Modified: trunk/c_src/KinoSearch/Highlight/HighlightSpan.c
===================================================================
--- trunk/c_src/KinoSearch/Highlight/HighlightSpan.c 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/c_src/KinoSearch/Highlight/HighlightSpan.c 2008-07-30 03:20:39 UTC (rev 3673)
@@ -4,18 +4,18 @@


HighlightSpan*
-HLSpan_new(u32_t start_offset, u32_t end_offset, float weight)
+HLSpan_new(u32_t start_offset, u32_t length, float weight)
{
HighlightSpan *self = (HighlightSpan*)CREATE(NULL, HIGHLIGHTSPAN);
- return HLSpan_init(self, start_offset, end_offset, weight);
+ return HLSpan_init(self, start_offset, length, weight);
}

HighlightSpan*
-HLSpan_init(HighlightSpan *self, u32_t start_offset, u32_t end_offset,
+HLSpan_init(HighlightSpan *self, u32_t start_offset, u32_t length,
float weight)
{
self->start_offset = start_offset;
- self->end_offset = end_offset;
+ self->length = length;
self->weight = weight;
return self;
}

Modified: trunk/c_src/KinoSearch/Highlight/Highlighter.c
===================================================================
--- trunk/c_src/KinoSearch/Highlight/Highlighter.c 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/c_src/KinoSearch/Highlight/Highlighter.c 2008-07-30 03:20:39 UTC (rev 3673)
@@ -336,7 +336,7 @@
}
else {
i32_t relative_start = span->start_offset - top;
- i32_t relative_end = span->end_offset - top;
+ i32_t relative_end = relative_start + span->length;

if (relative_start > last_end) {
CharBuf *encoded;

Modified: trunk/c_src/KinoSearch/Search/PhraseQuery.c
===================================================================
--- trunk/c_src/KinoSearch/Search/PhraseQuery.c 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/c_src/KinoSearch/Search/PhraseQuery.c 2008-07-30 03:20:39 UTC (rev 3673)
@@ -382,7 +382,7 @@
}
}

- span = HLSpan_new(start_offset, end_offset, weight);
+ span = HLSpan_new(start_offset, end_offset - start_offset, weight);
VA_Push(spans, (Obj*)span);
REFCOUNT_DEC(span);


Modified: trunk/c_src/KinoSearch/Search/TermQuery.c
===================================================================
--- trunk/c_src/KinoSearch/Search/TermQuery.c 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/c_src/KinoSearch/Search/TermQuery.c 2008-07-30 03:20:39 UTC (rev 3673)
@@ -242,8 +242,10 @@
starts = term_vector->start_offsets;
ends = term_vector->end_offsets;
for (i = 0; i < starts->size; i++) {
- HighlightSpan *span = HLSpan_new(IntMap_Get(starts, i),
- IntMap_Get(ends, i), TermCompiler_Get_Weight(self));
+ i32_t start = IntMap_Get(starts, i);
+ i32_t length = IntMap_Get(ends, i) - start;
+ HighlightSpan *span = HLSpan_new(start, length,
+ TermCompiler_Get_Weight(self));
VA_Push(spans, (Obj*)span);
REFCOUNT_DEC(span);
}

Modified: trunk/perl/lib/KinoSearch/Highlight/HighlightSpan.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Highlight/HighlightSpan.pm 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/perl/lib/KinoSearch/Highlight/HighlightSpan.pm 2008-07-30 03:20:39 UTC (rev 3673)
@@ -7,8 +7,8 @@
__AUTO_XS__

{ "KinoSearch::Highlight::HighlightSpan" => {
- make_getters => [qw( start_offset end_offset weight )],
- make_setters => [qw( start_offset end_offset weight )],
+ make_getters => [qw( start_offset length weight )],
+ make_setters => [qw( start_offset length weight )],
make_constructors => ["new"],
}
}
@@ -30,7 +30,7 @@

my $highlighter = KinoSearch::Highlight::Highlighter->new(
start_offset => 75,
- end_offset => 82,
+ length => 7,
weight => 1,
);

@@ -40,7 +40,7 @@

Accessor method.

-=head2 get_end_offset
+=head2 get_length

Likewise.


Modified: trunk/perl/t/303-highlighter.t
===================================================================
--- trunk/perl/t/303-highlighter.t 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/perl/t/303-highlighter.t 2008-07-30 03:20:39 UTC (rev 3673)
@@ -67,7 +67,7 @@
my $top = $hl->_find_best_fragment(
fragment => $target,
field_val => $field_val,
- heat_map => make_heat_map( [ 2, 3 ] ),
+ heat_map => make_heat_map( [ 2, 1 ] ),
);
is( $target->to_perl, "$phi $phi b", "Find_Best_Fragment" );
is( $top, 2, "correct offset returned by Find_Best_Fragment" );
@@ -76,7 +76,7 @@
$top = $hl->_find_best_fragment(
fragment => $target,
field_val => $field_val,
- heat_map => make_heat_map( [ 2, 3 ] ),
+ heat_map => make_heat_map( [ 2, 1 ] ),
);
is( $target->to_perl, $field_val->to_perl,
"Find_Best_Fragment returns whole field when field is short" );
@@ -86,7 +86,7 @@
$top = $hl->_find_best_fragment(
fragment => $target,
field_val => $field_val,
- heat_map => make_heat_map( [ 6, 8 ] ),
+ heat_map => make_heat_map( [ 6, 2 ] ),
);
is( $target->to_perl, "b$phi$phi",
"Find_Best_Fragment shifts left to deal with overrun" );
@@ -96,7 +96,7 @@
$top = $hl->_find_best_fragment(
fragment => $target,
field_val => $field_val,
- heat_map => make_heat_map( [ 0, 2 ] ),
+ heat_map => make_heat_map( [ 0, 1 ] ),
);
is( $target->to_perl,
"a$phi" . "bcd",
@@ -166,7 +166,7 @@
$target = make_cb("");
$hl->_highlight_excerpt(
raw_excerpt => 'a b c',
- spans => make_spans( [ 2, 3 ] ),
+ spans => make_spans( [ 2, 1 ] ),
top => 0,
highlighted => $target,
);
@@ -175,7 +175,7 @@
$target = make_cb("");
$hl->_highlight_excerpt(
raw_excerpt => "$phi $phi $phi",
- spans => make_spans( [ 2, 3 ] ),
+ spans => make_spans( [ 2, 1 ] ),
top => 0,
highlighted => $target,
);
@@ -188,7 +188,7 @@
$target = make_cb("");
$hl->_highlight_excerpt(
raw_excerpt => "$phi $phi $phi",
- spans => make_spans( [ 3, 4 ] ),
+ spans => make_spans( [ 3, 1 ] ),
top => 1,
highlighted => $target,
);
@@ -330,7 +330,7 @@
for my $span_spec (@_) {
my $hl_span = KinoSearch::Highlight::HighlightSpan->new(
start_offset => $span_spec->[0],
- end_offset => $span_spec->[1],
+ length => $span_spec->[1],
weight => 1,
);
$spans->push($hl_span);

Modified: trunk/perl/t/309-highlight_span.t
===================================================================
--- trunk/perl/t/309-highlight_span.t 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/perl/t/309-highlight_span.t 2008-07-30 03:20:39 UTC (rev 3673)
@@ -8,19 +8,19 @@

my $span = KinoSearch::Highlight::HighlightSpan->new(
start_offset => 2,
- end_offset => 5,
- weight => 3,
+ length => 3,
+ weight => 7,
);

is( $span->get_start_offset, 2, "get_start_offset" );
-is( $span->get_end_offset, 5, "get_end_offset" );
-is( $span->get_weight, 3, "get_weight" );
+is( $span->get_length, 3, "get_length" );
+is( $span->get_weight, 7, "get_weight" );

$span->set_start_offset(10);
-$span->set_end_offset(11);
+$span->set_length(1);
$span->set_weight(4);

is( $span->get_start_offset, 10, "set_start_offset" );
-is( $span->get_end_offset, 11, "set_end_offset" );
+is( $span->get_length, 1, "set_length" );
is( $span->get_weight, 4, "set_weight" );


Modified: trunk/perl/t/310-heat_map.t
===================================================================
--- trunk/perl/t/310-heat_map.t 2008-07-30 02:03:22 UTC (rev 3672)
+++ trunk/perl/t/310-heat_map.t 2008-07-30 03:20:39 UTC (rev 3673)
@@ -9,59 +9,59 @@

my $big_boost = $heat_map->calc_proximity_boost(
span1 => make_span( 0, 10, 1.0 ),
- span2 => make_span( 10, 20, 1.0 )
+ span2 => make_span( 10, 10, 1.0 )
);
my $equally_big_boost = $heat_map->calc_proximity_boost(
span1 => make_span( 0, 10, 1.0 ),
- span2 => make_span( 5, 9, 1.0 )
+ span2 => make_span( 5, 4, 1.0 )
);
my $smaller_boost = $heat_map->calc_proximity_boost(
span1 => make_span( 0, 10, 1.0 ),
- span2 => make_span( 100, 110, 1.0 )
+ span2 => make_span( 100, 10, 1.0 )
);
my $zero_boost = $heat_map->calc_proximity_boost(
span1 => make_span( 0, 10, 1.0 ),
- span2 => make_span( 150, 160, 1.0 )
+ span2 => make_span( 150, 10, 1.0 )
);
is( $big_boost, $equally_big_boost,
"overlapping and abutting produce the same proximity boost" );
cmp_ok( $big_boost, '>', $smaller_boost, "closer is better" );
is( $zero_boost, 0, "distance outside of window yields no prox boost" );

-my $spans = make_spans( [ 10, 20, 1.0 ], [ 16, 30, 2.0 ] );
+my $spans = make_spans( [ 10, 10, 1.0 ], [ 16, 14, 2.0 ] );
my $flattened = $heat_map->flatten_spans($spans);
is_deeply(
spans_to_arg_array($flattened),
- [ [ 10, 16, 1.0 ], [ 16, 20, 3.0 ], [ 20, 30, 2.0 ] ],
+ [ [ 10, 6, 1.0 ], [ 16, 4, 3.0 ], [ 20, 10, 2.0 ] ],
"flatten two overlapping spans"
);
my $boosts = $heat_map->generate_proximity_boosts($spans);
is_deeply(
spans_to_arg_array($boosts),
- [ [ 10, 30, 3.0 ] ],
+ [ [ 10, 20, 3.0 ] ],
"prox boosts for overlap"
);

-$spans = make_spans( [ 10, 20, 1.0 ], [ 16, 30, 2.0 ], [ 50, 51, 1.0 ] );
+$spans = make_spans( [ 10, 10, 1.0 ], [ 16, 14, 2.0 ], [ 50, 1, 1.0 ] );
$flattened = $heat_map->flatten_spans($spans);
is_deeply(
spans_to_arg_array($flattened),
- [ [ 10, 16, 1.0 ], [ 16, 20, 3.0 ], [ 20, 30, 2.0 ], [ 50, 51, 1.0 ] ],
+ [ [ 10, 6, 1.0 ], [ 16, 4, 3.0 ], [ 20, 10, 2.0 ], [ 50, 1, 1.0 ] ],
"flatten two overlapping spans, leave hole, then third span"
);
$boosts = $heat_map->generate_proximity_boosts($spans);
is( scalar @$boosts,
2 + 1, "boosts generated for each unique pair, since all were in range" );

-$spans = make_spans( [ 10, 20, 1.0 ], [ 14, 18, 4.0 ], [ 16, 30, 2.0 ] );
+$spans = make_spans( [ 10, 10, 1.0 ], [ 14, 4, 4.0 ], [ 16, 14, 2.0 ] );
$flattened = $heat_map->flatten_spans($spans);
is_deeply(
spans_to_arg_array($flattened),
- [ [ 10, 14, 1.0 ],
- [ 14, 16, 5.0 ],
- [ 16, 18, 7.0 ],
- [ 18, 20, 3.0 ],
- [ 20, 30, 2.0 ]
+ [ [ 10, 4, 1.0 ],
+ [ 14, 2, 5.0 ],
+ [ 16, 2, 7.0 ],
+ [ 18, 2, 3.0 ],
+ [ 20, 10, 2.0 ]
],
"flatten three overlapping spans"
);
@@ -70,15 +70,15 @@
2 + 1, "boosts generated for each unique pair, since all were in range" );

$spans = make_spans(
- [ 10, 20, 1.0 ],
- [ 16, 30, 4.0 ],
- [ 16, 30, 2.0 ],
- [ 30, 40, 10.0 ]
+ [ 10, 10, 1.0 ],
+ [ 16, 14, 4.0 ],
+ [ 16, 14, 2.0 ],
+ [ 30, 10, 10.0 ]
);
$flattened = $heat_map->flatten_spans($spans);
is_deeply(
spans_to_arg_array($flattened),
- [ [ 10, 16, 1.0 ], [ 16, 20, 7.0 ], [ 20, 30, 6.0 ], [ 30, 40, 10.0 ] ],
+ [ [ 10, 6, 1.0 ], [ 16, 4, 7.0 ], [ 20, 10, 6.0 ], [ 30, 10, 10.0 ] ],
"flatten 4 spans, middle two have identical range"
);
$boosts = $heat_map->generate_proximity_boosts($spans);
@@ -88,15 +88,15 @@
);

$spans = make_spans(
- [ 10, 20, 1.0 ],
- [ 16, 20, 4.0 ],
- [ 16, 30, 2.0 ],
- [ 230, 240, 10.0 ]
+ [ 10, 10, 1.0 ],
+ [ 16, 4, 4.0 ],
+ [ 16, 14, 2.0 ],
+ [ 230, 10, 10.0 ]
);
$flattened = $heat_map->flatten_spans($spans);
is_deeply(
spans_to_arg_array($flattened),
- [ [ 10, 16, 1.0 ], [ 16, 20, 7.0 ], [ 20, 30, 2.0 ], [ 230, 240, 10.0 ] ],
+ [ [ 10, 6, 1.0 ], [ 16, 4, 7.0 ], [ 20, 10, 2.0 ], [ 230, 10, 10.0 ] ],
"flatten 4 spans, middle two have identical starts but different ends"
);
$boosts = $heat_map->generate_proximity_boosts($spans);
@@ -105,7 +105,7 @@
sub make_span {
return KinoSearch::Highlight::HighlightSpan->new(
start_offset => $_[0],
- end_offset => $_[1],
+ length => $_[1],
weight => $_[2],
);
}
@@ -123,7 +123,7 @@
my @out;
for (@$spans) {
push @out,
- [ $_->get_start_offset, $_->get_end_offset, $_->get_weight ];
+ [ $_->get_start_offset, $_->get_length, $_->get_weight ];
}
return \@out;
}


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits