Mailing List Archive

r3796 - in trunk: c_src/KinoSearch/Search perl/lib/KinoSearch/Search perl/t
Author: creamyg
Date: 2008-08-29 18:20:24 -0700 (Fri, 29 Aug 2008)
New Revision: 3796

Modified:
trunk/c_src/KinoSearch/Search/ANDQuery.c
trunk/c_src/KinoSearch/Search/ANDScorer.bp
trunk/c_src/KinoSearch/Search/ANDScorer.c
trunk/perl/lib/KinoSearch/Search/ANDScorer.pm
trunk/perl/t/514-and_scorer.t
Log:
Refactor ANDScorer, requiring that all child scorers be provided as an array
at compile time and eliminating Add_Subscorer.


Modified: trunk/c_src/KinoSearch/Search/ANDQuery.c
===================================================================
--- trunk/c_src/KinoSearch/Search/ANDQuery.c 2008-08-29 21:31:27 UTC (rev 3795)
+++ trunk/c_src/KinoSearch/Search/ANDQuery.c 2008-08-30 01:20:24 UTC (rev 3796)
@@ -81,8 +81,8 @@
return Compiler_Make_Scorer(only_child, reader);
}
else {
- ANDScorer *and_scorer = ANDScorer_new(Compiler_Get_Similarity(self));
u32_t i;
+ VArray *child_scorers = VA_new(self->children->size);

/* Add subscorers one by one. */
for (i = 0; i < self->children->size; i++) {
@@ -91,16 +91,21 @@

/* If any required clause fails, the whole thing fails. */
if (subscorer == NULL) {
- REFCOUNT_DEC(and_scorer);
+ REFCOUNT_DEC(child_scorers);
return NULL;
}
else {
- ANDScorer_Add_Subscorer(and_scorer, subscorer);
+ VA_Push(child_scorers, (Obj*)subscorer);
REFCOUNT_DEC(subscorer);
}
}

- return (Scorer*)and_scorer;
+ {
+ Scorer *retval = (Scorer*)ANDScorer_new(child_scorers,
+ Compiler_Get_Similarity(self));
+ REFCOUNT_DEC(child_scorers);
+ return retval;
+ }
}
}


Modified: trunk/c_src/KinoSearch/Search/ANDScorer.bp
===================================================================
--- trunk/c_src/KinoSearch/Search/ANDScorer.bp 2008-08-29 21:31:27 UTC (rev 3795)
+++ trunk/c_src/KinoSearch/Search/ANDScorer.bp 2008-08-30 01:20:24 UTC (rev 3796)
@@ -9,25 +9,21 @@
Tally *tally;
Scorer **subscorers;
size_t cap;
- u32_t num_subs;
+ VArray *children;
+ u32_t num_kids;
bool_t more;
bool_t first_time;
ByteBuf *raw_prox_bb;

static incremented ANDScorer*
- new(Similarity *sim);
+ new(VArray *children, Similarity *sim);

static ANDScorer*
- init(ANDScorer *self, Similarity *similarity);
+ init(ANDScorer *self, VArray *children, Similarity *similarity);

void
Destroy(ANDScorer *self);

- /** Add a scorer for a sub-query.
- */
- void
- Add_Subscorer(ANDScorer* self, Scorer* subscorer);
-
public i32_t
Next(ANDScorer *self);


Modified: trunk/c_src/KinoSearch/Search/ANDScorer.c
===================================================================
--- trunk/c_src/KinoSearch/Search/ANDScorer.c 2008-08-29 21:31:27 UTC (rev 3795)
+++ trunk/c_src/KinoSearch/Search/ANDScorer.c 2008-08-30 01:20:24 UTC (rev 3796)
@@ -5,110 +5,60 @@
#include "KinoSearch/Search/Tally.h"
#include "KinoSearch/Util/ByteBuf.h"

-/* Perform some initialization stuff we can't know until all subscorers have
- * been added. Return false if we'll never match anything with this scorer.
- */
-static i32_t
-delayed_init(ANDScorer *self, i32_t target);
-
-/* Mark this scorer as invalid/finished.
- */
-static i32_t
-invalidate(ANDScorer *self);
-
ANDScorer*
-ANDScorer_new(Similarity *sim)
+ANDScorer_new(VArray *children, Similarity *sim)
{
ANDScorer *self = (ANDScorer*)CREATE(NULL, ANDSCORER);
- return ANDScorer_init(self, sim);
+ return ANDScorer_init(self, children, sim);
}

ANDScorer*
-ANDScorer_init(ANDScorer *self, Similarity *sim)
+ANDScorer_init(ANDScorer *self, VArray *children, Similarity *sim)
{
- Scorer_init((Scorer*)self, sim);
+ u32_t i;

/* Init. */
+ Scorer_init((Scorer*)self, sim);
self->tally = Tally_new();
self->cap = 10;
- self->subscorers = MALLOCATE(10, Scorer*);
- self->num_subs = 0;
self->raw_prox_bb = BB_new(0);
self->first_time = true;
- self->more = true;

+ /* Assign. */
+ self->children = REFCOUNT_INC(children);
+ self->num_kids = children->size;
+ self->more = self->num_kids ? true : false;
+ self->subscorers = MALLOCATE(self->num_kids, Scorer*);
+ for (i = 0; i < children->size; i++) {
+ Scorer *subscorer = (Scorer*)VA_Fetch(children, i);
+ self->subscorers[i] = subscorer;
+ if (!Scorer_Next(subscorer)) self->more = false;
+ }
+
+ /* Derive. */
+ self->tally->num_matchers = self->num_kids;
+ self->coord = Sim_Coord(self->sim, self->num_kids, self->num_kids);
+
return self;
}

void
ANDScorer_destroy(ANDScorer *self)
{
- if (self->subscorers) {
- u32_t i;
- for (i = 0; i < self->num_subs; i++) {
- REFCOUNT_DEC(self->subscorers[i]);
- }
- free(self->subscorers);
- }
+ REFCOUNT_DEC(self->children);
+ free(self->subscorers);
REFCOUNT_DEC(self->raw_prox_bb);
REFCOUNT_DEC(self->tally);
Scorer_destroy((Scorer*)self);
}

-void
-ANDScorer_add_subscorer(ANDScorer *self, Scorer *subscorer)
-{
- if (!self->first_time)
- CONFESS("Can't add scorers after starting iteration");
-
- if (self->num_subs == self->cap) {
- self->cap += 100;
- self->subscorers = REALLOCATE(self->subscorers, self->cap, Scorer*);
- }
- self->subscorers[ self->num_subs++ ] = REFCOUNT_INC(subscorer);
-
- /* Add to the matcher count; don't bother with subclauses. */
- self->tally->num_matchers += 1;
-}
-
-static i32_t
-delayed_init(ANDScorer *self, i32_t target)
-{
- i32_t highest = 0;
- u32_t i;
-
- /* Once is enough. */
- self->first_time = false;
-
- /* No scorers, no matches! */
- if (!self->num_subs)
- return invalidate(self);
-
- /* Set fixed value for num_matchers. */
- self->tally->num_matchers = self->num_subs;
-
- /* Calculate coord multiplier. */
- self->coord = Sim_Coord(self->sim, self->num_subs, self->num_subs);
-
- /* Advance all scorers. */
- for (i = 0; i < self->num_subs; i++) {
- i32_t candidate = Scorer_Skip_To(self->subscorers[i], target);
- if (!candidate)
- return invalidate(self);
- else if (candidate > highest)
- highest = candidate;
- }
-
- return highest;
-}
-
i32_t
ANDScorer_next(ANDScorer *self)
{
if (self->first_time) {
- return Scorer_Skip_To(self, 0);
+ return Scorer_Skip_To(self, 1);
}
- else if (self->more) {
+ if (self->more) {
const i32_t target = Scorer_Get_Doc_Num(self->subscorers[0]) + 1;
return Scorer_Skip_To(self, target);
}
@@ -117,32 +67,26 @@
}
}

-static i32_t
-invalidate(ANDScorer *self)
-{
- self->more = false;
- return 0;
-}
-
i32_t
ANDScorer_skip_to(ANDScorer *self, i32_t target)
{
Scorer **const subscorers = self->subscorers;
- const u32_t num_subs = self->num_subs;
+ const u32_t num_kids = self->num_kids;
i32_t highest = 0;

- /* First step: advance. */
+ if (!self->more) return 0;
+
+ /* First step: Advance first subscorer and use its doc as a starting
+ * point. */
if (self->first_time) {
- /* Scoot ALL subscorers and find the least doc they might agree on. */
- highest = delayed_init(self, target);
- if (!highest)
- return invalidate(self);
+ self->first_time = false;
}
else {
- /* Advance first subscorer and use it's doc as a starting point. */
highest = Scorer_Skip_To(subscorers[0], target);
- if (!highest)
- return invalidate(self);
+ if (!highest) {
+ self->more = false;
+ return 0;
+ }
}

/* Second step: reconcile. */
@@ -151,7 +95,7 @@
bool_t agreement = true;

/* Scoot all scorers up. */
- for (i = 0; i < num_subs; i++) {
+ for (i = 0; i < num_kids; i++) {
Scorer *const subscorer = subscorers[i];
i32_t candidate = Scorer_Get_Doc_Num(subscorer);

@@ -167,13 +111,15 @@
if (candidate < target) {
/* This scorer is definitely the highest right now. */
highest = Scorer_Skip_To(subscorer, target);
- if (!highest)
- return invalidate(self);
+ if (!highest) {
+ self->more = false;
+ return 0;
+ }
}
}

/* If scorers don't agree, send back through the loop. */
- for (i = 0; i < num_subs; i++) {
+ for (i = 0; i < num_kids; i++) {
Scorer *const subscorer = subscorers[i];
const i32_t candidate = Scorer_Get_Doc_Num(subscorer);
if (candidate != highest) {
@@ -205,7 +151,7 @@
Tally *const tally = self->tally;

tally->score = 0.0f;
- for (i = 0; i < self->num_subs; i++) {
+ for (i = 0; i < self->num_kids; i++) {
Tally *subtally = Scorer_Tally(subscorers[i]);
tally->score += subtally->score;
}
@@ -218,7 +164,7 @@
u32_t
ANDScorer_max_matchers(ANDScorer *self)
{
- return self->num_subs;
+ return self->num_kids;
}

/* Copyright 2006-2008 Marvin Humphrey

Modified: trunk/perl/lib/KinoSearch/Search/ANDScorer.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Search/ANDScorer.pm 2008-08-29 21:31:27 UTC (rev 3795)
+++ trunk/perl/lib/KinoSearch/Search/ANDScorer.pm 2008-08-30 01:20:24 UTC (rev 3796)
@@ -7,7 +7,6 @@
__AUTO_XS__

{ "KinoSearch::Search::ANDScorer" => {
- bind_methods => [qw( Add_Subscorer )],
make_constructors => ["new"],
}
}

Modified: trunk/perl/t/514-and_scorer.t
===================================================================
--- trunk/perl/t/514-and_scorer.t 2008-08-29 21:31:27 UTC (rev 3795)
+++ trunk/perl/t/514-and_scorer.t 2008-08-30 01:20:24 UTC (rev 3796)
@@ -26,15 +26,16 @@
sub check_scorer {
my @intervals = @_;
my @doc_num_arrays = map { modulo_set( $_, 100 ) } @intervals;
- my $and_scorer
- = KinoSearch::Search::ANDScorer->new( similarity => $sim, );
- for my $doc_num_array (@doc_num_arrays) {
- my $mock = KSx::Search::MockScorer->new(
- doc_nums => $doc_num_array,
- scores => [ (0) x scalar @$doc_num_array ],
- );
- $and_scorer->add_subscorer($mock);
- }
+ my @children = map {
+ KSx::Search::MockScorer->new(
+ doc_nums => $_,
+ scores => [ (0) x scalar @$_ ],
+ )
+ } @doc_num_arrays;
+ my $and_scorer = KinoSearch::Search::ANDScorer->new(
+ children => \@children,
+ similarity => $sim,
+ );
my @expected = intersect(@doc_num_arrays);
my $collector = KinoSearch::Search::HitCollector::TopDocCollector->new(
size => 1000 );


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits