Mailing List Archive

r3669 - in trunk: c_src/KinoSearch/Search c_src/KinoSearch/Search/HitCollector perl/t
Author: creamyg
Date: 2008-07-29 18:13:30 -0700 (Tue, 29 Jul 2008)
New Revision: 3669

Modified:
trunk/c_src/KinoSearch/Search/HitCollector.bp
trunk/c_src/KinoSearch/Search/HitCollector.c
trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.bp
trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.c
trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.bp
trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.c
trunk/c_src/KinoSearch/Search/Scorer.c
trunk/perl/t/506-hit_collector.t
Log:
Change API for HitCollector's Collect() method to take a Tally instead of a
floating point score.


Modified: trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.bp
===================================================================
--- trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.bp 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.bp 2008-07-30 01:13:30 UTC (rev 3669)
@@ -16,7 +16,7 @@
init(SortCollector *self, FieldDocCollator *collator, u32_t size);

public void
- Collect(SortCollector *self, u32_t doc_num, float score);
+ Collect(SortCollector *self, u32_t doc_num, Tally *tally);

void
Destroy(SortCollector *self);

Modified: trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.c
===================================================================
--- trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.c 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/c_src/KinoSearch/Search/HitCollector/SortCollector.c 2008-07-30 01:13:30 UTC (rev 3669)
@@ -5,6 +5,7 @@
#include "KinoSearch/Search/FieldDocCollator.h"
#include "KinoSearch/Search/HitQueue.h"
#include "KinoSearch/Search/SortedHitQueue.h"
+#include "KinoSearch/Search/Tally.h"

SortCollector*
SortColl_new(FieldDocCollator *collator, u32_t num_hits)
@@ -43,21 +44,21 @@
}

void
-SortColl_collect(SortCollector *self, u32_t doc_num, float score)
+SortColl_collect(SortCollector *self, u32_t doc_num, Tally *tally)
{
/* Add to the total number of hits. */
self->total_hits++;

/* Bail if the doc doesn't sort higher than the current bubble. */
if ( self->total_hits > self->num_hits
- && ( FDocCollator_Compare(self->collator, doc_num, score,
+ && ( FDocCollator_Compare(self->collator, doc_num, tally->score,
self->min_doc, self->min_score) )
) {
return;
}
else {
FieldDoc *const field_doc
- = FieldDoc_new(doc_num, score, self->collator);
+ = FieldDoc_new(doc_num, tally->score, self->collator);
SortedHitQueue *const hit_q = (SortedHitQueue*)self->hit_q;

SortedHitQ_Insert(hit_q, (Obj*)field_doc);

Modified: trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.bp
===================================================================
--- trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.bp 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.bp 2008-07-30 01:13:30 UTC (rev 3669)
@@ -23,7 +23,7 @@
/** Keep highest scoring docs.
*/
public void
- Collect(TopDocCollector *self, u32_t doc_num, float score);
+ Collect(TopDocCollector *self, u32_t doc_num, Tally *tally);

void
Destroy(TopDocCollector *self);

Modified: trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.c
===================================================================
--- trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.c 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/c_src/KinoSearch/Search/HitCollector/TopDocCollector.c 2008-07-30 01:13:30 UTC (rev 3669)
@@ -3,6 +3,7 @@
#include "KinoSearch/Search/HitCollector/TopDocCollector.h"
#include "KinoSearch/Search/HitQueue.h"
#include "KinoSearch/Search/ScoreDoc.h"
+#include "KinoSearch/Search/Tally.h"

TopDocCollector*
TDColl_new(u32_t num_hits)
@@ -37,19 +38,19 @@
}

void
-TDColl_collect(TopDocCollector *self, u32_t doc_num, float score)
+TDColl_collect(TopDocCollector *self, u32_t doc_num, Tally *tally)
{
/* Add to the total number of hits. */
self->total_hits++;

/* Bail if the score doesn't exceed the minimum. */
if ( self->total_hits > self->num_hits
- && score < self->min_score
+ && tally->score < self->min_score
) {
return;
}
else if (self->num_hits > 0) {
- ScoreDoc *const score_doc = ScoreDoc_new(doc_num, score);
+ ScoreDoc *const score_doc = ScoreDoc_new(doc_num, tally->score);
HitQueue *const hit_q = self->hit_q;

HitQ_Insert(hit_q, (Obj*)score_doc);

Modified: trunk/c_src/KinoSearch/Search/HitCollector.bp
===================================================================
--- trunk/c_src/KinoSearch/Search/HitCollector.bp 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/c_src/KinoSearch/Search/HitCollector.bp 2008-07-30 01:13:30 UTC (rev 3669)
@@ -2,7 +2,7 @@

/** Process doc/score pairs.
*
- * A Scorer spits out raw doc_num/score pairs; a HitCollector decides what to do
+ * A Scorer spits out raw doc_num/Tally pairs; a HitCollector decides what to do
* with them, based on how the abstract Collect() method is implemented.
*/

@@ -14,14 +14,15 @@
static HitCollector*
init(HitCollector *self);

- /** Do something with a doc num and a score. (For instance,
+ /** Do something with a doc num and a Tally. (For instance,
* keep track of the docs with the ten highest scores.)
*
* @param doc_num A positive integer document number.
- * @param score A floating point score.
+ * @param tally A KinoSearch::Search::Tally (which communicates scoring
+ * data).
*/
public abstract void
- Collect(HitCollector *self, u32_t doc_num, float score);
+ Collect(HitCollector *self, u32_t doc_num, Tally *tally);
}

/** HitCollector which records doc nums in a BitVector.
@@ -45,10 +46,10 @@
Destroy(BitCollector *self);

/** Set bit in the object's BitVector for the supplied doc number.
- * Score is ignored.
+ * Scoring information is discarded.
*/
public void
- Collect(BitCollector *self, u32_t doc_num, float score);
+ Collect(BitCollector *self, u32_t doc_num, Tally *tally);
}

class KinoSearch::Search::HitCollector::OffsetCollector cnick OffsetColl
@@ -71,7 +72,7 @@
Destroy(OffsetCollector *self);

public void
- Collect(OffsetCollector *self, u32_t doc_num, float score);
+ Collect(OffsetCollector *self, u32_t doc_num, Tally *tally);
}

/* Copyright 2006-2008 Marvin Humphrey

Modified: trunk/c_src/KinoSearch/Search/HitCollector.c
===================================================================
--- trunk/c_src/KinoSearch/Search/HitCollector.c 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/c_src/KinoSearch/Search/HitCollector.c 2008-07-30 01:13:30 UTC (rev 3669)
@@ -1,6 +1,7 @@
#include "KinoSearch/Util/ToolSet.h"

#include "KinoSearch/Search/HitCollector.h"
+#include "KinoSearch/Search/Tally.h"
#include "KinoSearch/Util/BitVector.h"
#include "KinoSearch/Util/IntMap.h"
#include "KinoSearch/Util/Native.h"
@@ -27,9 +28,9 @@
}

void
-BitColl_collect(BitCollector *self, u32_t doc_num, float score)
+BitColl_collect(BitCollector *self, u32_t doc_num, Tally *tally)
{
- UNUSED_VAR(score);
+ UNUSED_VAR(tally);

/* Add the doc_num to the BitVector. */
BitVec_Set(self->bit_vec, doc_num);
@@ -58,9 +59,9 @@
}

void
-OffsetColl_collect(OffsetCollector *self, u32_t doc_num, float score)
+OffsetColl_collect(OffsetCollector *self, u32_t doc_num, Tally *tally)
{
- HC_Collect(self->inner_coll, (doc_num + self->offset), score);
+ HC_Collect(self->inner_coll, (doc_num + self->offset), tally);
}

void

Modified: trunk/c_src/KinoSearch/Search/Scorer.c
===================================================================
--- trunk/c_src/KinoSearch/Search/Scorer.c 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/c_src/KinoSearch/Search/Scorer.c 2008-07-30 01:13:30 UTC (rev 3669)
@@ -93,7 +93,7 @@

/* This doc is in range, so collect it. */
tally = Scorer_Tally(self);
- HC_Collect(hit_collector, doc_num, tally->score);
+ HC_Collect(hit_collector, doc_num, tally);
hits_this_seg++;
} while (0 != (doc_num = Scorer_Next(self)) );
}

Modified: trunk/perl/t/506-hit_collector.t
===================================================================
--- trunk/perl/t/506-hit_collector.t 2008-07-30 00:58:24 UTC (rev 3668)
+++ trunk/perl/t/506-hit_collector.t 2008-07-30 01:13:30 UTC (rev 3669)
@@ -37,9 +37,13 @@
use KinoSearch::Util::BitVector;

my @docs_and_scores = ( [ 0, 2 ], [ 5, 0 ], [ 10, 0 ], [ 1000, 1 ] );
+my $tally = KinoSearch::Search::Tally->new;

my $hc = KinoSearch::Search::HitCollector::TopDocCollector->new( size => 3, );
-$hc->collect( doc_num => $_->[0], score => $_->[1] ) for @docs_and_scores;
+for (@docs_and_scores) {
+ $tally->set_score($_->[1]);
+ $hc->collect( doc_num => $_->[0], tally => $tally );
+}

my $hit_queue = $hc->get_hit_q;
isa_ok( $hit_queue, 'KinoSearch::Search::HitQueue' );
@@ -48,7 +52,10 @@
is_deeply( \@scores, [ 2, 1, 0 ], "collect into HitQueue" );

$hc = KinoSearch::Search::HitCollector::TopDocCollector->new( size => 0 );
-$hc->collect( doc_num => $_->[0], score => $_->[1] ) for @docs_and_scores;
+for (@docs_and_scores) {
+ $tally->set_score($_->[1]);
+ $hc->collect( doc_num => $_->[0], tally => $tally );
+}
$hit_queue = $hc->get_hit_q;
is( $hc->get_total_hits, 4,
"get_total_hits is accurate when no hits are requested" );
@@ -58,7 +65,10 @@
my $bit_vec = KinoSearch::Util::BitVector->new;
$hc = KinoSearch::Search::HitCollector::BitCollector->new(
bit_vector => $bit_vec );
-$hc->collect( doc_num => $_->[0], score => $_->[1] ) for @docs_and_scores;
+for (@docs_and_scores) {
+ $tally->set_score($_->[1]);
+ $hc->collect( doc_num => $_->[0], tally => $tally );
+}
is_deeply(
$bit_vec->to_arrayref,
[ 0, 5, 10, 1000 ],
@@ -72,12 +82,17 @@
collector => $inner_coll,
offset => 10,
);
-$offset_coll->collect( doc_num => $_->[0], score => $_->[1] )
- for @docs_and_scores;
+for (@docs_and_scores) {
+ $tally->set_score($_->[1]);
+ $offset_coll->collect( doc_num => $_->[0], tally => $tally );
+}
is_deeply( $bit_vec->to_arrayref, [ 10, 15, 20, 1010 ], "Offset collector" );

$hc = EvensOnlyHitCollector->new;
-$hc->collect( doc_num => $_->[0], score => $_->[1] ) for @docs_and_scores;
+for (@docs_and_scores) {
+ $tally->set_score($_->[1]);
+ $hc->collect( doc_num => $_->[0], tally => $tally );
+}
is_deeply(
$hc->get_doc_nums,
[ 0, 10, 1000 ],


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits