Mailing List Archive

r3664 - in trunk: c_src/KinoSearch/Search perl perl/lib/KinoSearch/Search perl/lib/KinoSearch/Search/HitCollector perl/t
Author: creamyg
Date: 2008-07-29 14:34:46 -0700 (Tue, 29 Jul 2008)
New Revision: 3664

Added:
trunk/perl/lib/KinoSearch/Search/HitCollector/BitCollector.pm
Modified:
trunk/c_src/KinoSearch/Search/HitCollector.bp
trunk/perl/MANIFEST
trunk/perl/lib/KinoSearch/Search/HitCollector.pm
trunk/perl/t/506-hit_collector.t
Log:
Expose HitCollector and BitCollector as public classes. Change XS binding for
Collect() to use labeled params, so that callbacks work properly.


Modified: trunk/c_src/KinoSearch/Search/HitCollector.bp
===================================================================
--- trunk/c_src/KinoSearch/Search/HitCollector.bp 2008-07-29 21:31:29 UTC (rev 3663)
+++ trunk/c_src/KinoSearch/Search/HitCollector.bp 2008-07-29 21:34:46 UTC (rev 3664)
@@ -3,32 +3,40 @@
/** Process doc/score pairs.
*
* A Scorer spits out raw doc_num/score pairs; a HitCollector decides what to do
- * with them, based on the collect() method.
+ * with them, based on how the abstract Collect() method is implemented.
*/

-class KinoSearch::Search::HitCollector cnick HC
+abstract class KinoSearch::Search::HitCollector cnick HC
extends KinoSearch::Obj {

+ /** Abstract constructor. Takes no arguments.
+ */
static HitCollector*
init(HitCollector *self);

/** Do something with a doc num and a score. (For instance,
* keep track of the docs with the ten highest scores.)
+ *
+ * @param doc_num A non-negative integer document number.
+ * @param score A floating point score.
*/
abstract void
Collect(HitCollector *self, u32_t doc_num, float score);
}

+/** HitCollector which records doc nums in a BitVector.
+ *
+ * BitCollector is a HitCollector which saves matching document numbers in a
+ * BitVector while ignoring scores. It is useful for recording the entire
+ * set of documents which matches a query.
+ */
class KinoSearch::Search::HitCollector::BitCollector cnick BitColl
extends KinoSearch::Search::HitCollector {

BitVector *bit_vec;

- static incremented BitCollector*
- new(BitVector *bit_vector);
-
- /** Return a HitCollector which sets a set bit for each matching doc number
- * (scores are irrelevant).
+ /**
+ * @param bit_vector A KinoSearch::Util::BitVector.
*/
static BitCollector*
init(BitCollector *self, BitVector *bit_vector);
@@ -36,6 +44,9 @@
void
Destroy(BitCollector *self);

+ /** Set bit in the object's BitVector for the supplied doc number.
+ * Score is ignored.
+ */
void
Collect(BitCollector *self, u32_t doc_num, float score);
}

Modified: trunk/perl/MANIFEST
===================================================================
--- trunk/perl/MANIFEST 2008-07-29 21:31:29 UTC (rev 3663)
+++ trunk/perl/MANIFEST 2008-07-29 21:34:46 UTC (rev 3664)
@@ -125,6 +125,7 @@
lib/KinoSearch/Search/FieldDoc.pm
lib/KinoSearch/Search/FieldDocCollator.pm
lib/KinoSearch/Search/HitCollector.pm
+lib/KinoSearch/Search/HitCollector/BitCollector.pm
lib/KinoSearch/Search/HitCollector/SortCollector.pm
lib/KinoSearch/Search/HitCollector/TopDocCollector.pm
lib/KinoSearch/Search/HitQueue.pm

Added: trunk/perl/lib/KinoSearch/Search/HitCollector/BitCollector.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Search/HitCollector/BitCollector.pm (rev 0)
+++ trunk/perl/lib/KinoSearch/Search/HitCollector/BitCollector.pm 2008-07-29 21:34:46 UTC (rev 3664)
@@ -0,0 +1,44 @@
+use KinoSearch;
+
+1;
+
+__END__
+
+__AUTO_XS__
+
+my $synopsis = <<'END_SYNOPSIS';
+ my $bit_vec = KinoSearch::Util::BitVector->new(
+ capacity => $searcher->max_docs + 1,
+ );
+ my $bit_collector = KinoSearch::Search::HitCollector::BitCollector->new(
+ bit_vector => $bit_vec,
+ );
+ $searcher->collect(
+ collector => $bit_collector,
+ query => $query,
+ );
+END_SYNOPSIS
+
+my $constructor = <<'END_CONSTRUCTOR';
+ my $bit_collector = KinoSearch::Search::HitCollector::BitCollector->new(
+ bit_vector => $bit_vec, # required
+ );
+END_CONSTRUCTOR
+
+{ "KinoSearch::Search::HitCollector::BitCollector" => {
+ make_constructors => ["new"],
+ make_pod => {
+ synopsis => $synopsis,
+ constructor => { sample => $constructor },
+ methods => [qw( collect )],
+ },
+ },
+}
+
+__COPYRIGHT__
+
+Copyright 2005-2008 Marvin Humphrey
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+

Modified: trunk/perl/lib/KinoSearch/Search/HitCollector.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Search/HitCollector.pm 2008-07-29 21:31:29 UTC (rev 3663)
+++ trunk/perl/lib/KinoSearch/Search/HitCollector.pm 2008-07-29 21:34:46 UTC (rev 3664)
@@ -6,31 +6,33 @@

__AUTO_XS__

+my $constructor = <<'END_CONSTRUCTOR';
+ package MyHitCollector;
+ use base qw( KinoSearch::Search::HitCollector );
+ our %foo;
+ sub new {
+ my $self = shift->SUPER::new;
+ my %args = @_;
+ $foo{$$self} = $args{foo};
+ return $self;
+ }
+END_CONSTRUCTOR
+
{
"KinoSearch::Search::HitCollector" => {
+ bind_methods => [qw( collect )],
make_constructors => ["new"],
+ make_pod => {
+ synopsis => " # Abstract base class.\n",
+ constructor => { sample => $constructor },
+ methods => [qw( collect )],
+ },
},
- "KinoSearch::Search::HitCollector::BitCollector" => {
- make_constructors => ["new"],
- },
"KinoSearch::Search::HitCollector::OffsetCollector" => {
make_constructors => ["new"],
},
}

-__XS__
-
-MODULE = KinoSearch PACKAGE = KinoSearch::Search::HitCollector
-
-void
-collect(self, doc_num, score)
- kino_HitCollector *self;
- chy_u32_t doc_num;
- float score;
-PPCODE:
- ABSTRACT_METHOD_CHECK(self, HC, Collect, collect);
- Kino_HC_Collect(self, doc_num, score);
-
__COPYRIGHT__

Copyright 2005-2008 Marvin Humphrey

Modified: trunk/perl/t/506-hit_collector.t
===================================================================
--- trunk/perl/t/506-hit_collector.t 2008-07-29 21:31:29 UTC (rev 3663)
+++ trunk/perl/t/506-hit_collector.t 2008-07-29 21:34:46 UTC (rev 3664)
@@ -10,7 +10,7 @@
my @docs_and_scores = ( [ 0, 2 ], [ 5, 0 ], [ 10, 0 ], [ 1000, 1 ] );

my $hc = KinoSearch::Search::HitCollector::TopDocCollector->new( size => 3, );
-$hc->collect( $_->[0], $_->[1] ) for @docs_and_scores;
+$hc->collect( doc_num => $_->[0], score => $_->[1] ) for @docs_and_scores;

my $hit_queue = $hc->get_hit_q;
isa_ok( $hit_queue, 'KinoSearch::Search::HitQueue' );
@@ -19,7 +19,7 @@
is_deeply( \@scores, [ 2, 1, 0 ], "collect into HitQueue" );

$hc = KinoSearch::Search::HitCollector::TopDocCollector->new( size => 0 );
-$hc->collect( $_->[0], $_->[1] ) for @docs_and_scores;
+$hc->collect( doc_num => $_->[0], score => $_->[1] ) for @docs_and_scores;
$hit_queue = $hc->get_hit_q;
is( $hc->get_total_hits, 4,
"get_total_hits is accurate when no hits are requested" );
@@ -29,7 +29,7 @@
my $bit_vec = KinoSearch::Util::BitVector->new;
$hc = KinoSearch::Search::HitCollector::BitCollector->new(
bit_vector => $bit_vec );
-$hc->collect( $_->[0], $_->[1] ) for @docs_and_scores;
+$hc->collect( doc_num => $_->[0], score => $_->[1] ) for @docs_and_scores;
is_deeply(
$bit_vec->to_arrayref,
[ 0, 5, 10, 1000 ],
@@ -43,5 +43,6 @@
collector => $inner_coll,
offset => 10,
);
-$offset_coll->collect( $_->[0], $_->[1] ) for @docs_and_scores;
+$offset_coll->collect( doc_num => $_->[0], score => $_->[1] )
+ for @docs_and_scores;
is_deeply( $bit_vec->to_arrayref, [ 10, 15, 20, 1010 ], "Offset collector" );


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits