Mailing List Archive

r3872 - in trunk/perl: lib/KinoSearch/Docs/Cookbook sample
Author: creamyg
Date: 2008-09-10 22:33:15 -0700 (Wed, 10 Sep 2008)
New Revision: 3872

Added:
trunk/perl/sample/PrefixQuery.pm
Modified:
trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod
trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod
trunk/perl/sample/FlatQueryParser.pm
Log:
Add PrefixQuery, revise FlatQueryParser -- synchronize them with the Cookbook
entries. Fix a bunch of bugs in both.


Modified: trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod
===================================================================
--- trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod 2008-09-11 05:32:12 UTC (rev 3871)
+++ trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQuery.pod 2008-09-11 05:33:15 UTC (rev 3872)
@@ -217,7 +217,7 @@

sub DESTROY {
my $self = shift;
- delete $posting_lists{$$self};
+ delete $doc_nums{$$self};
delete $tick{$$self};
delete $tally{$$self};
$self->SUPER::DESTROY;
@@ -233,7 +233,7 @@
my $doc_nums = $doc_nums{$$self};
my $tick = ++$tick{$$self};
return 0 if $tick >= scalar @$doc_nums;
- return $doc_nums[$tick];
+ return $doc_nums->[$tick];
}

get_doc_num() returns the current document number, or 0 if the Scorer is
@@ -244,7 +244,7 @@
my $self = shift;
my $tick = $tick{$$self};
my $doc_nums = $doc_nums{$$self};
- return $tick < scalar @doc_nums ? $doc_nums[$tick] : 0;
+ return $tick < scalar @$doc_nums ? $doc_nums->[$tick] : 0;
}

tally() returns an object which isa L<KinoSearch::Search::Tally> and conveys the

Modified: trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod
===================================================================
--- trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod 2008-09-11 05:32:12 UTC (rev 3871)
+++ trunk/perl/lib/KinoSearch/Docs/Cookbook/CustomQueryParser.pod 2008-09-11 05:33:15 UTC (rev 3872)
@@ -207,7 +207,14 @@
conveys field and Analyzer information, so we can just defer to that.

package FlatQueryParser;
- use base ( KinoSearch::QueryParser );
+ use base qw( KinoSearch::QueryParser );
+ use KinoSearch::Search::TermQuery;
+ use KinoSearch::Search::PhraseQuery;
+ use KinoSearch::Search::ORQuery;
+ use KinoSearch::Search::NoMatchQuery;
+ use PrefixQuery;
+ use Parse::RecDescent;
+ use Carp;

our %rd_parser;

@@ -260,11 +267,18 @@
...

sub parse {
- my ( $self, $query_string ) = @_;
- my $tree = $rd_parser{$$self}->tree($query_string);
- return $self->expand($tree);
+ my ( $self, $query_string ) = @_;
+ my $tree = $self->tree($query_string);
+ return $tree ? $self->expand($tree) :
+ KinoSearch::Search::NoMatchQuery->new;
}

+ sub tree {
+ my ( $self, $query_string ) = @_;
+ return $rd_parser{$$self}->tree($query_string);
+ }
+
+
The magic happens in KinoSearch::QueryParser's expand() method, which walks
the ORQuery object we supply to it looking for LeafQuery objects, and calls
expand_leaf() for each one it finds. expand_leaf() performs field-specific

Modified: trunk/perl/sample/FlatQueryParser.pm
===================================================================
--- trunk/perl/sample/FlatQueryParser.pm 2008-09-11 05:32:12 UTC (rev 3871)
+++ trunk/perl/sample/FlatQueryParser.pm 2008-09-11 05:33:15 UTC (rev 3872)
@@ -3,54 +3,34 @@

package FlatQueryParser;
use base qw( KinoSearch::QueryParser );
+use KinoSearch::Search::TermQuery;
+use KinoSearch::Search::PhraseQuery;
+use KinoSearch::Search::ORQuery;
use KinoSearch::Search::NoMatchQuery;
+use PrefixQuery;
+use Parse::RecDescent;
+use Carp;

our %rd_parser;

-sub new {
- my $either = shift;
- my $self = $either->SUPER::new(@_);
- $rd_parser{$$self} = FlatQueryParser::RecDescent->new;
- return $self;
-}
-
-sub parse {
- my ( $self, $query_string ) = @_;
- my $tree = $self->tree($query_string);
- return $tree ? $self->expand($tree) :
- KinoSearch::Search::NoMatchQuery->new;
-}
-
-sub tree {
- my ( $self, $query_string ) = @_;
- return $rd_parser{$$self}->tree($query_string);
-}
-
-sub DESTROY {
- my $self = shift;
- delete $rd_parser{$$self};
- $self->SUPER::DESTROY;
-}
-
-package FlatQueryParser::RecDescent;
-use base qw( Parse::RecDescent );
-use KinoSearch::Search::LeafQuery;
-use KinoSearch::Search::ORQuery;
-
-
my $grammar = <<'END_GRAMMAR';

tree:
- leaf_query(s)
+ leaf_queries
{
$return = KinoSearch::Search::ORQuery->new;
$return->add_child($_) for @{ $item[1] };
}

+leaf_queries:
+ leaf_query(s?)
+ { $item{'leaf_query(s?)'} }
+
leaf_query:
phrase_query
+ | prefix_query
| term_query
-
+
term_query:
/(\S+)/
{ KinoSearch::Search::LeafQuery->new( text => $1 ) }
@@ -58,11 +38,57 @@
phrase_query:
/("[^"]*(?:"|$))/ # terminated by either quote or end of string
{ KinoSearch::Search::LeafQuery->new( text => $1 ) }
+
+prefix_query:
+ /(\w+\*)/
+ { KinoSearch::Search::LeafQuery->new( text => $1 ) }

END_GRAMMAR

-sub new { shift->SUPER::new($grammar) }
+# Constructor.  Forwards all arguments to the parent class's new(), then
+# builds a Parse::RecDescent parser from $grammar and stores it in the
+# inside-out hash %rd_parser under the key $$self.
+#
+# Dies via confess() if the grammar cannot be compiled.
+sub new {
+    my $class = shift;
+    my $self  = $class->SUPER::new(@_);
+
+    # Parse::RecDescent->new returns undef for an invalid grammar; fail here
+    # instead of later with a method call on undef inside tree().
+    $rd_parser{$$self} = Parse::RecDescent->new($grammar)
+        or confess("Failed to compile FlatQueryParser grammar");
+    return $self;
+}

+# Destructor: remove this object's entry from %rd_parser, then hand off to
+# the parent class's DESTROY.
+sub DESTROY {
+    my ($parser) = @_;
+    delete $rd_parser{ ${$parser} };
+    $parser->SUPER::DESTROY;
+}
+
+# Turn a query string into a Query object.  Returns the result of expand()
+# on the parsed tree, or a NoMatchQuery when tree() yields a false value.
+sub parse {
+    my $self         = shift;
+    my $query_string = shift;
+
+    my $parsed = $self->tree($query_string);
+    return KinoSearch::Search::NoMatchQuery->new unless $parsed;
+    return $self->expand($parsed);
+}
+
+# Feed the query string to this object's Parse::RecDescent parser via its
+# tree() method and return whatever that produces.
+sub tree {
+    my $self         = shift;
+    my $query_string = shift;
+    my $rd           = $rd_parser{$$self};
+    return $rd->tree($query_string);
+}
+
+# Expand a single leaf query.  Text ending in "*" becomes an ORQuery holding
+# one PrefixQuery per field returned by get_fields(); any other text is
+# handed to the parent class's expand_leaf().
+sub expand_leaf {
+    my ( $self, $leaf_query ) = @_;
+    my $leaf_text = $leaf_query->get_text;
+
+    # No trailing wildcard: use the default expansion.
+    return $self->SUPER::expand_leaf($leaf_query)
+        unless $leaf_text =~ /\*$/;
+
+    my $any_field = KinoSearch::Search::ORQuery->new;
+    for my $field_name ( @{ $self->get_fields } ) {
+        $any_field->add_child(
+            PrefixQuery->new(
+                field        => $field_name,
+                query_string => $leaf_text,
+            )
+        );
+    }
+    return $any_field;
+}
+
1;

__END__
@@ -83,11 +109,7 @@

=head1 DESCRIPTION

-FlatQueryParser is a subclass of KinoSearch::QueryParser which supports a more
-limited syntax: words and quoted phrases, but no parenthetical groupings,
-boolean operators C<AND OR NOT>, or prepended plus/minus. It's intended use
-is to serve as sample code for people who want to write their own
-grammar-based parsers.
+See L<KinoSearch::Docs::Cookbook::CustomQueryParser>.

=head1 COPYRIGHT


Added: trunk/perl/sample/PrefixQuery.pm
===================================================================
--- trunk/perl/sample/PrefixQuery.pm (rev 0)
+++ trunk/perl/sample/PrefixQuery.pm 2008-09-11 05:33:15 UTC (rev 3872)
@@ -0,0 +1,170 @@
+use strict;
+use warnings;
+
+package PrefixQuery;
+use base qw( KinoSearch::Search::Query );
+use Carp;
+
+# Per-object state lives in inside-out hashes keyed on $$self.
+my %query_string;
+my %field;
+
+# Accessor: the query string supplied to new().
+sub get_query_string {
+    my ($self) = @_;
+    return $query_string{$$self};
+}
+
+# Accessor: the field name supplied to new().
+sub get_field {
+    my ($self) = @_;
+    return $field{$$self};
+}
+
+# Constructor.
+#   query_string - required; must end in "*" (trailing whitespace allowed).
+#   field        - required.
+# Any other named arguments are passed to the parent constructor.
+# Dies via confess() when a required argument is missing or the query
+# string lacks the trailing wildcard.
+sub new {
+    my $class        = shift;
+    my %params       = @_;
+    my $query_string = delete $params{query_string};
+    my $field        = delete $params{field};
+    my $self         = $class->SUPER::new(%params);
+
+    if ( !defined $query_string ) {
+        confess("'query_string' param is required");
+    }
+    if ( $query_string !~ /\*\s*$/ ) {
+        confess("Invalid query_string: '$query_string'");
+    }
+    if ( !defined $field ) {
+        confess("'field' param is required");
+    }
+
+    $query_string{$$self} = $query_string;
+    $field{$$self}        = $field;
+    return $self;
+}
+
+# Destructor: discard this object's inside-out entries, then delegate to
+# the parent class.
+sub DESTROY {
+    my ($self) = @_;
+    delete $field{$$self};
+    delete $query_string{$$self};
+    $self->SUPER::DESTROY;
+}
+
+# Build the PrefixCompiler for this query, forwarding the caller's
+# arguments and passing this query as the compiler's parent.
+sub make_compiler {
+    my ( $self, @compiler_args ) = @_;
+    return PrefixCompiler->new( @compiler_args, parent => $self );
+}
+
+# Stringify as "field:query_string".
+sub to_string {
+    my ($self) = @_;
+    return join ':', $field{$$self}, $query_string{$$self};
+}
+
+package PrefixCompiler;
+use base qw( KinoSearch::Search::Compiler );
+
+# Build a PrefixScorer covering every term in the parent query's field that
+# starts with the query's prefix.
+#
+#   $index_reader - object providing lexicon() and posting_list().
+#
+# Returns nothing when the field has no lexicon or when no matching term
+# yields a posting list.
+sub make_scorer {
+    my ( $self, $index_reader ) = @_;
+    my $parent = $self->get_parent;
+
+    # Strip the trailing wildcard (and any whitespace after it) to obtain
+    # the literal prefix: "foo*" becomes "foo".
+    my $substring = $parent->get_query_string;
+    $substring =~ s/\*\s*$//;
+
+    # Acquire a Lexicon and seek it to our prefix.
+    my $field   = $parent->get_field;
+    my $lexicon = $index_reader->lexicon( field => $field );
+    return unless $lexicon;
+    $lexicon->seek($substring);
+
+    # Accumulate PostingLists for each matching term.
+    my @posting_lists;
+    while ( defined( my $term = $lexicon->get_term ) ) {
+
+        # Stop at the first term that no longer starts with the prefix.
+        # \Q...\E makes the prefix match literally rather than as a pattern.
+        last unless $term =~ /^\Q$substring\E/;
+        my $posting_list = $index_reader->posting_list(
+            field => $field,
+            term  => $term,
+        );
+        if ($posting_list) {
+            push @posting_lists, $posting_list;
+        }
+        last unless $lexicon->next;
+    }
+    return unless @posting_lists;
+
+    return PrefixScorer->new( posting_lists => \@posting_lists );
+}
+
+package PrefixScorer;
+use base qw( KinoSearch::Search::Scorer );
+
+# Inside-out member vars, keyed on $$self.
+my %doc_nums;    # arrayref of matching doc numbers, ascending
+my %tally;       # Tally object returned by tally()
+my %tick;        # current index into the doc_nums array
+
+# Constructor.
+#   posting_lists - arrayref of posting lists; each is drained by calling
+#                   next() until it returns a false value.
+# Any other named arguments are passed to the parent constructor.
+sub new {
+    my ( $class, %args ) = @_;
+    my $posting_lists = delete $args{posting_lists};
+    my $self          = $class->SUPER::new(%args);
+
+    # Cheesy but simple way of interleaving PostingList doc sets: hash keys
+    # de-duplicate the doc numbers, then a numeric sort orders them.
+    my %all_doc_nums;
+    for my $posting_list (@$posting_lists) {
+        while ( my $doc_num = $posting_list->next ) {
+            $all_doc_nums{$doc_num} = undef;
+        }
+    }
+    my @doc_nums = sort { $a <=> $b } keys %all_doc_nums;
+    $doc_nums{$$self} = \@doc_nums;
+
+    $tick{$$self}  = -1;    # nothing consumed until the first next()
+    $tally{$$self} = KinoSearch::Search::Tally->new;
+    $tally{$$self}->set_score(1.0);    # fixed score of 1.0
+
+    return $self;
+}
+
+# Destructor: discard this object's inside-out entries, then delegate to
+# the parent class.
+sub DESTROY {
+    my $self = shift;
+    delete $doc_nums{$$self};
+    delete $tick{$$self};
+    delete $tally{$$self};
+    $self->SUPER::DESTROY;
+}
+
+# Advance to the next doc number and return it, or return 0 once the list
+# is exhausted.
+sub next {
+    my $self     = shift;
+    my $doc_nums = $doc_nums{$$self};
+    my $tick     = ++$tick{$$self};
+    return 0 if $tick >= scalar @$doc_nums;
+    return $doc_nums->[$tick];
+}
+
+# Return the current doc number, or 0 when there is none: either next() has
+# not been called yet or the list is exhausted.
+sub get_doc_num {
+    my $self     = shift;
+    my $tick     = $tick{$$self};
+    my $doc_nums = $doc_nums{$$self};
+
+    # Check the lower bound as well: the tick starts at -1, and a negative
+    # subscript would return the LAST doc number instead of 0.
+    return 0 if $tick < 0 or $tick >= scalar @$doc_nums;
+    return $doc_nums->[$tick];
+}
+
+# Return the Tally object created in new().
+sub tally {
+    my $self = shift;
+    return $tally{$$self};
+}
+
+1;
+
+__END__
+
+__POD__
+
+=head1 NAME
+
+PrefixQuery - Sample subclass of KinoSearch::Query, supporting trailing
+wildcards.
+
+=head1 SYNOPSIS
+
+ my $prefix_query = PrefixQuery->new(
+ field => 'content',
+ query_string => 'foo*',
+ );
+ my $hits = $searcher->search( query => $prefix_query );
+
+=head1 DESCRIPTION
+
+See L<KinoSearch::Docs::Cookbook::CustomQuery>.
+
+=head1 COPYRIGHT
+
+Copyright 2008 Marvin Humphrey
+
+=head1 LICENSE, DISCLAIMER, BUGS, etc.
+
+See L<KinoSearch> version 0.20.
+
+=cut
+


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits