Mailing List Archive

r3680 - in trunk: c_src/KinoSearch/Highlight c_src/KinoSearch/Search perl/lib perl/lib/KinoSearch/Highlight perl/lib/KinoSearch/Search perl/t
Author: creamyg
Date: 2008-07-30 13:30:18 -0700 (Wed, 30 Jul 2008)
New Revision: 3680

Modified:
trunk/c_src/KinoSearch/Highlight/Highlighter.bp
trunk/c_src/KinoSearch/Highlight/Highlighter.c
trunk/c_src/KinoSearch/Search/Searchable.bp
trunk/c_src/KinoSearch/Search/Searchable.c
trunk/perl/lib/KinoSearch.pm
trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm
trunk/perl/lib/KinoSearch/Search/Searchable.pm
trunk/perl/t/509-multi_searcher.t
Log:
Replace Searchable_Prepare_Simple_Search with Searchable_Glean_Query and
expose it as a public method. Cache QueryParser within a Searchable when
Glean_Query() is called rather than creating one anew each time. Eliminate
Perl code in Highlighter's constructor, since we can call Glean_Query() from
C. Lazy load HTML::Entities.


Modified: trunk/c_src/KinoSearch/Highlight/Highlighter.bp
===================================================================
--- trunk/c_src/KinoSearch/Highlight/Highlighter.bp 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/c_src/KinoSearch/Highlight/Highlighter.bp 2008-07-30 20:30:18 UTC (rev 3680)
@@ -13,11 +13,11 @@
Compiler *compiler;

static incremented Highlighter*
- new(Searchable *searchable, Query *query, const CharBuf *field,
+ new(Searchable *searchable, Obj *query, const CharBuf *field,
u32_t excerpt_length = 200);

static incremented Highlighter*
- init(Highlighter *self, Searchable *searchable, Query *query,
+ init(Highlighter *self, Searchable *searchable, Obj *query,
const CharBuf *field, u32_t excerpt_length = 200);

CharBuf*

Modified: trunk/c_src/KinoSearch/Highlight/Highlighter.c
===================================================================
--- trunk/c_src/KinoSearch/Highlight/Highlighter.c 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/c_src/KinoSearch/Highlight/Highlighter.c 2008-07-30 20:30:18 UTC (rev 3680)
@@ -13,7 +13,7 @@
#include "KinoSearch/Util/Native.h"

Highlighter*
-Highlighter_new(Searchable *searchable, Query *query, const CharBuf *field,
+Highlighter_new(Searchable *searchable, Obj *query, const CharBuf *field,
u32_t excerpt_length)
{
Highlighter *self = (Highlighter*)CREATE(NULL, HIGHLIGHTER);
@@ -21,14 +21,14 @@
}

Highlighter*
-Highlighter_init(Highlighter *self, Searchable *searchable, Query *query,
+Highlighter_init(Highlighter *self, Searchable *searchable, Obj *query,
const CharBuf *field, u32_t excerpt_length)
{
+ self->query = Searchable_Glean_Query(searchable, query);
self->searchable = REFCOUNT_INC(searchable);
- self->query = REFCOUNT_INC(query);
self->field = CB_Clone(field);
- self->compiler = Query_Make_Compiler(query, searchable,
- Query_Get_Boost(query));
+ self->compiler = Query_Make_Compiler(self->query, searchable,
+ Query_Get_Boost(self->query));
self->excerpt_length = excerpt_length;
self->slop = excerpt_length / 3;
self->window_width = excerpt_length + (self->slop * 2);

Modified: trunk/c_src/KinoSearch/Search/Searchable.bp
===================================================================
--- trunk/c_src/KinoSearch/Search/Searchable.bp 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/c_src/KinoSearch/Search/Searchable.bp 2008-07-30 20:30:18 UTC (rev 3680)
@@ -8,7 +8,8 @@

class KinoSearch::Search::Searchable extends KinoSearch::Obj {

- Schema *schema;
+ Schema *schema;
+ QueryParser *qparser;

/** Abstract constructor.
*
@@ -36,19 +37,16 @@
abstract u32_t
Doc_Freq(Searchable *self, const CharBuf *field, Obj *term);

- /** Use the supplied query string to prepare a composite Query against all
+ /** If the supplied object is a Query, return it; if it's a query string,
+ * create a QueryParser and parse it to produce a query against all
* indexed fields.
*/
incremented Query*
- Prepare_Simple_Search(Searchable *self, const CharBuf *query = NULL);
+ Glean_Query(Searchable *self, Obj *query = NULL);

/** Return a Hits object containing the top results.
*
- * @param query Either a Query object or a query string. If it's a query
- * string, it will be parsed using a QueryParser and a search will be
- * performed against all indexed fields. For more sophisticated
- * searching, supply Query objects, such as TermQuery, PhraseQuery,
- * ANDQuery, etc.
+ * @param query Either a Query object or a query string.
* @param offset The number of most-relevant hits to discard, typically
* used when "paging" through hits N at a time. Setting
* <code>offset</code> to 20 and <code>num_wanted</code> to 10 retrieves

Modified: trunk/c_src/KinoSearch/Search/Searchable.c
===================================================================
--- trunk/c_src/KinoSearch/Search/Searchable.c 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/c_src/KinoSearch/Search/Searchable.c 2008-07-30 20:30:18 UTC (rev 3680)
@@ -18,7 +18,8 @@
Searchable*
Searchable_init(Searchable *self, Schema *schema)
{
- self->schema = REFCOUNT_INC(schema);
+ self->schema = REFCOUNT_INC(schema);
+ self->qparser = NULL;
ABSTRACT_CLASS_CHECK(self, SEARCHABLE);
return self;
}
@@ -27,6 +28,7 @@
Searchable_destroy(Searchable *self)
{
REFCOUNT_DEC(self->schema);
+ REFCOUNT_DEC(self->qparser);
FREE_OBJ(self);
}

@@ -34,10 +36,20 @@
Searchable_search(Searchable *self, Obj *query, u32_t offset,
u32_t num_wanted, SortSpec *sort_spec)
{
- Query *real_query = NULL;
- TopDocs *top_docs;
- Hits *hits;
+ Query *real_query = Searchable_Glean_Query(self, query);
+ TopDocs *top_docs = Searchable_Top_Docs(self, real_query,
+ offset + num_wanted, sort_spec);
+ Hits *hits = Hits_new(self, top_docs, offset);
+ REFCOUNT_DEC(top_docs);
+ REFCOUNT_DEC(real_query);
+ return hits;
+}

+Query*
+Searchable_glean_query(Searchable *self, Obj *query)
+{
+ Query *real_query = NULL;
+
if (!query) {
real_query = (Query*)NoMatchQuery_new();
}
@@ -45,31 +57,18 @@
real_query = (Query*)REFCOUNT_INC(query);
}
else if (OBJ_IS_A(query, CHARBUF)) {
- real_query = Searchable_Prepare_Simple_Search(self, (CharBuf*)query);
+ if (!self->qparser)
+ self->qparser = QParser_new(self->schema, NULL, NULL, NULL);
+ real_query = QParser_Parse(self->qparser, (CharBuf*)query);
}
else {
CONFESS("Invalid type for 'query' param: %o",
Obj_Get_Class_Name(query));
}

- top_docs = Searchable_Top_Docs(self, real_query, offset + num_wanted,
- sort_spec);
- hits = Hits_new(self, top_docs, offset);
- REFCOUNT_DEC(top_docs);
- REFCOUNT_DEC(real_query);
- return hits;
+ return real_query;
}

-Query*
-Searchable_prepare_simple_search(Searchable *self,
- const CharBuf *query_string)
-{
- QueryParser *qparser = QParser_new(self->schema, NULL, NULL, NULL);
- Query *query = QParser_Parse(qparser, query_string);
- REFCOUNT_DEC(qparser);
- return query;
-}
-
Schema*
Searchable_get_schema(Searchable *self)
{

Modified: trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm 2008-07-30 20:30:18 UTC (rev 3680)
@@ -22,7 +22,7 @@
],
make_getters =>
[qw( searchable query excerpt_length compiler field )],
- make_constructors => ["_new"],
+ make_constructors => ["new"],
}
}


Modified: trunk/perl/lib/KinoSearch/Search/Searchable.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Search/Searchable.pm 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/perl/lib/KinoSearch/Search/Searchable.pm 2008-07-30 20:30:18 UTC (rev 3680)
@@ -20,7 +20,7 @@
bind_methods => [
qw( max_docs
doc_freq
- prepare_simple_search
+ glean_query
search
collect
top_docs
@@ -36,6 +36,7 @@
methods => [ qw(
search
collect
+ glean_query
max_docs
doc_freq
get_schema

Modified: trunk/perl/lib/KinoSearch.pm
===================================================================
--- trunk/perl/lib/KinoSearch.pm 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/perl/lib/KinoSearch.pm 2008-07-30 20:30:18 UTC (rev 3680)
@@ -425,25 +425,17 @@
package KinoSearch::Highlight::Highlighter;
use KinoSearch::Util::ToolSet qw( confess a_isa_b );

- use HTML::Entities qw( encode_entities );
- use KinoSearch::Highlight::HeatMap;
+ my $html_entities_loaded = 0;

- sub new {
- my ( $either, %args ) = @_;
- my $query = delete $args{query};
- my $searchable = $args{searchable};
- confess 'searchable is mandatory'
- unless a_isa_b( $searchable, "KinoSearch::Search::Searchable" );
- # Turn a query string into an object.
- if ( !a_isa_b( $query, 'KinoSearch::Search::Query' ) ) {
- $query = $searchable->prepare_simple_search($query);
+ sub do_encode {
+ if ( !$html_entities_loaded ) {
+ require HTML::Entities;
+ HTML::Entities->import('encode_entities');
+ $html_entities_loaded = 1;
}
- my $self = $either->_new( %args, query => $query );
- return $self;
+ return encode_entities( $_[1] );
}

- sub do_encode { return encode_entities( $_[1] ) }
-
sub find_sentence_boundaries {
my $self = shift;
return $self->_fsb(@_)->to_arrayref;

Modified: trunk/perl/t/509-multi_searcher.t
===================================================================
--- trunk/perl/t/509-multi_searcher.t 2008-07-30 18:17:13 UTC (rev 3679)
+++ trunk/perl/t/509-multi_searcher.t 2008-07-30 20:30:18 UTC (rev 3680)
@@ -43,6 +43,6 @@
capacity => $multi_searcher->max_docs );
my $bitcoll = KinoSearch::Search::HitCollector::BitCollector->new(
bit_vector => $bit_vec );
-my $query = $multi_searcher->prepare_simple_search('b');
+my $query = $multi_searcher->glean_query('b');
$multi_searcher->collect( query => $query, collector => $bitcoll );
is_deeply( $bit_vec->to_arrayref, [ 2, 4 ], "collect" );


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits