Mailing List Archive

r3688 - in trunk: c_src/KinoSearch/Highlight perl/lib/KinoSearch/Highlight
Author: creamyg
Date: 2008-08-02 11:32:46 -0700 (Sat, 02 Aug 2008)
New Revision: 3688

Modified:
trunk/c_src/KinoSearch/Highlight/Highlighter.bp
trunk/c_src/KinoSearch/Highlight/Highlighter.c
trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm
Log:
Port Highlighter docs and setters/getters to C.


Modified: trunk/c_src/KinoSearch/Highlight/Highlighter.bp
===================================================================
--- trunk/c_src/KinoSearch/Highlight/Highlighter.bp 2008-07-31 15:57:38 UTC (rev 3687)
+++ trunk/c_src/KinoSearch/Highlight/Highlighter.bp 2008-08-02 18:32:46 UTC (rev 3688)
@@ -1,5 +1,12 @@
parcel KinoSearch cnick Kino;

+/** Create and highlight excerpts.
+ *
+ * The Highlighter can be used to select relevant snippets from a
+ * document, and to surround search terms with highlighting tags. It handles
+ * both stems and phrases correctly and efficiently, using special-purpose data
+ * generated at index-time.
+*/
class KinoSearch::Highlight::Highlighter extends KinoSearch::Obj {

Searchable *searchable;
@@ -16,13 +23,31 @@
new(Searchable *searchable, Obj *query, const CharBuf *field,
u32_t excerpt_length = 200);

+ /**
+ * @param searchable An object which inherits from
+ * L<Searchable|KinoSearch::Search::Searchable>, such as a
+ * L<Searcher|KinoSearch::Searcher>.
+ * @param query Query object or a query string.
+ * @param field The name of the field from which to draw the excerpt. The field
+ * must be C<vectorized> (which is the default -- see
+ * L<FieldSpec|KinoSearch::FieldSpec>).
+ * @param excerpt_length Maximum length of the excerpt, in characters.
+ */
static incremented Highlighter*
init(Highlighter *self, Searchable *searchable, Obj *query,
const CharBuf *field, u32_t excerpt_length = 200);

+ /** Take a HitDoc object and return a highlighted excerpt as a string if
+ * the HitDoc has a value for the specified <code>field</code>.
+ */
public incremented CharBuf*
Create_Excerpt(Highlighter *self, HitDoc *hit_doc);

+ /** Encode text with HTML entities. This method is called internally by
+ * Create_Excerpt() for each text fragment when assembling an excerpt. A
+ * subclass can override this if the text should be encoded differently or
+ * not at all.
+ */
public incremented CharBuf*
Encode(Highlighter *self, CharBuf *text);

@@ -43,21 +68,59 @@
Find_Sentences(Highlighter *self, CharBuf *text, i32_t offset = 0,
i32_t length = 0);

+ /** Highlight a small section of text. By default, prepends pre-tag and
+ * appends post-tag. This method is called internally by Create_Excerpt()
+ * when assembling an excerpt.
+ */
public incremented CharBuf*
Highlight(Highlighter *self, const CharBuf *text);

+ /** Setter. The default value is "<strong>".
+ */
void
Set_Pre_Tag(Highlighter *self, const CharBuf *pre_tag);

+ /** Setter. The default value is "</strong>".
+ */
void
Set_Post_Tag(Highlighter *self, const CharBuf *post_tag);

+ /** Accessor.
+ */
CharBuf*
Get_Pre_Tag(Highlighter *self);

+ /** Accessor.
+ */
CharBuf*
Get_Post_Tag(Highlighter *self);

+ /** Accessor.
+ */
+ CharBuf*
+ Get_Field(Highlighter *self);
+
+ /** Accessor.
+ */
+ u32_t
+ Get_Excerpt_Length(Highlighter *self);
+
+ /** Accessor.
+ */
+ Searchable*
+ Get_Searchable(Highlighter *self);
+
+ /** Accessor.
+ */
+ Query*
+ Get_Query(Highlighter *self);
+
+ /** Accessor for the KinoSearch::Search::Compiler object derived from
+ * <code>query</code> and <code>searchable</code>.
+ */
+ Compiler*
+ Get_Compiler(Highlighter *self);
+
/* Decide based on heat map the best fragment of field to concentrate on.
* Place the result into [fragment] and return its offset in code points
* from the top of the field.

Modified: trunk/c_src/KinoSearch/Highlight/Highlighter.c
===================================================================
--- trunk/c_src/KinoSearch/Highlight/Highlighter.c 2008-07-31 15:57:38 UTC (rev 3687)
+++ trunk/c_src/KinoSearch/Highlight/Highlighter.c 2008-08-02 18:32:46 UTC (rev 3688)
@@ -70,10 +70,22 @@
void
Highlighter_set_post_tag(Highlighter *self, const CharBuf *post_tag)
{ CB_Copy(self->post_tag, post_tag); }
+
CharBuf*
-Highlighter_get_pre_tag(Highlighter *self) { return self->pre_tag; }
+Highlighter_get_pre_tag(Highlighter *self) { return self->pre_tag; }
CharBuf*
-Highlighter_get_post_tag(Highlighter *self) { return self->post_tag; }
+Highlighter_get_post_tag(Highlighter *self) { return self->post_tag; }
+CharBuf*
+Highlighter_get_field(Highlighter *self) { return self->field; }
+Query*
+Highlighter_get_query(Highlighter *self) { return self->query; }
+Searchable*
+Highlighter_get_searchable(Highlighter *self) { return self->searchable; }
+Compiler*
+Highlighter_get_compiler(Highlighter *self) { return self->compiler; }
+u32_t
+Highlighter_get_excerpt_length(Highlighter *self)
+ { return self->excerpt_length; }

CharBuf*
Highlighter_create_excerpt(Highlighter *self, HitDoc *hit_doc)

Modified: trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm 2008-07-31 15:57:38 UTC (rev 3687)
+++ trunk/perl/lib/KinoSearch/Highlight/Highlighter.pm 2008-08-02 18:32:46 UTC (rev 3688)
@@ -6,6 +6,28 @@

__AUTO_XS__

+my $synopsis = <<'END_SYNOPSIS';
+ my $highlighter = KinoSearch::Highlight::Highlighter->new(
+ searchable => $searcher,
+ query => $query,
+ field => 'body'
+ );
+ my $hits = $searcher->search( query => $query );
+ while ( my $hit = $hits->fetch_hit ) {
+ my $excerpt = $highlighter->create_excerpt($hit);
+ ...
+ }
+END_SYNOPSIS
+
+my $constructor = <<'END_CONSTRUCTOR';
+ my $highlighter = KinoSearch::Highlight::Highlighter->new(
+ searchable => $searcher, # required
+ query => $query, # required
+ field => 'content', # required
+ excerpt_length => 150, # default: 200
+ );
+END_CONSTRUCTOR
+
{ "KinoSearch::Highlight::Highlighter" => {
bind_methods => [
qw( highlight
@@ -14,120 +36,43 @@
_find_best_fragment|find_best_fragment
_raw_excerpt|raw_excerpt
_highlight_excerpt|highlight_excerpt
+ _find_sentences|find_sentences
set_pre_tag
get_pre_tag
set_post_tag
get_post_tag
- _find_sentences|find_sentences )
+ get_searchable
+ get_query
+ get_compiler
+ get_excerpt_length
+ get_field )
],
- make_getters =>
- [qw( searchable query excerpt_length compiler field )],
make_constructors => ["new"],
+ make_pod => {
+ synopsis => $synopsis,
+ constructor => { sample => $constructor },
+ methods => [
+ qw( create_excerpt
+ highlight
+ encode
+ set_pre_tag
+ get_pre_tag
+ set_post_tag
+ get_post_tag
+ get_searchable
+ get_query
+ get_compiler
+ get_excerpt_length
+ get_field )
+ ]
+ },
}
}

-__POD__
+__COPYRIGHT__

-=head1 NAME
-
-KinoSearch::Highlight::Highlighter - Create and highlight excerpts.
-
-=head1 SYNOPSIS
-
- my $highlighter = KinoSearch::Highlight::Highlighter->new(
- searchable => $searcher,
- query => 'foo bar',
- field => 'body'
- );
-
- my $excerpt = $highlighter->create_excerpt( $hit );
-
-=head1 DESCRIPTION
-
-KinoSearch's Highlighter can be used to select relevant snippets from a
-document, and to surround search terms with highlighting tags. It handles
-both stems and phrases correctly and efficiently, using special-purpose data
-generated at index-time.
-
-=head1 METHODS
-
-=head2 new
-
- my $highlighter = KinoSearch::Highlight::Highlighter->new(
- searchable => $searcher, # required
- query => $query, # required
- field => 'content', # required
- excerpt_length => 150, # default: 200
- );
-
-Constructor. Takes hash-style parameters:
-
-=over
-
-=item *
-
-B<searchable> - An object which isa
-L<Searchable|KinoSearch::Search::Searchable>, such as a
-L<Searcher|KinoSearch::Searcher>.
-
-=item *
-
-B<query> - the Query object or a query string.
-
-=item *
-
-B<field> - the name of the field from which to draw the excerpt. The field
-must be C<vectorized> (which is the default -- see
-L<FieldSpec|KinoSearch::FieldSpec>).
-
-=item *
-
-B<excerpt_length> - the maximum length of the excerpt, in characters.
-
-=back
-
-=head2 get_pre_tag, set_pre_tag
-
-These are accessor methods. The C<pre_tag> is a string which will be
-inserted immediately prior to a word or phrase that is to be highlighted.
-The default is '<strong>'. If you don't want highlighting, set both
-C<pre_tag> and C<post_tag> to C<''>.
-
-=head2 get_post_tag, set_post_tag
-
-These, too, are accessor methods. The post_tag is a string which will be
-inserted immediately after the highlightable text. The default is
-'</strong>'.
-
-=head2 create_excerpt
-
-This method takes a hit (a HitDoc object) as its sole argument and returns
-an excerpt as a string.
-
-=head2 get_compiler
-
-This returns a KinoSearch::Search::Compiler object associated with the query.
-
-=head2 get_excerpt_length, get_searchable, get_field
-
-These return the values passed to the constructor.
-
-=head2 highlight
-
-This method highlights the text passed to it by putting tags before and
-after. This method exists so that it can be overridden in a subclass.
-
-=head2 encode
-
-This method encodes the text passed to it with HTML entities. A subclass
-can override this if the text must be encoded differently or not at all.
-
-=head1 COPYRIGHT
-
Copyright 2005-2008 Marvin Humphrey

-=head1 LICENSE, DISCLAIMER, BUGS, etc.
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.

-See L<KinoSearch> version 0.20.
-
-=cut


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits