Mailing List Archive

newbie: Indexing and searching text not working
Hi list,

I am a newbie here
I have written an indexer to index all my mails. I am able to search on
to & from fields but not search on the actual body
can someone help me where I am going wrong

I have the script below




---------------------------------------------------------------------------------------------------------------
use KinoSearch::InvIndexer;
use KinoSearch::Analysis::PolyAnalyzer;
use KinoSearch::Searcher;
use strict;
#
# Start on a clean slate
#
system("rm -rf /tmp/invindex/*");

my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language =>
'en' );
@gl::headers = qw(from to cc subject body date reply-to message-id
in-reply-to filename);
my $invindexer = KinoSearch::InvIndexer->new(
invindex => '/tmp/invindex',
create => 1,
analyzer => $analyzer,
);
foreach (@gl::headers) {
$invindexer->spec_field( name => $_ ,indexed =>1);
}
my $doc = $invindexer->new_doc;


my %mail = (
'date' => 'Mon, 07 Jan 2008 14:04:35 +0530',
'to' => 'myteam@example.com',
'subject' => 'subject test here ',
'body' => 'This is a very short body here',
'cc' => 'ram@example.com',
'from' => 'sagar@example.com',
'message-id' => '<1199694875.14998.392.camel@sagar.example.com>',
'filename'=>'/abc/def'
);
foreach (keys %mail) {
next unless($mail{$_});
$doc->set_value( $_ => $mail{$_} );
}
$invindexer->add_doc($doc);
$invindexer->finish;


$analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language =>
'en' );
my $searcher = KinoSearch::Searcher->new(
invindex => '/tmp/invindex',
analyzer => $analyzer,
);



#
# Search on From ... works fine
#
my $term = KinoSearch::Index::Term->new("from","sagar");
my $term_query = KinoSearch::Search::TermQuery->new(term => $term);
my $hits = $searcher->search( query => $term_query );
while ( my $hit = $hits->fetch_hit_hashref ){
print "Found HIT in from" . $hit->{filename}."\n";
}


#
# Search on body ... no results
#
my $term = KinoSearch::Index::Term->new("body","short body");
my $term_query = KinoSearch::Search::TermQuery->new(term => $term);
my $hits = $searcher->search( query => $term_query );
while ( my $hit = $hits->fetch_hit_hashref ){
print "Found HIT in body" . $hit->{filename}."\n";
}


------------------------------------------------------------------------------------------------------------------------







*


_______________________________________________
KinoSearch mailing list
KinoSearch@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch
newbie: Indexing and searching text not working [ In reply to ]
Hi,

After creating your index with PolyAnalyzer, your body field will have the
terms "short" and "body" but not "short body." Take a look at
KinoSearch::QueryParser::QueryParser as it will likely do what you want.

Best,
Mike

On Sat, Aug 23, 2008 at 2:58 PM, <kinosearch-request@rectangular.com> wrote:

> Date: Sat, 23 Aug 2008 18:31:58 +0530
> From: ram <ram@netcore.co.in>
> Subject: [KinoSearch] newbie: Indexing and searching text not working
> To: kinosearch@rectangular.com
> Message-ID: <1219496518.25503.64.camel@darkstar.netcore.co.in>
> Content-Type: text/plain
>
> Hi list,
>
> I am a newbie here
> I have written an indexer to index all my mails. I am able to search on
> to & from fields but not search on the actual body
> can someone help me where I am going wrong
>
> I have the script below
>
>
>
>
>
> ---------------------------------------------------------------------------------------------------------------
> use KinoSearch::InvIndexer;
> use KinoSearch::Analysis::PolyAnalyzer;
> use KinoSearch::Searcher;
> use strict;
> #
> # Start on a clean slate
> #
> system("rm -rf /tmp/invindex/*");
>
> my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language =>
> 'en' );
> @gl::headers = qw(from to cc subject body date reply-to message-id
> in-reply-to filename);
> my $invindexer = KinoSearch::InvIndexer->new(
> invindex => '/tmp/invindex',
> create => 1,
> analyzer => $analyzer,
> );
> foreach (@gl::headers) {
> $invindexer->spec_field( name => $_ ,indexed =>1);
> }
> my $doc = $invindexer->new_doc;
>
>
> my %mail = (
> 'date' => 'Mon, 07 Jan 2008 14:04:35 +0530',
> 'to' => 'myteam@example.com',
> 'subject' => 'subject test here ',
> 'body' => 'This is a very short body here',
> 'cc' => 'ram@example.com',
> 'from' => 'sagar@example.com',
> 'message-id' => '<1199694875.14998.392.camel@sagar.example.com>',
> 'filename'=>'/abc/def'
> );
> foreach (keys %mail) {
> next unless($mail{$_});
> $doc->set_value( $_ => $mail{$_} );
> }
> $invindexer->add_doc($doc);
> $invindexer->finish;
>
>
> $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language =>
> 'en' );
> my $searcher = KinoSearch::Searcher->new(
> invindex => '/tmp/invindex',
> analyzer => $analyzer,
> );
>
>
>
> #
> # Search on From ... works fine
> #
> my $term = KinoSearch::Index::Term->new("from","sagar");
> my $term_query = KinoSearch::Search::TermQuery->new(term => $term);
> my $hits = $searcher->search( query => $term_query );
> while ( my $hit = $hits->fetch_hit_hashref ){
> print "Found HIT in from" . $hit->{filename}."\n";
> }
>
>
> #
> # Search on body ... no results
> #
> my $term = KinoSearch::Index::Term->new("body","short body");
> my $term_query = KinoSearch::Search::TermQuery->new(term => $term);
> my $hits = $searcher->search( query => $term_query );
> while ( my $hit = $hits->fetch_hit_hashref ){
> print "Found HIT in body" . $hit->{filename}."\n";
> }
>
>
Re: newbie: Indexing and searching text not working [ In reply to ]
On Sat, 2008-08-23 at 15:22 -0400, Mike Barborak wrote:
> Hi,
>
> After creating your index with PolyAnalyzer, your body field will have
> the terms "short" and "body" but not "short body." Take a look at
> KinoSearch::QueryParser::QueryParser as it will likely do what you
> want.

I think my installation has got some issue. I cant search on a single
word too



---------------------------------------
use KinoSearch::InvIndexer;
use KinoSearch::Analysis::PolyAnalyzer;
use KinoSearch::Searcher;
use strict;
#
# Start on a clean slate
#
system("rm -rf /tmp/invindex/*");
my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language =>
'en' );
@gl::headers = qw(from to cc subject body date reply-to message-id
in-reply-to filename);
my $invindexer = KinoSearch::InvIndexer->new(
invindex => '/tmp/invindex',
create => 1,
analyzer => $analyzer,
);
foreach (@gl::headers) {
$invindexer->spec_field( name => $_ ,indexed =>1);
}
my $doc = $invindexer->new_doc;
my %mail = (
'date' => 'Mon, 07 Jan 2008 14:04:35 +0530',
'to' => 'myteam@example.com',
'subject' => 'subject test here ',
'body' => 'This is a very short body here ',
'cc' => 'ram@example.com',
'from' => 'sagar@example.com',
'message-id' => '<1199694875.14998.392.camel@sagar.example.com>',
'filename'=>'/abc/def'
);
foreach (keys %mail) {
next unless($mail{$_});
$doc->set_value( $_ => $mail{$_} );
}
$invindexer->add_doc($doc);
$invindexer->finish;


$analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language =>
'en' );
my $searcher = KinoSearch::Searcher->new(
invindex => '/tmp/invindex',
analyzer => $analyzer,
);
#
# Search on body
#
my $term = KinoSearch::Index::Term->new("body","very");
my $term_query = KinoSearch::Search::TermQuery->new(term => $term);
my $hits = $searcher->search( query => $term_query );
while ( my $hit = $hits->fetch_hit_hashref ){
print "Found HIT in body" . $hit->{body}."\n";
}

-----------------------------------------------------------------

I am using Fedora-8 and perl-5.10 and latest kinosearch installed via
CPAN



_______________________________________________
KinoSearch mailing list
KinoSearch@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch