Mailing List Archive

r3699 - in trunk/perl: buildlib lib lib/KinoSearch/Analysis t
Author: creamyg
Date: 2008-08-03 00:30:25 -0700 (Sun, 03 Aug 2008)
New Revision: 3699

Modified:
trunk/perl/buildlib/KinoTestUtils.pm
trunk/perl/lib/KinoSearch.pm
trunk/perl/lib/KinoSearch/Analysis/PolyAnalyzer.pm
trunk/perl/lib/KinoSearch/Analysis/Tokenizer.pm
trunk/perl/t/150-polyanalyzer.t
trunk/perl/t/151-analyzer.t
trunk/perl/t/153-lc_normalizer.t
trunk/perl/t/154-tokenizer.t
trunk/perl/t/155-stopalizer.t
trunk/perl/t/156-stemmer.t
Log:
Kill off Analyzer::transform_field().


Modified: trunk/perl/buildlib/KinoTestUtils.pm
===================================================================
--- trunk/perl/buildlib/KinoTestUtils.pm 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/buildlib/KinoTestUtils.pm 2008-08-03 07:30:25 UTC (rev 3699)
@@ -182,7 +182,7 @@
return ( $smiley, $not_a_smiley, $frowny );
}

-# Verify an Analyzer's transform, transform_field, transform_text, and split methods.
+# Verify an Analyzer's transform, transform_text, and split methods.
sub test_analyzer {
my ( $analyzer, $source, $expected, $message ) = @_;

@@ -203,14 +203,6 @@

@got = @{ $analyzer->split($source) };
Test::More::is_deeply( \@got, $expected, "split: $message" );
-
- $batch = $analyzer->transform_field(
- KinoSearch::Doc->new( fields => { content => $source } ), 'content' );
- @got = ();
- while ( my $token = $batch->next ) {
- push @got, $token->get_text;
- }
- Test::More::is_deeply( \@got, $expected, "transform_field: $message" );
}

# Extract all doc nums from a TopDocCollector. Return two sorted array refs:

Modified: trunk/perl/lib/KinoSearch/Analysis/PolyAnalyzer.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Analysis/PolyAnalyzer.pm 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/lib/KinoSearch/Analysis/PolyAnalyzer.pm 2008-08-03 07:30:25 UTC (rev 3699)
@@ -7,7 +7,7 @@
__AUTO_XS__

{ "KinoSearch::Analysis::PolyAnalyzer" => {
- make_constructors => ["_new"],
+ make_constructors => ["new"],
make_getters => [qw( analyzers )],
},
}

Modified: trunk/perl/lib/KinoSearch/Analysis/Tokenizer.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Analysis/Tokenizer.pm 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/lib/KinoSearch/Analysis/Tokenizer.pm 2008-08-03 07:30:25 UTC (rev 3699)
@@ -33,24 +33,6 @@
self->token_re = mg->mg_obj;
}

-SV*
-transform_field(self, doc, field_sv)
- kino_Tokenizer *self;
- kino_Doc *doc;
- SV *field_sv;
-CODE:
-{
- STRLEN field_name_len;
- char *field_name = SvPV(field_sv, field_name_len);
- SV *string_sv = extract_sv( (HV*)doc->fields, field_name, field_name_len );
- STRLEN len;
- char *string = SvPV(string_sv, len);
- kino_TokenBatch *batch = kino_TokenBatch_new(NULL);
- Kino_Tokenizer_Tokenize_Str(self, string, len, batch);
- KOBJ_TO_SV_NOINC(batch, RETVAL);
-}
-OUTPUT: RETVAL
-
__POD__

=head1 NAME

Modified: trunk/perl/lib/KinoSearch.pm
===================================================================
--- trunk/perl/lib/KinoSearch.pm 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/lib/KinoSearch.pm 2008-08-03 07:30:25 UTC (rev 3699)
@@ -88,13 +88,6 @@
package KinoSearch::Analysis::Analyzer;
use KinoSearch::Util::StringHelper qw( utf8_flag_on );

- sub transform_field {
- my ( $self, $doc, $field_name ) = @_;
- my $batch = KinoSearch::Analysis::TokenBatch->new(
- text => $doc->{$field_name} );
- return $self->transform($batch);
- }
-
sub split {
my $retval = _split(@_)->to_perl;
utf8_flag_on($_) for @$retval;
@@ -103,50 +96,6 @@
}

{
- package KinoSearch::Analysis::LCNormalizer;
-
- sub transform_field {
- return KinoSearch::Analysis::TokenBatch->new(
- text => lc( $_[1]->{ $_[2] } ) );
- }
-}
-
-{
- package KinoSearch::Analysis::PolyAnalyzer;
-
- our %instance_vars = __PACKAGE__->init_instance_vars(
- cached_analyzers => \our %cached_analyzers, );
-
- sub new {
- my ( $either, %args ) = @_;
-
- my $self = $either->_new(%args);
- # Cache analyzers as Perl array as a lame-o optimization until
- # transform_field gets ported to C.
- $cached_analyzers{$$self} = $self->get_analyzers->to_perl;
- return $self;
- }
-
- sub transform_field {
- my $analyzers = $cached_analyzers{ ${ $_[0] } };
-
- if ( !@$analyzers ) {
- return KinoSearch::Analysis::TokenBatch->new(
- text => $_[1]->{ $_[2] } );
- }
- elsif ( @$analyzers == 1 ) {
- return $analyzers->[0]->transform_field( $_[1], $_[2] );
- }
- else {
- my $batch = $analyzers->[0]->transform_field( $_[1], $_[2] );
- $batch = $_->transform($batch)
- for @{$analyzers}[ 1 .. $#$analyzers ];
- return $batch;
- }
- }
-}
-
-{
package KinoSearch::Analysis::Stemmer;
sub lazy_load_snowball { require Lingua::Stem::Snowball }
}

Modified: trunk/perl/t/150-polyanalyzer.t
===================================================================
--- trunk/perl/t/150-polyanalyzer.t 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/t/150-polyanalyzer.t 2008-08-03 07:30:25 UTC (rev 3699)
@@ -2,7 +2,7 @@
use warnings;
use lib 'buildlib';

-use Test::More tests => 20;
+use Test::More tests => 15;

use KinoTestUtils qw( test_analyzer );


Modified: trunk/perl/t/151-analyzer.t
===================================================================
--- trunk/perl/t/151-analyzer.t 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/t/151-analyzer.t 2008-08-03 07:30:25 UTC (rev 3699)
@@ -2,7 +2,7 @@
use warnings;
use lib 'buildlib';

-use Test::More tests => 6;
+use Test::More tests => 5;

use KinoSearch::Analysis::Analyzer;
use KinoTestUtils qw( utf8_test_strings test_analyzer );

Modified: trunk/perl/t/153-lc_normalizer.t
===================================================================
--- trunk/perl/t/153-lc_normalizer.t 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/t/153-lc_normalizer.t 2008-08-03 07:30:25 UTC (rev 3699)
@@ -2,7 +2,7 @@
use warnings;
use lib 'buildlib';

-use Test::More tests => 4;
+use Test::More tests => 3;
use KinoTestUtils qw( test_analyzer );

use KinoSearch::Analysis::LCNormalizer;

Modified: trunk/perl/t/154-tokenizer.t
===================================================================
--- trunk/perl/t/154-tokenizer.t 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/t/154-tokenizer.t 2008-08-03 07:30:25 UTC (rev 3699)
@@ -1,7 +1,7 @@
use strict;
use warnings;

-use Test::More tests => 9;
+use Test::More tests => 8;

use KinoSearch::Analysis::Tokenizer;
use KinoSearch::Analysis::TokenBatch;
@@ -52,13 +52,3 @@
"no freakout when fed multiple tokens"
);

-$batch->reset;
-$tokenizer = KinoSearch::Analysis::Tokenizer->new();
-my $doc = KinoSearch::Doc->new( fields => { monroe => 'some like it hot' } );
-$batch = $tokenizer->transform_field( $doc, 'monroe' );
-@token_texts = ();
-while ( my $token = $batch->next ) {
- push @token_texts, $token->get_text;
-}
-is_deeply( \@token_texts, [ 'some', 'like', 'it', 'hot' ],
- "transform_field" );

Modified: trunk/perl/t/155-stopalizer.t
===================================================================
--- trunk/perl/t/155-stopalizer.t 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/t/155-stopalizer.t 2008-08-03 07:30:25 UTC (rev 3699)
@@ -2,7 +2,7 @@
use warnings;
use lib 'buildlib';

-use Test::More tests => 8;
+use Test::More tests => 6;
use KinoTestUtils qw( test_analyzer );

use KinoSearch::Analysis::Stopalizer;

Modified: trunk/perl/t/156-stemmer.t
===================================================================
--- trunk/perl/t/156-stemmer.t 2008-08-03 07:28:03 UTC (rev 3698)
+++ trunk/perl/t/156-stemmer.t 2008-08-03 07:30:25 UTC (rev 3699)
@@ -2,7 +2,7 @@
use warnings;
use lib 'buildlib';

-use Test::More tests => 8;
+use Test::More tests => 6;
use KinoTestUtils qw( test_analyzer );

use KinoSearch::Analysis::Stemmer;


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits