Mailing List Archive

r3734 - trunk/perl/t
Author: creamyg
Date: 2008-08-19 07:34:00 -0700 (Tue, 19 Aug 2008)
New Revision: 3734

Modified:
trunk/perl/t/154-tokenizer.t
Log:
Change most of the Tokenizer test file to use "pattern" rather than "token_re",
but add a backward-compatibility test which still uses token_re.


Modified: trunk/perl/t/154-tokenizer.t
===================================================================
--- trunk/perl/t/154-tokenizer.t 2008-08-19 14:28:05 UTC (rev 3733)
+++ trunk/perl/t/154-tokenizer.t 2008-08-19 14:34:00 UTC (rev 3734)
@@ -1,7 +1,7 @@
use strict;
use warnings;

-use Test::More tests => 8;
+use Test::More tests => 9;

use KinoSearch::Analysis::Tokenizer;
use KinoSearch::Analysis::Inversion;
@@ -9,7 +9,7 @@
my $tokenizer = KinoSearch::Analysis::Tokenizer->new;

my $text = $tokenizer->split("o'malley's")->[0];
-is( $text, "o'malley's", "multiple apostrophes for default token_re" );
+is( $text, "o'malley's", "multiple apostrophes for default pattern" );

my $inversion = KinoSearch::Analysis::Inversion->new( text => "a b c" );
$inversion = $tokenizer->transform($inversion);
@@ -24,8 +24,8 @@
is_deeply( \@start_offsets, [ 0, 2, 4, ], "correctstart offsets" );
is_deeply( \@end_offsets, [ 1, 3, 5, ], "correct end offsets" );

-$tokenizer = KinoSearch::Analysis::Tokenizer->new( token_re => qr/./ );
-$inversion = KinoSearch::Analysis::Inversion->new( text => "a b c" );
+$tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '.' );
+$inversion = KinoSearch::Analysis::Inversion->new( text => "a b c" );
$inversion = $tokenizer->transform($inversion);

@token_texts = ();
@@ -36,9 +36,13 @@
push @start_offsets, $token->get_start_offset;
push @end_offsets, $token->get_end_offset;
}
-is_deeply( \@token_texts, [ 'a', ' ', 'b', ' ', 'c' ], "texts: custom re" );
-is_deeply( \@start_offsets, [ 0 .. 4 ], "starts: custom re" );
-is_deeply( \@end_offsets, [ 1 .. 5 ], "ends: custom re" );
+is_deeply(
+ \@token_texts,
+ [ 'a', ' ', 'b', ' ', 'c' ],
+ "texts: custom pattern"
+);
+is_deeply( \@start_offsets, [ 0 .. 4 ], "starts: custom pattern" );
+is_deeply( \@end_offsets, [ 1 .. 5 ], "ends: custom pattern" );

$inversion->reset;
$inversion = $tokenizer->transform($inversion);
@@ -52,3 +56,7 @@
"no freakout when fed multiple tokens"
);

+$tokenizer = KinoSearch::Analysis::Tokenizer->new( token_re => qr/../ );
+is_deeply( $tokenizer->split('aabbcc'),
+ [qw( aa bb cc )], "back compat with token_re argument" );
+


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits