Mailing List Archive

r3646 - in trunk/perl/lib: . KinoSearch/Index KinoSearch/Posting
Author: creamyg
Date: 2008-07-27 17:19:17 -0700 (Sun, 27 Jul 2008)
New Revision: 3646

Modified:
trunk/perl/lib/KinoSearch.pm
trunk/perl/lib/KinoSearch/Index/DelDocs.pm
trunk/perl/lib/KinoSearch/Index/DocReader.pm
trunk/perl/lib/KinoSearch/Index/DocVector.pm
trunk/perl/lib/KinoSearch/Index/DocWriter.pm
trunk/perl/lib/KinoSearch/Index/FilePurger.pm
trunk/perl/lib/KinoSearch/Index/IndexReader.pm
trunk/perl/lib/KinoSearch/Index/Inverter.pm
trunk/perl/lib/KinoSearch/Index/LexCache.pm
trunk/perl/lib/KinoSearch/Index/LexReader.pm
trunk/perl/lib/KinoSearch/Index/LexStepper.pm
trunk/perl/lib/KinoSearch/Index/LexWriter.pm
trunk/perl/lib/KinoSearch/Index/Lexicon.pm
trunk/perl/lib/KinoSearch/Index/MultiLexicon.pm
trunk/perl/lib/KinoSearch/Index/MultiPostingList.pm
trunk/perl/lib/KinoSearch/Index/MultiReader.pm
trunk/perl/lib/KinoSearch/Index/PostingList.pm
trunk/perl/lib/KinoSearch/Index/PostingsWriter.pm
trunk/perl/lib/KinoSearch/Index/SegDataReader.pm
trunk/perl/lib/KinoSearch/Index/SegDataWriter.pm
trunk/perl/lib/KinoSearch/Index/SegInfo.pm
trunk/perl/lib/KinoSearch/Index/SegLexCache.pm
trunk/perl/lib/KinoSearch/Index/SegLexicon.pm
trunk/perl/lib/KinoSearch/Index/SegPostingList.pm
trunk/perl/lib/KinoSearch/Index/SegReader.pm
trunk/perl/lib/KinoSearch/Index/SegWriter.pm
trunk/perl/lib/KinoSearch/Index/Snapshot.pm
trunk/perl/lib/KinoSearch/Index/TermInfo.pm
trunk/perl/lib/KinoSearch/Index/TermVector.pm
trunk/perl/lib/KinoSearch/Index/TermVectorsReader.pm
trunk/perl/lib/KinoSearch/Index/TermVectorsWriter.pm
trunk/perl/lib/KinoSearch/Posting/MatchPosting.pm
trunk/perl/lib/KinoSearch/Posting/RichPosting.pm
trunk/perl/lib/KinoSearch/Posting/ScorePosting.pm
Log:
Migrate all Perl code out of modules under KinoSearch::Index and
KinoSearch::Posting into KinoSearch.pm.


Modified: trunk/perl/lib/KinoSearch/Index/DelDocs.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/DelDocs.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/DelDocs.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,11 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::DelDocs;
-use KinoSearch::base qw( KinoSearch::Util::BitVector );
-
-use KinoSearch::Util::IntMap;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/DocReader.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/DocReader.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/DocReader.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,30 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::DocReader;
-use KinoSearch::base qw( KinoSearch::Index::SegDataReader );
-
-use KinoSearch::Util::StringHelper qw( utf8_flag_on );
-use KinoSearch::Doc::HitDoc;
-
-my $zlib_loaded = 0;
-
-sub apply_decompression {
- my ( $self, $doc ) = @_;
- if ( !$zlib_loaded ) {
- require Compress::Zlib;
- Compress::Zlib->import('uncompress');
- $zlib_loaded = 1;
- }
- my $schema = $self->get_schema;
- for my $field_name ( keys %$doc ) {
- my $fspec = $schema->fetch_fspec($field_name);
- next unless $fspec->compressed;
- $doc->{$field_name} = uncompress( $doc->{$field_name} );
- utf8_flag_on( $doc->{$field_name} ) unless $fspec->binary;
- }
-}
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/DocVector.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/DocVector.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/DocVector.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,11 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::DocVector;
-use KinoSearch::base qw( KinoSearch::Obj );
-
-use KinoSearch::Index::TermVector;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/DocWriter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/DocWriter.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/DocWriter.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,38 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::DocWriter;
-use KinoSearch::base qw( KinoSearch::Index::SegDataWriter );
-
-use KinoSearch::Util::StringHelper qw( utf8_flag_off utf8_flag_on );
-use KinoSearch::Index::IndexFileNames qw( DOC_STORAGE_FORMAT );
-
-my $zlib_loaded = 0;
-
-sub write_compressed {
- my ( $self, $doc ) = @_;
- my $fields = $doc->get_fields;
- my $schema = $self->get_schema;
- my $ds_out = $self->get_ds_out;
-
- # Lazy load Compress::Zlib.
- if ( !$zlib_loaded ) {
- require Compress::Zlib;
- Compress::Zlib->import('compress');
- $zlib_loaded = 1;
- }
-
- # Write compressed fields.
- for my $field ( keys %$fields ) {
- my $fspec = $schema->fetch_fspec($field);
- next unless $fspec->compressed && $fspec->stored;
- utf8_flag_off( $fields->{$field} );
- $ds_out->write_string($field);
- $ds_out->write_buf( compress( $fields->{$field} ) );
- utf8_flag_on( $fields->{field} ) unless $fspec->binary;
- }
-}
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/FilePurger.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/FilePurger.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/FilePurger.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,18 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::FilePurger;
-use KinoSearch::base qw( KinoSearch::Obj );
-
-use KinoSearch::Index::Snapshot;
-
-sub try_delete_file {
- my ( $self, $filename ) = @_;
- eval { $self->get_invindex->get_folder->delete_file($filename) };
- # Catch delete file exception.
- if ( $@ and $@ !~ /Folder_delete_file/ ) { die($@) }
-}
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/IndexReader.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/IndexReader.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/IndexReader.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,168 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::IndexReader;
-use KinoSearch::Util::ToolSet qw( confess a_isa_b );
-use KinoSearch::base qw( KinoSearch::Obj );
-
-# Test code will define these as coderefs.
-our $debug1;
-our $debug2;
-
-use KinoSearch::Index::Snapshot;
-use KinoSearch::Index::SegReader;
-use KinoSearch::Index::MultiReader;
-use KinoSearch::Index::MultiLexicon;
-use KinoSearch::Index::PostingList;
-use KinoSearch::Index::SegLexicon;
-use KinoSearch::Index::LexCache;
-use KinoSearch::Util::IntMap;
-use KinoSearch::Util::StringHelper qw( from_base36 );
-
-sub open {
- my $temp = _new(@_);
- return $temp->_open_multi_or_segreader;
-}
-
-sub new {
- confess("IndexReader is an abstract class; use IndexReader->open instead")
- if $_[0] eq __PACKAGE__;
- return _new(@_);
-}
-
-# Returns a subclass of IndexReader: either a MultiReader or a SegReader,
-# depending on whether an InvIndex contains more than one segment.
-sub _open_multi_or_segreader {
- my $self = shift;
-
- # Verify InvIndex and extract schema.
- my $invindex = $self->get_invindex;
- confess("Missing required arg 'invindex'")
- unless a_isa_b( $invindex, "KinoSearch::InvIndex" );
- my $schema = $invindex->get_schema;
- my $folder = $invindex->get_folder;
-
- # Confirm lock factory if supplied.
- my $lock_factory = $self->get_lock_factory;
- if ( defined $lock_factory ) {
- confess("Not a KinoSearch::Store::LockFactory")
- unless a_isa_b( $lock_factory, "KinoSearch::Store::LockFactory" );
- }
-
- $self->obtain_commit_lock if defined $lock_factory;
-
- my $snapshot;
- my @seg_readers;
- my ( $gen, $last_gen );
- while (1) {
- eval {
- # Find the most recent snapshot file.
- my $latest_snapshot_file = $folder->latest_snapshot_file;
- confess("Index doesn't seem to contain any data")
- unless defined $latest_snapshot_file;
- $latest_snapshot_file =~ /snapshot_(\w+)/
- or die "Strange snapshot name: $latest_snapshot_file";
- $gen = from_base36($1);
-
- # Get a read lock on the most recent snapshot file if indicated.
- if ( defined $lock_factory ) {
- $self->obtain_read_lock($latest_snapshot_file);
- }
-
- $debug1->() if defined $debug1;
-
- if ( defined $self->get_snapshot ) {
- # Either use the passed-in snapshot...
- $snapshot = $self->get_snapshot;
- }
- else {
- # ... or read the most recent snapshot file.
- $snapshot
- = KinoSearch::Index::Snapshot->new( schema => $schema );
- my $folder = $invindex->get_folder;
- $snapshot->read_snapshot( folder => $folder );
- }
-
- # Throw an error if index doesn't exist.
- confess("Index doesn't seem to contain any data")
- unless $snapshot->size;
-
- # Deal with race condition between locking and reading segs file.
- if ( $snapshot->get_generation > $gen ) {
- confess("More recent segs file than $gen detected");
- }
-
- for my $seg_info ( sort { $a->get_seg_name cmp $b->get_seg_name }
- @{ $snapshot->infos->to_perl } )
- {
- # Create a SegReader for each segment in the InvIndex.
- push @seg_readers,
- KinoSearch::Index::SegReader->new(
- invindex => $invindex,
- seg_info => $seg_info,
- lock_factory => $lock_factory,
- );
- }
- };
-
- # It's possible, though unlikely, for an InvIndexer to delete files
- # out from underneath us after the snapshot file is read but before
- # we've got SegReaders holding open all the required files. If we
- # failed to open something, see if we can find a newer snapshot file.
- # If we can, then the exception was due to the race condition. If
- # not, we have a real exception, so throw an error.
- if ($@) {
- my $saved_error = $@;
- $self->release_read_lock;
- if ( !defined $snapshot
- or ( defined $last_gen and $last_gen == $gen ) )
- {
- $self->release_commit_lock if defined $lock_factory;
- confess($saved_error);
- }
- $last_gen = $gen;
- @seg_readers = ();
- undef $snapshot;
- }
- else {
- $self->set_snapshot($snapshot);
- last;
- }
- }
-
- $self->release_commit_lock if defined $lock_factory;
-
- # If there's one SegReader use it; otherwise make a MultiReader.
- my $true_self;
- if ( @seg_readers == 1 ) {
- $true_self = $seg_readers[0];
- }
- else {
- my $sub_readers = KinoSearch::Util::VArray->new(
- capacity => scalar @seg_readers );
- $sub_readers->push($_) for @seg_readers;
- $true_self = KinoSearch::Index::MultiReader->new(
- invindex => $invindex,
- sub_readers => $sub_readers,
- lock_factory => $lock_factory,
- );
- }
-
- # Copy crucial elements.
- $true_self->set_read_lock( $self->get_read_lock );
-
- # Thwart release of lock on destruction of temp self.
- $self->set_read_lock(undef);
- $self->set_commit_lock(undef);
- $self->set_lock_factory(undef);
-
- return $true_self;
-}
-
-our %doc_freq_args = ( field => undef, term => undef );
-our %lexicon_args = ( field => undef, term => undef );
-our %posting_list_args = ( field => undef, term => undef );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/Inverter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/Inverter.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/Inverter.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,11 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::Inverter;
-use KinoSearch::base qw( KinoSearch::Obj );
-
-use KinoSearch::Analysis::TokenBatch;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/LexCache.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/LexCache.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/LexCache.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::LexCache;
-use KinoSearch::base qw( KinoSearch::Index::Lexicon );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/LexReader.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/LexReader.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/LexReader.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,14 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::LexReader;
-use KinoSearch::base qw( KinoSearch::Index::SegDataReader );
-
-use KinoSearch::Index::TermInfo;
-use KinoSearch::Index::SegLexicon;
-use KinoSearch::Index::LexCache;
-use KinoSearch::Index::SegLexCache;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/LexStepper.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/LexStepper.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/LexStepper.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::LexStepper;
-use KinoSearch::base qw( KinoSearch::Util::Stepper );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/LexWriter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/LexWriter.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/LexWriter.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::LexWriter;
-use KinoSearch::base qw( KinoSearch::Index::SegDataWriter );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/Lexicon.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/Lexicon.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/Lexicon.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,18 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::Lexicon;
-use KinoSearch::base qw( KinoSearch::Obj );
-
-our %build_sort_cache_args = (
- # params
- posting_list => undef,
- max_docs => undef,
-);
-
-use KinoSearch::Util::IntMap;
-use KinoSearch::Index::PostingList;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/MultiLexicon.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/MultiLexicon.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/MultiLexicon.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,11 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::MultiLexicon;
-use KinoSearch::base qw( KinoSearch::Index::Lexicon );
-
-use KinoSearch::Index::SegLexicon;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/MultiPostingList.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/MultiPostingList.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/MultiPostingList.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::MultiPostingList;
-use KinoSearch::base qw( KinoSearch::Index::PostingList );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/MultiReader.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/MultiReader.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/MultiReader.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,13 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::MultiReader;
-use KinoSearch::base qw( KinoSearch::Index::IndexReader );
-
-use KinoSearch::Index::SegReader;
-use KinoSearch::Index::MultiPostingList;
-use KinoSearch::Index::MultiLexicon;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/PostingList.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/PostingList.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/PostingList.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::PostingList;
-use KinoSearch::base qw( KinoSearch::Obj );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/PostingsWriter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/PostingsWriter.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/PostingsWriter.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,12 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::PostingsWriter;
-use KinoSearch::base qw( KinoSearch::Index::SegDataWriter );
-
-use KinoSearch::Index::TermInfo;
-use KinoSearch::Index::LexWriter;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegDataReader.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegDataReader.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegDataReader.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegDataReader;
-use KinoSearch::base qw( KinoSearch::Obj );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegDataWriter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegDataWriter.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegDataWriter.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,16 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegDataWriter;
-use KinoSearch::base qw( KinoSearch::Obj );
-
-sub add_doc { shift->abstract_death }
-
-# TODO:
-sub extensions { shift->abstract_death }
-sub metadata { shift->abstract_death }
-sub file_format_revision { shift->abstract_death }
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegInfo.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegInfo.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegInfo.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,20 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegInfo;
-use KinoSearch::Util::ToolSet qw( to_kino to_perl );
-use KinoSearch::base qw( KinoSearch::Obj );
-
-sub add_metadata {
- my ( $self, $key, $val ) = @_;
- $self->_add_metadata( $key, to_kino($val) );
-}
-
-sub extract_metadata {
- my ( $self, $key ) = @_;
- return to_perl( $self->_extract_metadata($key) );
-}
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegLexCache.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegLexCache.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegLexCache.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegLexCache;
-use KinoSearch::base qw( KinoSearch::Index::LexCache );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegLexicon.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegLexicon.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegLexicon.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,11 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegLexicon;
-use KinoSearch::base qw( KinoSearch::Index::Lexicon );
-
-use KinoSearch::Index::TermInfo;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegPostingList.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegPostingList.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegPostingList.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,11 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegPostingList;
-use KinoSearch::base qw( KinoSearch::Index::PostingList );
-
-use KinoSearch::Posting::ScorePosting;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegReader.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegReader.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegReader.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,17 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegReader;
-use KinoSearch::base qw( KinoSearch::Index::IndexReader );
-
-use KinoSearch::Index::LexReader;
-use KinoSearch::Index::SegLexicon;
-use KinoSearch::Index::TermVectorsReader;
-use KinoSearch::Index::DocReader;
-use KinoSearch::Index::Snapshot;
-use KinoSearch::Index::SegPostingList;
-use KinoSearch::Index::DelDocs;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/SegWriter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegWriter.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/SegWriter.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,16 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::SegWriter;
-use KinoSearch::base qw( KinoSearch::Index::SegDataWriter );
-
-use KinoSearch::Analysis::TokenBatch;
-use KinoSearch::Doc;
-use KinoSearch::Index::DocWriter;
-use KinoSearch::Index::Inverter;
-use KinoSearch::Index::PostingsWriter;
-use KinoSearch::Index::TermVectorsWriter;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/Snapshot.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/Snapshot.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/Snapshot.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,16 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::Snapshot;
-use KinoSearch::base qw( KinoSearch::Obj );
-
-use KinoSearch::Index::SegInfo;
-use KinoSearch::FieldSpec;
-use KinoSearch::Schema;
-use KinoSearch::Index::IndexFileNames;
-use KinoSearch::Util::StringHelper;
-use KinoSearch::Util::Json;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/TermInfo.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/TermInfo.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/TermInfo.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,9 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::TermInfo;
-use KinoSearch::base qw( KinoSearch::Obj::FastObj );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/TermVector.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/TermVector.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/TermVector.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,13 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::TermVector;
-use KinoSearch::base qw( KinoSearch::Obj );
-
-sub get_positions { shift->_get_positions->to_arrayref }
-sub get_start_offsets { shift->_get_start_offsets->to_arrayref }
-sub get_end_offsets { shift->_get_end_offsets->to_arrayref }
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/TermVectorsReader.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/TermVectorsReader.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/TermVectorsReader.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,11 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::TermVectorsReader;
-use KinoSearch::base qw( KinoSearch::Index::SegDataReader );
-
-use KinoSearch::Index::DocVector;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Index/TermVectorsWriter.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/TermVectorsWriter.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Index/TermVectorsWriter.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,12 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Index::TermVectorsWriter;
-use KinoSearch::base qw( KinoSearch::Index::SegDataWriter );
-
-use KinoSearch::Index::DocVector;
-use KinoSearch::Index::TermVector;
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Posting/MatchPosting.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Posting/MatchPosting.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Posting/MatchPosting.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,17 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Posting::MatchPosting;
-use KinoSearch::base qw( KinoSearch::Posting );
-
-our %instance_vars = (
- # constructor params
- similarity => undef,
-);
-
-package KinoSearch::Posting::MatchPostingScorer;
-use KinoSearch::base qw( KinoSearch::Search::TermScorer );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Posting/RichPosting.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Posting/RichPosting.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Posting/RichPosting.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,17 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Posting::RichPosting;
-use KinoSearch::base qw( KinoSearch::Posting::ScorePosting );
-
-our %instance_vars = (
- # constructor params
- similarity => undef,
-);
-
-package KinoSearch::Posting::RichPostingScorer;
-BEGIN { our @ISA = qw( KinoSearch::Posting::ScorePostingScorer ); }
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch/Posting/ScorePosting.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Posting/ScorePosting.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch/Posting/ScorePosting.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -1,17 +1,5 @@
-use strict;
-use warnings;
+use KinoSearch;

-package KinoSearch::Posting::ScorePosting;
-use KinoSearch::base qw( KinoSearch::Posting::MatchPosting );
-
-our %instance_vars = (
- # constructor params
- similarity => undef,
-);
-
-package KinoSearch::Posting::ScorePostingScorer;
-use KinoSearch::base qw( KinoSearch::Search::TermScorer );
-
1;

__END__

Modified: trunk/perl/lib/KinoSearch.pm
===================================================================
--- trunk/perl/lib/KinoSearch.pm 2008-07-28 00:10:14 UTC (rev 3645)
+++ trunk/perl/lib/KinoSearch.pm 2008-07-28 00:19:17 UTC (rev 3646)
@@ -170,6 +170,265 @@
}

{
+ package KinoSearch::Index::DocReader;
+ use KinoSearch::base qw( KinoSearch::Index::SegDataReader );
+
+ use KinoSearch::Util::StringHelper qw( utf8_flag_on );
+ use KinoSearch::Doc::HitDoc;
+
+ my $zlib_loaded = 0;  # Set to 1 once uncompress() has been imported.
+ # Uncompress, in place, every field of $doc whose field spec is compressed.
+ sub apply_decompression {
+ my ( $self, $doc ) = @_;
+ if ( !$zlib_loaded ) {  # Load Compress::Zlib on the first call only.
+ require Compress::Zlib;
+ Compress::Zlib->import('uncompress');
+ $zlib_loaded = 1;
+ }
+ my $schema = $self->get_schema;
+ for my $field_name ( keys %$doc ) {  # $doc is dereferenced as a hash.
+ my $fspec = $schema->fetch_fspec($field_name);
+ next unless $fspec->compressed;
+ $doc->{$field_name} = uncompress( $doc->{$field_name} );
+ utf8_flag_on( $doc->{$field_name} ) unless $fspec->binary;
+ }
+ }
+}
+
+{
+ package KinoSearch::Index::DocWriter;
+ use KinoSearch::base qw( KinoSearch::Index::SegDataWriter );
+
+ use KinoSearch::Util::StringHelper qw( utf8_flag_off utf8_flag_on );
+ use KinoSearch::Index::IndexFileNames qw( DOC_STORAGE_FORMAT );
+
+ my $zlib_loaded = 0;  # Set to 1 once compress() has been imported.
+ # Write every field of $doc that is both compressed and stored to ds_out.
+ sub write_compressed {
+ my ( $self, $doc ) = @_;
+ my $fields = $doc->get_fields;
+ my $schema = $self->get_schema;
+ my $ds_out = $self->get_ds_out;
+
+ # Lazy load Compress::Zlib the first time this method runs.
+ if ( !$zlib_loaded ) {
+ require Compress::Zlib;
+ Compress::Zlib->import('compress');
+ $zlib_loaded = 1;
+ }
+
+ # Write each qualifying field as its name, then its compressed value.
+ for my $field ( keys %$fields ) {
+ my $fspec = $schema->fetch_fspec($field);
+ next unless $fspec->compressed && $fspec->stored;
+ utf8_flag_off( $fields->{$field} );  # Flag is off while compress() runs.
+ $ds_out->write_string($field);
+ $ds_out->write_buf( compress( $fields->{$field} ) );
+ utf8_flag_on( $fields->{$field} ) unless $fspec->binary;  # Same field.
+ }
+ }
+}
+
+{
+ package KinoSearch::Index::FilePurger;
+ use KinoSearch::base qw( KinoSearch::Obj );
+ # Try to delete $filename via the folder obtained from this object's invindex.
+ sub try_delete_file {
+ my ( $self, $filename ) = @_;
+ eval { $self->get_invindex->get_folder->delete_file($filename) };
+ # Ignore errors that mention Folder_delete_file; rethrow anything else.
+ if ( $@ and $@ !~ /Folder_delete_file/ ) { die($@) }
+ }
+}
+
+{
+ package KinoSearch::Index::IndexReader;
+ use KinoSearch::Util::ToolSet qw( confess a_isa_b );
+ use KinoSearch::base qw( KinoSearch::Obj );
+
+ # Test code will define these as coderefs; $debug1 is invoked in the open loop.
+ our $debug1;
+ our $debug2;
+
+ use KinoSearch::Util::StringHelper qw( from_base36 );
+ # Build a temporary reader via _new, then return whatever it resolves to.
+ sub open {
+ my $temp = _new(@_);
+ return $temp->_open_multi_or_segreader;
+ }
+ # Constructor: dies when the first argument is this base class's own name.
+ sub new {
+ confess(
+ "IndexReader is an abstract class; use IndexReader->open instead")
+ if $_[0] eq __PACKAGE__;
+ return _new(@_);
+ }
+
+ # Returns a subclass of IndexReader: either a MultiReader or a SegReader,
+ # depending on whether an InvIndex contains more than one segment.
+ sub _open_multi_or_segreader {
+ my $self = shift;
+
+ # Verify InvIndex and extract schema.
+ my $invindex = $self->get_invindex;
+ confess("Missing required arg 'invindex'")
+ unless a_isa_b( $invindex, "KinoSearch::InvIndex" );
+ my $schema = $invindex->get_schema;
+ my $folder = $invindex->get_folder;
+
+ # Confirm lock factory if supplied.
+ my $lock_factory = $self->get_lock_factory;
+ if ( defined $lock_factory ) {
+ confess("Not a KinoSearch::Store::LockFactory")
+ unless a_isa_b( $lock_factory,
+ "KinoSearch::Store::LockFactory" );
+ }
+
+ $self->obtain_commit_lock if defined $lock_factory;
+
+ my $snapshot;
+ my @seg_readers;
+ my ( $gen, $last_gen );  # $last_gen holds $gen from the previous failed pass.
+ while (1) {
+ eval {
+ # Find the most recent snapshot file.
+ my $latest_snapshot_file = $folder->latest_snapshot_file;
+ confess("Index doesn't seem to contain any data")
+ unless defined $latest_snapshot_file;
+ $latest_snapshot_file =~ /snapshot_(\w+)/
+ or die "Strange snapshot name: $latest_snapshot_file";
+ $gen = from_base36($1);
+
+ # Get a read lock on the most recent snapshot file if
+ # indicated.
+ if ( defined $lock_factory ) {
+ $self->obtain_read_lock($latest_snapshot_file);
+ }
+
+ $debug1->() if defined $debug1;
+
+ if ( defined $self->get_snapshot ) {
+ # Either use the passed-in snapshot...
+ $snapshot = $self->get_snapshot;
+ }
+ else {
+ # ... or read the most recent snapshot file.
+ $snapshot = KinoSearch::Index::Snapshot->new(
+ schema => $schema );
+ my $folder = $invindex->get_folder;  # NOTE(review): shadows the outer $folder.
+ $snapshot->read_snapshot( folder => $folder );
+ }
+
+ # Throw an error if index doesn't exist.
+ confess("Index doesn't seem to contain any data")
+ unless $snapshot->size;
+
+ # Deal with race condition between locking and reading
+ # snapshot file.
+ if ( $snapshot->get_generation > $gen ) {
+ confess("More recent segs file than $gen detected");  # NOTE(review): text says "segs file".
+ }
+
+ for my $seg_info (
+ sort { $a->get_seg_name cmp $b->get_seg_name }
+ @{ $snapshot->infos->to_perl } )
+ {
+ # Create a SegReader for each segment in the InvIndex.
+ push @seg_readers,
+ KinoSearch::Index::SegReader->new(
+ invindex => $invindex,
+ seg_info => $seg_info,
+ lock_factory => $lock_factory,
+ );
+ }
+ };
+
+ # It's possible, though unlikely, for an InvIndexer to delete
+ # files out from underneath us after the snapshot file is read but
+ # before we've got SegReaders holding open all the required files.
+ # If we failed to open something, see if we can find a newer
+ # snapshot file. If we can, then the exception was due to the
+ # race condition. If not, we have a real exception, so throw an
+ # error.
+ if ($@) {
+ my $saved_error = $@;
+ $self->release_read_lock;  # NOTE(review): runs even without a lock_factory; confirm safe.
+ if ( !defined $snapshot
+ or ( defined $last_gen and $last_gen == $gen ) )
+ {
+ $self->release_commit_lock if defined $lock_factory;
+ confess($saved_error);
+ }
+ $last_gen = $gen;  # A second failure at this same generation is fatal.
+ @seg_readers = ();
+ undef $snapshot;
+ }
+ else {
+ $self->set_snapshot($snapshot);
+ last;
+ }
+ }
+
+ $self->release_commit_lock if defined $lock_factory;
+
+ # If there's one SegReader use it; otherwise make a MultiReader.
+ my $true_self;
+ if ( @seg_readers == 1 ) {
+ $true_self = $seg_readers[0];
+ }
+ else {
+ my $sub_readers = KinoSearch::Util::VArray->new(
+ capacity => scalar @seg_readers );
+ $sub_readers->push($_) for @seg_readers;
+ $true_self = KinoSearch::Index::MultiReader->new(
+ invindex => $invindex,
+ sub_readers => $sub_readers,
+ lock_factory => $lock_factory,
+ );
+ }
+
+ # Hand the read lock over to the reader that is being returned.
+ $true_self->set_read_lock( $self->get_read_lock );
+
+ # Thwart release of lock on destruction of temp self.
+ $self->set_read_lock(undef);
+ $self->set_commit_lock(undef);
+ $self->set_lock_factory(undef);
+
+ return $true_self;
+ }
+ # Presumably parameter templates for the like-named methods; TODO confirm.
+ our %doc_freq_args = ( field => undef, term => undef );
+ our %lexicon_args = ( field => undef, term => undef );
+ our %posting_list_args = ( field => undef, term => undef );
+}
+
+{
+ package KinoSearch::Index::SegInfo;
+ use KinoSearch::Util::ToolSet qw( to_kino to_perl );
+ use KinoSearch::base qw( KinoSearch::Obj );
+ # Pass $val through to_kino() and hand it to _add_metadata under $key.
+ sub add_metadata {
+ my ( $self, $key, $val ) = @_;
+ $self->_add_metadata( $key, to_kino($val) );
+ }
+ # Return to_perl() applied to whatever _extract_metadata($key) yields.
+ sub extract_metadata {
+ my ( $self, $key ) = @_;
+ return to_perl( $self->_extract_metadata($key) );
+ }
+}
+
+{
+ package KinoSearch::Index::TermVector;
+ use KinoSearch::base qw( KinoSearch::Obj );
+ # Each accessor calls to_arrayref() on the object its _get_* twin returns.
+ sub get_positions { shift->_get_positions->to_arrayref }
+ sub get_start_offsets { shift->_get_start_offsets->to_arrayref }
+ sub get_end_offsets { shift->_get_end_offsets->to_arrayref }
+}
+
+{
package KinoSearch::Store::Folder;
use KinoSearch::base qw( KinoSearch::Obj );
use File::Spec::Functions qw( rel2abs );


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits