Mailing List Archive

r3696 - in trunk/perl: lib lib/KinoSearch lib/KinoSearch/Analysis lib/KinoSearch/Highlight lib/KinoSearch/Index lib/KinoSearch/Search t xs
Author: creamyg
Date: 2008-08-02 21:55:00 -0700 (Sat, 02 Aug 2008)
New Revision: 3696

Modified:
trunk/perl/lib/KinoSearch.pm
trunk/perl/lib/KinoSearch/Analysis/Stopalizer.pm
trunk/perl/lib/KinoSearch/Highlight/HeatMap.pm
trunk/perl/lib/KinoSearch/Index/IndexFileNames.pm
trunk/perl/lib/KinoSearch/Index/SegInfo.pm
trunk/perl/lib/KinoSearch/QueryParser.pm
trunk/perl/lib/KinoSearch/Search/MultiSearcher.pm
trunk/perl/lib/KinoSearch/Search/PhraseQuery.pm
trunk/perl/lib/KinoSearch/Search/TopDocs.pm
trunk/perl/t/107-index_file_names.t
trunk/perl/xs/XSBind.c
trunk/perl/xs/XSBind.h
Log:
Convert utility functions to_kino and to_perl to XS. Change XS helper
functions so that methods expecting a VArray can be provided with a Perl
array and the XS helper will convert it to a mortalized VArray copy.


Modified: trunk/perl/lib/KinoSearch/Analysis/Stopalizer.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Analysis/Stopalizer.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/Analysis/Stopalizer.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -7,7 +7,7 @@
__AUTO_XS__

{ "KinoSearch::Analysis::Stopalizer" => {
- make_constructors => ["_new"],
+ make_constructors => ["new"],
},
}


Modified: trunk/perl/lib/KinoSearch/Highlight/HeatMap.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Highlight/HeatMap.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/Highlight/HeatMap.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -22,7 +22,7 @@
)
],
make_getters => [qw( spans window )],
- make_constructors => ["_new"],
+ make_constructors => ["new"],
make_pod => {
synopsis => " # TODO.\n",
constructor => { sample => $constructor },

Modified: trunk/perl/lib/KinoSearch/Index/IndexFileNames.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/IndexFileNames.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/Index/IndexFileNames.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -16,7 +16,7 @@
OUTPUT: RETVAL

SV*
-_unused_files(files, snapshots)
+unused_files(files, snapshots)
kino_VArray *files;
kino_VArray *snapshots;
CODE:

Modified: trunk/perl/lib/KinoSearch/Index/SegInfo.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Index/SegInfo.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/Index/SegInfo.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -8,32 +8,6 @@

MODULE = KinoSearch PACKAGE = KinoSearch::Index::SegInfo

-void
-_add_metadata(self, key_sv, val)
- kino_SegInfo *self;
- SV *key_sv;
- kino_Obj *val;
-PPCODE:
-{
- STRLEN len;
- char *key = SvPV(key_sv, len);
- Kino_SegInfo_Add_Metadata(self, key, len, val);
-}
-
-
-kino_Obj*
-_extract_metadata(self, key_sv)
- kino_SegInfo *self;
- SV *key_sv;
-CODE:
-{
- STRLEN len;
- char *key = SvPV(key_sv, len);
- RETVAL = Kino_SegInfo_Extract_Metadata(self, key, len);
- REFCOUNT_INC(RETVAL);
-}
-OUTPUT: RETVAL
-
SV*
field_num(self, field_name)
kino_SegInfo *self;

Modified: trunk/perl/lib/KinoSearch/QueryParser.pm
===================================================================
--- trunk/perl/lib/KinoSearch/QueryParser.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/QueryParser.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -37,7 +37,7 @@
get_analyzer
get_schema )
],
- make_constructors => ["do_new"],
+ make_constructors => ["new"],
make_pod => {
methods => [
qw( parse tree expand expand_leaf

Modified: trunk/perl/lib/KinoSearch/Search/MultiSearcher.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Search/MultiSearcher.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/Search/MultiSearcher.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -47,7 +47,7 @@

{ "KinoSearch::Search::MultiSearcher" => {
make_getters => [qw( searchables starts )],
- make_constructors => ["_new"],
+ make_constructors => ["new"],
make_pod => {
synopsis => $synopsis,
constructor => { sample => $constructor },

Modified: trunk/perl/lib/KinoSearch/Search/PhraseQuery.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Search/PhraseQuery.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/Search/PhraseQuery.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -16,7 +16,7 @@

{ "KinoSearch::Search::PhraseQuery" => {
bind_methods => [qw( get_field _get_terms|get_terms )],
- make_constructors => ["_new"],
+ make_constructors => ["new"],
make_pod => {
constructor => { sample => '' },
synopsis => $synopsis,

Modified: trunk/perl/lib/KinoSearch/Search/TopDocs.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Search/TopDocs.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch/Search/TopDocs.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -17,7 +17,7 @@
set_max_score
get_remotified
)],
- make_constructors => ["_new"],
+ make_constructors => ["new"],
}
}


Modified: trunk/perl/lib/KinoSearch.pm
===================================================================
--- trunk/perl/lib/KinoSearch.pm 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/lib/KinoSearch.pm 2008-08-03 04:55:00 UTC (rev 3696)
@@ -75,48 +75,6 @@
);
}

- # Translate a complex data structure in Perl to the equivalent in
- # KinoSearch C structures. Undefined elements will trigger a warning and
- # be turned to empty strings.
- sub to_kino {
- my $input = shift;
- my $reftype = reftype($input);
-
- if ( !$reftype ) {
- return KinoSearch::Util::CharBuf->new($input);
- }
- elsif ( $reftype eq 'HASH' ) {
- my $capacity = scalar keys %$input;
- my $hash = KinoSearch::Util::Hash->new( capacity => $capacity );
- while ( my ( $k, $v ) = each %$input ) {
- my $val = to_kino($v);
- $hash->store( $k, $val );
- }
- return $hash;
- }
- elsif ( $reftype eq 'ARRAY' ) {
- my $varray
- = KinoSearch::Util::VArray->new( capacity => scalar @$input );
- $varray->push( to_kino($_) ) for @$input;
- return $varray;
- }
- elsif ( a_isa_b( $input, 'KinoSearch::Obj' ) ) {
- return $input;
- }
- }
-
- # Transform what may or may not be a KinoSearch object into a Perl complex
- # data structure if possible.
- sub to_perl {
- my $input = shift;
- if ( blessed($input) and $input->can('to_pobj') ) {
- return $input->to_pobj;
- }
- else {
- return $input;
- }
- }
-
# a_isa_b serves the same purpose as the isa method from UNIVERSAL, only
# it is called as a function rather than a method.
sub a_isa_b {
@@ -155,16 +113,12 @@

{
package KinoSearch::Analysis::PolyAnalyzer;
- use KinoSearch::Util::ToolSet qw( to_kino );

our %instance_vars = __PACKAGE__->init_instance_vars(
cached_analyzers => \our %cached_analyzers, );

sub new {
my ( $either, %args ) = @_;
- if ( defined $args{analyzers} ) {
- $args{analyzers} = to_kino( $args{analyzers} );
- }

my $self = $either->_new(%args);
# Cache analyzers as Perl array as a lame-o optimization until
@@ -201,16 +155,6 @@
package KinoSearch::Analysis::Stopalizer;
use KinoSearch::Util::ToolSet qw( confess reftype to_kino );

- sub new {
- my ( $either, %args ) = @_;
- if ( defined $args{stoplist} ) {
- confess("stoplist must be a hashref")
- unless reftype( $args{stoplist} ) eq 'HASH';
- $args{stoplist} = to_kino( $args{stoplist} );
- }
- return $either->_new(%args);
- }
-
sub gen_stoplist {
my ( undef, $language ) = @_;
require Lingua::StopWords;
@@ -311,24 +255,15 @@

{
package KinoSearch::Highlight::HeatMap;
- use KinoSearch::Util::ToolSet qw( to_kino to_perl );

- sub new {
- my ( $either, %args ) = @_;
- $args{spans} = to_kino( $args{spans} );
- return $either->_new(%args);
- }
-
sub flatten_spans {
my ( $self, $spans ) = @_;
- my $flattened = $self->_flatten_spans( to_kino($spans) );
- return to_perl($flattened);
+ return $self->_flatten_spans($spans)->to_pobj;
}

sub generate_proximity_boosts {
my ( $self, $spans ) = @_;
- my $flattened = $self->_generate_proximity_boosts( to_kino($spans) );
- return to_perl($flattened);
+ return $self->_generate_proximity_boosts($spans)->to_pobj;
}
}

@@ -598,7 +533,6 @@

{
package KinoSearch::Index::IndexFileNames;
- use KinoSearch::Util::ToolSet qw( to_kino );
BEGIN {
push our @ISA, 'Exporter';
our @EXPORT_OK = qw(
@@ -606,11 +540,6 @@
unused_files
);
}
-
- sub unused_files {
- my ( $files, @snapshots ) = @_;
- return _unused_files( to_kino($files), to_kino( \@snapshots ) );
- }
}

{
@@ -775,21 +704,6 @@
}

{
- package KinoSearch::Index::SegInfo;
- use KinoSearch::Util::ToolSet qw( to_kino to_perl );
-
- sub add_metadata {
- my ( $self, $key, $val ) = @_;
- $self->_add_metadata( $key, to_kino($val) );
- }
-
- sub extract_metadata {
- my ( $self, $key ) = @_;
- return to_perl( $self->_extract_metadata($key) );
- }
-}
-
-{
package KinoSearch::Index::TermVector;

sub get_positions { shift->_get_positions->to_arrayref }
@@ -798,17 +712,6 @@
}

{
- package KinoSearch::QueryParser;
- use KinoSearch::Util::ToolSet qw( to_kino );
-
- sub new {
- my ( $either, %args ) = @_;
- $args{fields} = to_kino( $args{fields} ) if defined $args{fields};
- return $either->do_new(%args);
- }
-}
-
-{
package KinoSearch::Schema;
use KinoSearch::Util::ToolSet qw( confess blessed );

@@ -889,28 +792,8 @@
}

{
- package KinoSearch::Search::MultiSearcher;
- use KinoSearch::Util::ToolSet qw( to_kino );
-
- sub new {
- my ( $either, %args ) = @_;
- $args{searchables} = to_kino( $args{searchables} );
- return $either->_new(%args);
- }
-}
-
-{
package KinoSearch::Search::PhraseQuery;
- use KinoSearch::Util::ToolSet qw( to_kino );

- sub new {
- my ( $either, %args ) = @_;
- if ( defined $args{terms} ) {
- $args{terms} = to_kino( $args{terms} );
- }
- return $either->_new(%args);
- }
-
sub get_terms { shift->_get_terms->to_perl }
}

@@ -927,16 +810,6 @@
{
package KinoSearch::Search::TopDocs;

- use KinoSearch::Util::VArray;
- use KinoSearch::Search::RemoteFieldDoc;
-
- sub new {
- my ( $either, %args ) = @_;
- $args{score_docs} = to_kino( $args{score_docs} );
- return $either->_new(%args);
- }
-
- sub set_score_docs { $_[0]->_set_score_docs( to_kino( $_[1] ) ) }
sub get_score_docs { to_perl( shift->_get_score_docs ) }
}

@@ -1225,6 +1098,34 @@
OUTPUT:
RETVAL

+MODULE = KinoSearch PACKAGE = KinoSearch::Util::ToolSet
+
+SV*
+to_kino(sv)
+ SV *sv;
+CODE:
+{
+ kino_Obj *obj = perl_to_kino(sv);
+ KOBJ_TO_SV_NOINC(obj, RETVAL);
+}
+OUTPUT: RETVAL
+
+SV*
+to_perl(sv)
+ SV *sv;
+CODE:
+{
+ if (sv_isobject && sv_derived_from(sv, KINO_OBJ.name->ptr)) {
+ IV tmp = SvIV(SvRV(sv));
+ kino_Obj* obj = INT2PTR(kino_Obj*, tmp);
+ RETVAL = kobj_to_pobj(obj);
+ }
+ else {
+ RETVAL = newSVsv(sv);
+ }
+}
+OUTPUT: RETVAL
+
__POD__

=head1 NAME

Modified: trunk/perl/t/107-index_file_names.t
===================================================================
--- trunk/perl/t/107-index_file_names.t 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/t/107-index_file_names.t 2008-08-03 04:55:00 UTC (rev 3696)
@@ -22,12 +22,12 @@

touch( $folder, "seg_2b4-25.p" );
my $files = $folder->list;
-my $unused = unused_files( $files, $snapshot );
+my $unused = unused_files( $files, [$snapshot] );
is_deeply( $unused, ['seg_2b4-25.p'], "unused file" );

touch( $folder, "foo" );
$files = $folder->list;
-$unused = unused_files( $files, $snapshot );
+$unused = unused_files( $files, [$snapshot] );
is_deeply( $unused, ['seg_2b4-25.p'], "non ks file ignored" );

$folder->delete_file("seg_2b4-25.p");
@@ -39,8 +39,8 @@
}
@expected = sort @expected;
touch( $folder, "seg_1-999.del" );
-$files = $folder->list;
-$unused = unused_files( $files, $snapshot );
+$files = $folder->list;
+$unused = unused_files( $files, [$snapshot] );
@$unused = sort @$unused;
is_deeply( $unused, \@expected,
"unused_files handles generational files correctly" );

Modified: trunk/perl/xs/XSBind.c
===================================================================
--- trunk/perl/xs/XSBind.c 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/xs/XSBind.c 2008-08-03 04:55:00 UTC (rev 3696)
@@ -5,6 +5,18 @@
#include "KinoSearch/Store/OutStream.h"
#include "KinoSearch/Util/StringHelper.h"

+/* Convert a Perl hash into a KS Hash. Caller takes responsibility for a
+ * refcount.
+ */
+static kino_Hash*
+phash_to_khash(HV *phash);
+
+/* Convert a Perl array into a KS VArray. Caller takes responsibility for a
+ * refcount.
+ */
+static kino_VArray*
+parray_to_karray(AV *parray);
+
kino_Obj*
kino_XSBind_new_blank_obj(SV *either_sv)
{
@@ -70,6 +82,32 @@
IV tmp = SvIV( SvRV(sv) );
retval = INT2PTR(kino_Obj*, tmp);
}
+ else if (SvROK(sv)) {
+ SV *inner = SvRV(sv);
+ if (SvTYPE(inner) == SVt_PVAV) {
+ if ( vtable == (kino_VirtualTable*)&KINO_VARRAY
+ || vtable == (kino_VirtualTable*)&KINO_FASTOBJ
+ || vtable == (kino_VirtualTable*)&KINO_OBJ
+ ) {
+ retval = (kino_Obj*)parray_to_karray((AV*)inner);
+ }
+ }
+ else if (SvTYPE(inner) == SVt_PVHV) {
+ if ( vtable == (kino_VirtualTable*)&KINO_HASH
+ || vtable == (kino_VirtualTable*)&KINO_FASTOBJ
+ || vtable == (kino_VirtualTable*)&KINO_OBJ
+ ) {
+ retval = (kino_Obj*)phash_to_khash((HV*)inner);
+ }
+ }
+
+ if(retval) {
+ /* Mortalize the KS-ified copy of the Perl data structure. */
+ SV *mortal = Kino_Obj_To_Native(retval);
+ REFCOUNT_DEC(retval);
+ sv_2mortal(mortal);
+ }
+ }
}
return retval;
}
@@ -105,6 +143,91 @@
}
}

+static kino_Hash*
+phash_to_khash(HV *phash)
+{
+ chy_u32_t num_keys = hv_iterinit(phash);
+ kino_Hash *retval = kino_Hash_new(num_keys);
+
+ while (num_keys--) {
+ HE *entry = hv_iternext(phash);
+ STRLEN key_len;
+ /* Copied from Perl 5.10.0 HePV macro, because the HePV macro in
+ * earlier versions of Perl triggers a compiler warning. */
+ char *key = HeKLEN(entry) == HEf_SVKEY
+ ? SvPV(HeKEY_sv(entry), key_len)
+ : ((key_len = HeKLEN(entry)), HeKEY(entry));
+ SV *value_sv = HeVAL(entry);
+ kino_Obj *value = perl_to_kino(value_sv);
+ if (!kino_StrHelp_utf8_valid(key, key_len)) {
+ /* Force key to UTF-8. This is kind of a buggy area for Perl, and
+ * may result in round-trip weirdness. */
+ SV *key_sv = HeSVKEY_force(entry);
+ key = SvPVutf8(key_sv, key_len);
+ }
+ Kino_Hash_Store_Str(retval, key, key_len, value);
+ REFCOUNT_DEC(value);
+ }
+
+ return retval;
+}
+
+static kino_VArray*
+parray_to_karray(AV *parray)
+{
+ const chy_u32_t size = av_len(parray) + 1;
+ kino_VArray *retval = kino_VA_new(size);
+ chy_u32_t i;
+
+ for (i = 0; i < size; i++) {
+ SV **elem_sv = av_fetch(parray, i, false);
+ if (elem_sv) {
+ kino_Obj *elem = perl_to_kino(*elem_sv);
+ if (elem) {
+ Kino_VA_Store(retval, i, elem);
+ REFCOUNT_DEC(elem);
+ }
+ }
+ }
+ Kino_VA_Resize(retval, size); /* needed if last elem is NULL */
+
+ return retval;
+}
+
+kino_Obj*
+kino_XSBind_perl_to_kino(SV *sv)
+{
+ kino_Obj *retval = NULL;
+
+ if (sv && SvOK(sv)) {
+ if (SvROK(sv)) {
+ SV *inner = SvRV(sv);
+ if (SvTYPE(inner) == SVt_PVAV) {
+ retval = (kino_Obj*)parray_to_karray((AV*)inner);
+ }
+ else if (SvTYPE(inner) == SVt_PVHV) {
+ retval = (kino_Obj*)phash_to_khash((HV*)inner);
+ }
+ else if ( sv_isobject(sv)
+ && sv_derived_from(sv, KINO_OBJ.name->ptr)
+ ) {
+ IV tmp = SvIV(inner);
+ retval = INT2PTR(kino_Obj*, tmp);
+ REFCOUNT_INC(retval);
+ }
+ }
+
+ /* It's either a plain scalar or a non-KinoSearch Perl object. */
+ if (!retval) {
+ STRLEN len;
+ char *ptr = SvPVutf8(sv, len);
+ retval = (kino_Obj*)kino_CB_new_from_trusted_utf8(ptr, len);
+ }
+ }
+
+ return retval;
+}
+
static SV*
karray_to_parray(kino_VArray *varray)
{

Modified: trunk/perl/xs/XSBind.h
===================================================================
--- trunk/perl/xs/XSBind.h 2008-08-03 04:42:13 UTC (rev 3695)
+++ trunk/perl/xs/XSBind.h 2008-08-03 04:55:00 UTC (rev 3696)
@@ -38,8 +38,10 @@
kino_XSBind_new_blank_obj(SV *either_sv);

/* If the SV contains a KS object which passes an "isa" test against the
- * passed-in VirtualTable, return a pointer to it. Otherwise, throw an
- * exception.
+ * passed-in VirtualTable, return a pointer to it. If the vtable indicates
+ * that a VArray or a Hash is desired and the SV contains the corresponding
+ * Perl data structure, attempt to convert it to a mortalized KS copy.
+ * If the desired object cannot be derived, throw an exception.
*/
kino_Obj*
kino_XSBind_sv_to_kobj(SV *sv, kino_VirtualTable *vtable);
@@ -87,6 +89,13 @@
SV*
kino_XSBind_kobj_to_pobj(kino_Obj *obj);

+/* Deep conversion of Perl data structures to KS objects -- Perl hash to
+ * Hash*, Perl array to VArray*, and everything else stringified and turned to
+ * a CharBuf.
+ */
+kino_Obj*
+kino_XSBind_perl_to_kino(SV *sv);
+
/* Create a mortalized hash, built using a defaults hash and @_.
*/
HV*
@@ -254,6 +263,7 @@
#define bb_to_sv kino_XSBind_bb_to_sv
#define cb_to_sv kino_XSBind_cb_to_sv
#define kobj_to_pobj kino_XSBind_kobj_to_pobj
+#define perl_to_kino kino_XSBind_perl_to_kino
#define sv_to_class_name kino_XSBind_sv_to_class_name
#define allot_params kino_XSBind_allot_params
#define build_args_hash kino_XSBind_build_args_hash


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits