Mailing List Archive

r3776 - in trunk: c_src/KinoSearch/Analysis perl perl/lib perl/lib/KinoSearch/Analysis
Author: creamyg
Date: 2008-08-28 00:11:03 -0700 (Thu, 28 Aug 2008)
New Revision: 3776

Modified:
trunk/c_src/KinoSearch/Analysis/Stemmer.bp
trunk/c_src/KinoSearch/Analysis/Stemmer.c
trunk/perl/Build.PL
trunk/perl/lib/KinoSearch.pm
trunk/perl/lib/KinoSearch/Analysis/Stemmer.pm
Log:
Use DIY portable dynamic loading to load Snowball C symbols.


Modified: trunk/c_src/KinoSearch/Analysis/Stemmer.bp
===================================================================
--- trunk/c_src/KinoSearch/Analysis/Stemmer.bp 2008-08-27 21:06:21 UTC (rev 3775)
+++ trunk/c_src/KinoSearch/Analysis/Stemmer.bp 2008-08-28 07:11:03 UTC (rev 3776)
@@ -1,5 +1,34 @@
parcel KinoSearch cnick Kino;

+__C__
+typedef unsigned char sb_symbol;
+struct sb_stemmer;
+
+typedef struct sb_stemmer*
+(*kino_Stemmer_sb_stemmer_new_t)(const char *algorithm, const char *encoding);
+typedef void
+(*kino_Stemmer_sb_stemmer_delete_t)(struct sb_stemmer *snowstemmer);
+typedef sb_symbol*
+(*kino_Stemmer_sb_stemmer_stem_t)(struct sb_stemmer *snowstemmer,
+ const sb_symbol *text, int len);
+typedef int
+(*kino_Stemmer_sb_stemmer_length_t)(struct sb_stemmer *snowstemmer);
+extern kino_Stemmer_sb_stemmer_new_t kino_Stemmer_sb_stemmer_new;
+extern kino_Stemmer_sb_stemmer_delete_t kino_Stemmer_sb_stemmer_delete;
+extern kino_Stemmer_sb_stemmer_stem_t kino_Stemmer_sb_stemmer_stem;
+extern kino_Stemmer_sb_stemmer_length_t kino_Stemmer_sb_stemmer_length;
+#ifdef KINO_USE_SHORT_NAMES
+ #define Stemmer_sb_stemmer_new_t kino_Stemmer_sb_stemmer_new_t
+ #define Stemmer_sb_stemmer_delete_t kino_Stemmer_sb_stemmer_delete_t
+ #define Stemmer_sb_stemmer_stem_t kino_Stemmer_sb_stemmer_stem_t
+ #define Stemmer_sb_stemmer_length_t kino_Stemmer_sb_stemmer_length_t
+ #define Stemmer_sb_stemmer_new kino_Stemmer_sb_stemmer_new
+ #define Stemmer_sb_stemmer_delete kino_Stemmer_sb_stemmer_delete
+ #define Stemmer_sb_stemmer_stem kino_Stemmer_sb_stemmer_stem
+ #define Stemmer_sb_stemmer_length kino_Stemmer_sb_stemmer_length
+#endif
+__END_C__
+
/** Reduce related words to a shared root.
*
* Stemmer is an L<Analyzer|KinoSearch::Analysis::Analyzer> which reduces

Modified: trunk/c_src/KinoSearch/Analysis/Stemmer.c
===================================================================
--- trunk/c_src/KinoSearch/Analysis/Stemmer.c 2008-08-27 21:06:21 UTC (rev 3775)
+++ trunk/c_src/KinoSearch/Analysis/Stemmer.c 2008-08-28 07:11:03 UTC (rev 3776)
@@ -6,20 +6,10 @@
#include "KinoSearch/Analysis/Inversion.h"
#include "KinoSearch/Util/Native.h"

-/* Declare Snowball interface -- since it's so simple we don't need to include
- * the header.
- */
-typedef unsigned char sb_symbol;
-struct sb_stemmer;
-extern struct sb_stemmer*
-sb_stemmer_new(const char *algorithm, const char *encoding);
-void
-sb_stemmer_delete(struct sb_stemmer *snowstemmer);
-sb_symbol*
-sb_stemmer_stem(struct sb_stemmer *snowstemmer, const sb_symbol *text,
- int len);
-int
-sb_stemmer_length(struct sb_stemmer *snowstemmer);
+Stemmer_sb_stemmer_new_t Stemmer_sb_stemmer_new = NULL;
+Stemmer_sb_stemmer_delete_t Stemmer_sb_stemmer_delete = NULL;
+Stemmer_sb_stemmer_stem_t Stemmer_sb_stemmer_stem = NULL;
+Stemmer_sb_stemmer_length_t Stemmer_sb_stemmer_length = NULL;

Stemmer*
Stemmer_new(const CharBuf *language)
@@ -39,7 +29,7 @@
lang_buf[1] = tolower(CB_Code_Point_At(language, 1));
lang_buf[2] = '\0';
Native_callback(self, "lazy_load_snowball", 0);
- self->snowstemmer = sb_stemmer_new(lang_buf, "UTF_8");
+ self->snowstemmer = kino_Stemmer_sb_stemmer_new(lang_buf, "UTF_8");
if (!self->snowstemmer)
CONFESS("Can't find a Snowball stemmer for %o", language);

@@ -53,9 +43,9 @@
struct sb_stemmer *const snowstemmer = self->snowstemmer;

while (NULL != (token = Inversion_Next(inversion))) {
- sb_symbol *stemmed_text = sb_stemmer_stem(snowstemmer,
+ sb_symbol *stemmed_text = kino_Stemmer_sb_stemmer_stem(snowstemmer,
(sb_symbol*)token->text, token->len);
- size_t len = sb_stemmer_length(snowstemmer);
+ size_t len = kino_Stemmer_sb_stemmer_length(snowstemmer);
if (len > token->len) {
free(token->text);
token->text = MALLOCATE(len + 1, char);
@@ -70,7 +60,7 @@
void
Stemmer_destroy(Stemmer *self)
{
- if (self->snowstemmer) sb_stemmer_delete(self->snowstemmer);
+ if (self->snowstemmer) kino_Stemmer_sb_stemmer_delete(self->snowstemmer);
FREE_OBJ(self);
}


Modified: trunk/perl/Build.PL
===================================================================
--- trunk/perl/Build.PL 2008-08-27 21:06:21 UTC (rev 3775)
+++ trunk/perl/Build.PL 2008-08-28 07:11:03 UTC (rev 3776)
@@ -40,7 +40,7 @@
dist_version_from => 'lib/KinoSearch.pm',
requires => {
'Compress::Zlib' => 0,
- 'Lingua::Stem::Snowball' => 0.951,
+ 'Lingua::Stem::Snowball' => 0.952,
'Lingua::StopWords' => 0.09,
'JSON::XS' => 2.01,
},

Modified: trunk/perl/lib/KinoSearch/Analysis/Stemmer.pm
===================================================================
--- trunk/perl/lib/KinoSearch/Analysis/Stemmer.pm 2008-08-27 21:06:21 UTC (rev 3775)
+++ trunk/perl/lib/KinoSearch/Analysis/Stemmer.pm 2008-08-28 07:11:03 UTC (rev 3776)
@@ -4,6 +4,32 @@

__END__

+__XS__
+
+MODULE = KinoSearch PACKAGE = KinoSearch::Analysis::Stemmer
+
+void
+_copy_snowball_symbols()
+PPCODE:
+{
+ SV *sb_stemmer_new_sv = extract_sv(PL_modglobal,
+ "Lingua::Stem::Snowball::sb_stemmer_new", 38);
+ SV *sb_stemmer_delete_sv = extract_sv(PL_modglobal,
+ "Lingua::Stem::Snowball::sb_stemmer_delete", 41);
+ SV *sb_stemmer_stem_sv = extract_sv(PL_modglobal,
+ "Lingua::Stem::Snowball::sb_stemmer_stem", 39);
+ SV *sb_stemmer_length_sv = extract_sv(PL_modglobal,
+ "Lingua::Stem::Snowball::sb_stemmer_length", 41);
+ kino_Stemmer_sb_stemmer_new
+ = (kino_Stemmer_sb_stemmer_new_t)SvIV(sb_stemmer_new_sv);
+ kino_Stemmer_sb_stemmer_delete
+ = (kino_Stemmer_sb_stemmer_delete_t)SvIV(sb_stemmer_delete_sv);
+ kino_Stemmer_sb_stemmer_stem
+ = (kino_Stemmer_sb_stemmer_stem_t)SvIV(sb_stemmer_stem_sv);
+ kino_Stemmer_sb_stemmer_length
+ = (kino_Stemmer_sb_stemmer_length_t)SvIV(sb_stemmer_length_sv);
+}
+
__AUTO_XS__

my $synopsis = <<'END_SYNOPSIS';

Modified: trunk/perl/lib/KinoSearch.pm
===================================================================
--- trunk/perl/lib/KinoSearch.pm 2008-08-27 21:06:21 UTC (rev 3775)
+++ trunk/perl/lib/KinoSearch.pm 2008-08-28 07:11:03 UTC (rev 3776)
@@ -80,7 +80,10 @@

{
package KinoSearch::Analysis::Stemmer;
- sub lazy_load_snowball { require Lingua::Stem::Snowball }
+ sub lazy_load_snowball {
+ require Lingua::Stem::Snowball;
+ KinoSearch::Analysis::Stemmer::_copy_snowball_symbols();
+ }
}

{


_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits