Mailing List Archive

svn commit: r453556 [1/3] - in /spamassassin/branches/jm_re2c_hacks: ./ lib/Mail/SpamAssassin/Plugin/ rule2xs/RabinKarpAccel-0.01/ rule2xs/RabinKarpAccel-0.01/lib/ rule2xs/RabinKarpAccel-0.01/t/
Author: jm
Date: Fri Oct 6 04:42:25 2006
New Revision: 453556

URL: http://svn.apache.org/viewvc?view=rev&rev=453556
Log:
Check in Rabin-Karp code. It works, but sadly winds up slower than normal body rules in 'real-world' mass-checks; the overhead outweighs the efficiency benefits of parallelized matching vs. sequential regexp matches.

Added:
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Changes
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/MANIFEST
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/META.yml
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Makefile.PL
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/README
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/RabinKarpAccel.xs
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/lib/
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/lib/RabinKarpAccel.pm
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/ppport.h
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/t/
spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/t/RabinKarpAccel.t
Modified:
spamassassin/branches/jm_re2c_hacks/MANIFEST
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm

Modified: spamassassin/branches/jm_re2c_hacks/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/MANIFEST?view=diff&rev=453556&r1=453555&r2=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/MANIFEST (original)
+++ spamassassin/branches/jm_re2c_hacks/MANIFEST Fri Oct 6 04:42:25 2006
@@ -497,3 +497,6 @@
t/uribl.t
t/shortcircuit.t
t/spamc_y.t
+lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
+lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm
+lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?view=diff&rev=453556&r1=453555&r2=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Fri Oct 6 04:42:25 2006
@@ -62,31 +62,42 @@
sub finish_parsing_end {
my ($self, $params) = @_;
my $conf = $params->{conf};
+ $self->extract_bases($conf);
+}
+
+sub extract_bases {
+ my ($self, $conf) = @_;

# TODO: need a better way to do this rather than using an env
# var as a back channel
my $rawf = $ENV{'RULE_REGEXP_DUMP_FILE'};
- return unless $rawf;
+ my $f;

- $rawf =~ /^(.*)$/;
- my $f = $1; # untaint; allow anything here, it's from %ENV and safe
+ if ($rawf) {
+ $rawf =~ /^(.*)$/;
+ $f = $1; # untaint; allow anything here, it's from %ENV and safe
+ }

- $self->extract_bases_for_set ($f, $conf, $conf->{body_tests}, 'body');
+ $self->extract_set($f, $conf, $conf->{body_tests}, 'body');
}

-sub extract_bases_for_set {
+sub extract_set {
my ($self, $dumpfile, $conf, $test_set, $ruletype) = @_;

foreach my $pri (keys %{$test_set}) {
my $nicepri = $pri; $nicepri =~ s/-/neg/g;
- $self->extract_all($dumpfile, $conf, $test_set->{$pri}, $ruletype.'_'.$nicepri);
+ $self->extract_set_pri($conf, $test_set->{$pri}, $ruletype.'_'.$nicepri);
+
+ if ($dumpfile) {
+ $self->dump_base_strings($dumpfile, $conf, $ruletype.'_'.$nicepri);
+ }
}
}

###########################################################################

-sub extract_all {
- my ($self, $dumpfile, $conf, $rules, $ruletype) = @_;
+sub extract_set_pri {
+ my ($self, $conf, $rules, $ruletype) = @_;

my @good_bases = ();
my @failed = ();
@@ -106,6 +117,7 @@
my $rule = $rules->{$name};

# ignore ReplaceTags rules
+ # TODO: need cleaner way to do this
next if ($conf->{rules_to_replace}->{$name});

my $base = $self->extract_base($rule, 0);
@@ -166,8 +178,8 @@
# re2c, and it appears the re2c developers don't plan to offer this:
# https://sourceforge.net/tracker/index.php?func=detail&aid=1540845&group_id=96864&atid=616203

- open (OUT, ">$dumpfile") or die "cannot write to $dumpfile!";
- print OUT "name $ruletype\n";
+ $conf->{base_orig}->{$ruletype} = { };
+ $conf->{base_string}->{$ruletype} = { };

foreach my $set1 (@good_bases) {
my $base1 = $set1->{base};
@@ -175,7 +187,7 @@
my $key1 = $set1->{name};
next if ($base1 eq '' or $key1 eq '');

- print OUT "orig $key1 $orig1\n";
+ $conf->{base_orig}->{$ruletype}->{$key1} = $orig1;

foreach my $set2 (@good_bases) {
next if ($set1 == $set2);
@@ -204,15 +216,34 @@
my $base = $set->{base};
my $key = $set->{name};
next unless $base;
- print OUT "r $base:$key\n";
+ $conf->{base_string}->{$ruletype}->{$base} = $key;
}
- close OUT or die "close failed on $dumpfile!";

warn ("zoom: base extraction complete for $ruletype: yes=$yes no=$no\n");
}

###########################################################################

+sub dump_base_strings {
+ my ($self, $dumpfile, $conf, $ruletype) = @_;
+
+ open (OUT, ">$dumpfile") or die "cannot write to $dumpfile!";
+ print OUT "name $ruletype\n";
+
+ foreach my $key1 (sort keys %{$conf->{base_orig}->{$ruletype}}) {
+ print OUT "orig $key1 $conf->{base_orig}->{$ruletype}->{$key1}\n";
+ }
+
+ foreach my $key (sort keys %{$conf->{base_string}->{$ruletype}}) {
+ print OUT "r $key:$conf->{base_string}->{$ruletype}->{$key}\n";
+ }
+ close OUT or die "close failed on $dumpfile!";
+
+ warn ("zoom: bases written to '$dumpfile'\n");
+}
+
+###########################################################################
+
# TODO:
# NO /no.{1,10}P(?:er|re)scription.{1,10}(?:needed|require|necessary)/i
# => should extract 'scription' somehow
@@ -256,6 +287,10 @@

# remove (?i)
$rule =~ s/\(\?i\)//gs;
+ }
+ else {
+ return if $rule =~ /\(\?i\)/;
+ return if $mods =~ /i/;
}

# remove /m and /s modifiers

Added: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm (added)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RabinKarpBody.pm Fri Oct 6 04:42:25 2006
@@ -0,0 +1,140 @@
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </@LICENSE>
+
+package Mail::SpamAssassin::Plugin::RabinKarpBody;
+
+use Mail::SpamAssassin::Plugin;
+use Mail::SpamAssassin::Logger;
+use RabinKarpAccel;
+use Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor;
+
+use strict;
+use warnings;
+use bytes;
+
+use vars qw(@ISA);
+@ISA = qw(Mail::SpamAssassin::Plugin);
+
+sub new {
+ my $class = shift;
+ my $mailsaobject = shift;
+ $class = ref($class) || $class;
+ my $self = $class->SUPER::new($mailsaobject);
+ bless ($self, $class);
+
+ return $self;
+}
+
+###########################################################################
+
+sub finish_parsing_end {
+ my ($self, $params) = @_;
+ my $conf = $params->{conf};
+
+ my $basextor = Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor->new(
+ $self->{main});
+ $basextor->extract_bases($conf);
+
+ $conf->{skip_body_rules} = { };
+ $self->setup_test_set ($conf, $conf->{body_tests}, 'body');
+}
+
+sub setup_test_set {
+ my ($self, $conf, $test_set, $ruletype) = @_;
+ foreach my $pri (keys %{$test_set}) {
+ my $nicepri = $pri; $nicepri =~ s/-/neg/g;
+ $self->setup_test_set_pri($conf, $test_set->{$pri}, $ruletype.'_'.$nicepri);
+ }
+}
+
+sub setup_test_set_pri {
+ my ($self, $conf, $rules, $ruletype) = @_;
+
+ $conf->{$ruletype}->{rkhashes} = { };
+ foreach my $base (keys %{$conf->{base_string}->{$ruletype}}) {
+ next unless (length $base > 4);
+ my @rules = split(' ', $conf->{base_string}->{$ruletype}->{$base});
+ RabinKarpAccel::add_bitvec($conf->{$ruletype}->{rkhashes}, lc $base, [ @rules ]);
+ foreach my $rule (@rules) {
+ $conf->{skip_body_rules}->{$rule} = 1;
+ }
+ }
+}
+
+###########################################################################
+
+sub run_body_hack {
+ my ($self, $params) = @_;
+
+ return unless ($params->{ruletype} eq 'body');
+
+ my $pri = $params->{priority};
+ my $nicepri = $params->{priority}; $nicepri =~ s/-/neg/g;
+ my $ruletype = ($params->{ruletype}.'_'.$nicepri);
+ my $scanner = $params->{permsgstatus};
+ my $conf = $scanner->{conf};
+
+ my $rkhashes = $conf->{$ruletype}->{rkhashes};
+ if (!$rkhashes || (scalar keys %{$conf->{$ruletype}->{rkhashes}} <= 0))
+ {
+ dbg("zoom: run_body_hack for $ruletype skipped, no rules");
+ return;
+ }
+
+ my $do_dbg = (would_log('dbg', 'zoom') > 1);
+ my $scoresptr = $conf->{scores};
+
+ dbg("zoom: run_body_hack for $ruletype start");
+
+ {
+ no strict "refs";
+ foreach my $line (@{$params->{lines}})
+ {
+ my $results = RabinKarpAccel::scan_string($rkhashes, lc $line);
+ next unless $results;
+
+ my %alreadydone = ();
+ foreach my $rulename (@{$results})
+ {
+ # only try each rule once per line
+ next if exists $alreadydone{$rulename};
+ $alreadydone{$rulename} = undef;
+
+ # ignore 0-scored rules, of course
+ next unless $scoresptr->{$rulename};
+
+ # TODO: it would be very useful to provide an optional
+ # means of instrumenting the ruleset, so that we can
+ # find out when the base matched but the full RE didn't.
+
+ # if ($do_dbg) {
+ # dbg("zoom: base found for $rulename: $line");
+ # }
+
+ # run the real regexp -- on this line alone
+ &{'Mail::SpamAssassin::PerMsgStatus::'.$rulename.'_one_line_body_test'}
+ ($scanner, $line);
+ }
+ }
+ use strict "refs";
+ }
+
+ dbg("zoom: run_body_hack for $ruletype done");
+}
+
+###########################################################################
+
+1;

Added: spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Changes
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Changes?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Changes (added)
+++ spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Changes Fri Oct 6 04:42:25 2006
@@ -0,0 +1,6 @@
+Revision history for Perl extension RabinKarpAccel.
+
+0.01 Mon Oct 2 14:11:46 2006
+ - original version; created by h2xs 1.23 with options
+ -b 5.6.1 -A -f -n RabinKarpAccel
+

Added: spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/MANIFEST?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/MANIFEST (added)
+++ spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/MANIFEST Fri Oct 6 04:42:25 2006
@@ -0,0 +1,9 @@
+RabinKarpAccel.xs
+Changes
+Makefile.PL
+MANIFEST
+ppport.h
+README
+t/RabinKarpAccel.t
+lib/RabinKarpAccel.pm
+META.yml Module meta-data (added by MakeMaker)

Added: spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/META.yml
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/META.yml?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/META.yml (added)
+++ spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/META.yml Fri Oct 6 04:42:25 2006
@@ -0,0 +1,10 @@
+# http://module-build.sourceforge.net/META-spec.html
+#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
+name: RabinKarpAccel
+version: 0.01
+version_from: lib/RabinKarpAccel.pm
+installdirs: site
+requires:
+
+distribution_type: module
+generated_by: ExtUtils::MakeMaker version 6.30_01

Added: spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Makefile.PL
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Makefile.PL?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Makefile.PL (added)
+++ spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/Makefile.PL Fri Oct 6 04:42:25 2006
@@ -0,0 +1,17 @@
+use 5.006001;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ NAME => 'RabinKarpAccel',
+ VERSION_FROM => 'lib/RabinKarpAccel.pm', # finds $VERSION
+ PREREQ_PM => {}, # e.g., Module::Name => 1.1
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (ABSTRACT_FROM => 'lib/RabinKarpAccel.pm', # retrieve abstract from module
+ AUTHOR => 'A. U. Thor <jm@>') : ()),
+ LIBS => [''], # e.g., '-lm'
+ DEFINE => '', # e.g., '-DHAVE_SOMETHING'
+ INC => '-I.', # e.g., '-I. -I/usr/include/other'
+ # Un-comment this if you add C files to link with later:
+ # OBJECT => '$(O_FILES)', # link all the C files too
+);

Added: spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/README?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/README (added)
+++ spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/README Fri Oct 6 04:42:25 2006
@@ -0,0 +1,40 @@
+RabinKarpAccel version 0.01
+========================
+
+The README is used to introduce the module and provide instructions on
+how to install the module, any machine dependencies it may have (for
+example C compilers and installed libraries) and any other information
+that should be provided before the module is installed.
+
+A README file is required for CPAN modules since CPAN extracts the
+README file from a module distribution so that people browsing the
+archive can use it to get an idea of the module's uses. It is usually a
+good idea to provide version information here so that people can
+decide whether fixes for the module are worth downloading.
+
+INSTALLATION
+
+To install this module type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+ None beyond core Perl 5.6.1 or later; a C compiler is needed to build the XS code.
+
+COPYRIGHT AND LICENCE
+
+Put the correct copyright and licence information here.
+
+Copyright (C) 2006 by A. U. Thor
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8.7 or,
+at your option, any later version of Perl 5 you may have available.
+
+

Added: spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/RabinKarpAccel.xs
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/RabinKarpAccel.xs?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/RabinKarpAccel.xs (added)
+++ spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/RabinKarpAccel.xs Fri Oct 6 04:42:25 2006
@@ -0,0 +1,253 @@
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include "ppport.h"
+
+/* see http://www.eecs.harvard.edu/~ellard/Courses/sq98_root.pdf , pp 73-80
+ * for the Rabin-Karp algorithm definition
+ */
+#define fast_b ((long) 257)
+#define fast_m ((long) 1024)
+#define ksize 4
+
+
+static void av_push_all (AV *to, AV *from)
+{
+ int i, len;
+ SV **svptr;
+
+ len = av_len(from);
+ for (i = 0; i <= len; i++) {
+ svptr = av_fetch(from, i, 0);
+ if (svptr == NULL) {
+ continue; /* this can happen, it seems */
+ }
+
+ //SvREFCNT_inc(*svptr);
+ av_push (to, *svptr);
+ }
+}
+
+static void add_rk_hit (AV *results, HV *keys, SV **keysv)
+{
+ AV *rulesav;
+ int i, len;
+
+ /* add rule names to results AV */
+ rulesav = (AV *) SvRV(*keysv);
+
+ len = av_len(rulesav);
+ for (i = 0; i <= len; i++) {
+ SV **svptr = av_fetch(rulesav, i, 0);
+ if (svptr == NULL) {
+ continue; /* this can happen, it seems */
+ }
+
+ //SvREFCNT_inc(*svptr);
+ av_push (results, *svptr);
+ }
+}
+
+static char *
+get_flut_str (HV *keys)
+{
+ SV **mapptr;
+ char buf[(int) fast_m];
+ SV *newmap;
+ char *flut_str;
+ STRLEN maplen;
+
+ mapptr = hv_fetch (keys, "*BITMAP", 7, 0);
+
+ /* create the map if it doesn't exist */
+ if (mapptr == NULL || *mapptr == NULL)
+ {
+ Zero((void *) buf, (int) fast_m, char);
+ newmap = newSVpvn(buf, (int) fast_m); /* will take a copy */
+ hv_store (keys, "*BITMAP", 7, newmap, 0);
+ mapptr = &newmap;
+ }
+
+ flut_str = (char *) SvPV(*mapptr, maplen);
+ if (maplen != (int) fast_m) {
+ die ("oops! maplen shrunk to %d", maplen);
+ }
+
+ return flut_str;
+}
+
+static void
+set_in_flut (HV *keys, int P_hash)
+{
+ char *flut_str;
+
+ if (P_hash >= (int) fast_m) {
+ die ("oops! P_hash %d > maplen %d", P_hash, (int) fast_m);
+ }
+ flut_str = get_flut_str(keys);
+ flut_str[P_hash] = (char) 1;
+}
+
+
+static unsigned long
+rk_exp_mod (unsigned long x, unsigned long n, unsigned long m)
+{
+ unsigned long square, exp;
+
+ if (n == 0) {
+ return 1;
+ }
+ else if (n == 1) {
+ return (x % m);
+ }
+ else {
+ square = (x * x) % m;
+ exp = rk_exp_mod (square, n / 2, m);
+ if (n % 2 == 0) {
+ return (exp % m);
+ } else {
+ return ((exp * x) % m);
+ }
+ }
+}
+
+static long
+rk_hash (unsigned char *str, long len, long b, long m)
+{
+ long i;
+ long value = 0;
+ long power = 1;
+
+ for (i = len - 1; i >= 0; i--) {
+ value += (power * str [i]);
+ value %= m;
+ power *= b;
+ power %= m;
+ }
+ return (value);
+}
+
+static void
+rk_search (AV *results, HV *keys, unsigned char *T, long T_len)
+{
+ long top_one;
+ long T_hash;
+ long i;
+ SV *hashkey;
+ char *hashkeystr;
+ STRLEN len;
+ SV **keysv;
+ char *flut_str;
+
+ flut_str = get_flut_str(keys);
+ top_one = rk_exp_mod (fast_b, ksize, fast_m);
+ T_hash = rk_hash (T, ksize, fast_b, fast_m);
+
+ for (i = 0; i <= T_len - ksize; i++) {
+ /* do we have a hash hit? */
+ if (flut_str[(int) T_hash] != (char) 0) {
+ hashkey = sv_2mortal(newSVpvf("%d", (int) T_hash));
+ hashkeystr = SvPV(hashkey, len);
+ if ((keysv = hv_fetch (keys, hashkeystr, len, 0)) != NULL)
+ {
+ /* copy the rule name SV ptrs to the results AV */
+ add_rk_hit(results, keys, keysv);
+ }
+ }
+
+ /* the bit-shifting Karp-Rabin sliding hash -- bit-shifts are fast */
+ T_hash *= fast_b;
+ T_hash -= ((T[i] * top_one) & (fast_m - 1));
+ T_hash += T[i + ksize];
+ T_hash &= (fast_m - 1);
+ if (T_hash < 0) { T_hash += fast_m; }
+ }
+}
+
+
+
+MODULE = RabinKarpAccel PACKAGE = RabinKarpAccel
+
+PROTOTYPES: DISABLE
+
+void
+add_bitvec(bvhash, str, rulesary)
+ SV* bvhash
+ SV* str
+ SV* rulesary
+
+ PREINIT:
+ unsigned char *pstart;
+ unsigned char *pend;
+ STRLEN plen;
+ HV *bvhv;
+ SV *hashkey;
+ char *hashkeystr;
+ STRLEN len;
+ long P_hash;
+ SV **svptr;
+
+ CODE:
+ if (!SvROK(bvhash) || (SvTYPE(SvRV(bvhash)) != SVt_PVHV)) {
+ die("bad type for bvhash");
+ }
+ bvhv = (HV *) SvRV(bvhash);
+
+ if (!SvROK(rulesary) || (SvTYPE(SvRV(rulesary)) != SVt_PVAV)) {
+ die("bad type for rulesary");
+ }
+
+ pstart = (unsigned char *) SvPVutf8(str, plen);
+ pend = pstart + plen;
+
+ P_hash = rk_hash (pstart, ksize, fast_b, fast_m);
+
+ /* add the contents of @{$rulesary} to the bvhv hash under
+ * the key "P_hash" */
+ hashkey = newSVpvf("%d", (int) P_hash);
+ hashkeystr = SvPV(hashkey, len);
+ svptr = hv_fetch (bvhv, hashkeystr, len, 1);
+
+ if (svptr == NULL || *svptr == NULL ||
+ !SvROK(*svptr) ||
+ (SvTYPE(SvRV(*svptr)) != SVt_PVAV))
+ {
+ SvREFCNT_inc(rulesary);
+ hv_store (bvhv, hashkeystr, len, rulesary, 0);
+ } else {
+ av_push_all ((AV *) SvRV(*svptr), (AV *) SvRV(rulesary));
+ }
+
+ /* ensure we set the flag char in the fast lookup table, too */
+ set_in_flut(bvhv, (int) P_hash);
+
+SV *
+scan_string(bvhash, psv)
+ SV* bvhash
+ SV* psv
+
+ PREINIT:
+ unsigned char *pstart;
+ unsigned char *pend;
+ STRLEN plen;
+ AV *results;
+ HV *bvhv;
+
+ CODE:
+ if (!SvROK(bvhash) || (SvTYPE(SvRV(bvhash)) != SVt_PVHV)) {
+ die("bad type for bvhash");
+ }
+ bvhv = (HV *) SvRV(bvhash);
+
+ pstart = (unsigned char *) SvPVutf8(psv, plen);
+ pend = pstart + plen;
+ results = (AV *) sv_2mortal((SV *) newAV());
+
+ rk_search (results, bvhv, pstart, plen);
+
+ RETVAL = newRV((SV *) results);
+ OUTPUT:
+ RETVAL
+
+

Added: spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/lib/RabinKarpAccel.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/lib/RabinKarpAccel.pm?view=auto&rev=453556
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/lib/RabinKarpAccel.pm (added)
+++ spamassassin/branches/jm_re2c_hacks/rule2xs/RabinKarpAccel-0.01/lib/RabinKarpAccel.pm Fri Oct 6 04:42:25 2006
@@ -0,0 +1,86 @@
+package RabinKarpAccel;
+
+use 5.006001;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+# Items to export into callers namespace by default. Note: do not export
+# names by default without a very good reason. Use EXPORT_OK instead.
+# Do not simply export all your public functions/methods/constants.
+
+# This allows declaration use RabinKarpAccel ':all';
+# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
+# will save memory.
+our %EXPORT_TAGS = ( 'all' => [ qw(
+
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+
+);
+
+our $VERSION = '0.01';
+
+require XSLoader;
+XSLoader::load('RabinKarpAccel', $VERSION);
+
+# Preloaded methods go here.
+
+1;
+__END__
+# Below is stub documentation for your module. You'd better edit it!
+
+=head1 NAME
+
+RabinKarpAccel - Perl extension for Rabin-Karp multi-pattern string matching
+
+=head1 SYNOPSIS
+
+ use RabinKarpAccel;
+ blah blah blah
+
+=head1 DESCRIPTION
+
+Stub documentation for RabinKarpAccel, created by h2xs. It looks like the
+author of the extension was negligent enough to leave the stub
+unedited.
+
+Blah blah blah.
+
+=head2 EXPORT
+
+None by default.
+
+
+
+=head1 SEE ALSO
+
+Mention other useful documentation such as the documentation of
+related modules or operating system documentation (such as man pages
+in UNIX), or any relevant external documentation such as RFCs or
+standards.
+
+If you have a mailing list set up for your module, mention it here.
+
+If you have a web site set up for your module, mention it here.
+
+=head1 AUTHOR
+
+A. U. Thor, E<lt>jm@E<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2006 by A. U. Thor
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.8.7 or,
+at your option, any later version of Perl 5 you may have available.
+
+
+=cut