Mailing List Archive

svn commit: r438862 - /spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
Author: jm
Date: Thu Aug 31 04:19:44 2006
New Revision: 438862

URL: http://svn.apache.org/viewvc?rev=438862&view=rev
Log:
remove method call overhead for body, rawbody, header and uri rules; this provides a 3% speedup on a mixed ham/spam corpus

Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=438862&r1=438861&r2=438862&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Thu Aug 31 04:19:44 2006
@@ -90,6 +90,7 @@
'conf' => $main->{conf},
'async' => Mail::SpamAssassin::AsyncLoop->new($main)
};
+ #$self->{main}->{use_rule_subs} = 1;

if (defined $opts && $opts->{disable_auto_learning}) {
$self->{disable_auto_learning} = 1;
@@ -1801,11 +1802,14 @@
return;
}

+ my $use_rule_subs = $self->{main}->{use_rule_subs};
+
my $evalstr = $self->start_rules_plugin_code("header");
my $evalstr2 = '';

# hash to hold the rules, "header\tdefault value" => rulename
my %ordered = ();
+ my %testcode = ();

while (my($rulename, $rule) = each %{$self->{conf}{head_tests}->{$priority}}) {
my $def = '';
@@ -1829,16 +1833,22 @@
next if (!$self->is_user_rule_sub ($rulename.'_head_test'));
}

- $evalstr2 .= '
- sub '.$rulename.'_head_test {
- my($self,$text) = @_;
- '.$self->hash_line_for_rule($rulename).'
- while ($text '.$testtype.'~ '.$pat.'g) {
- $self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
- '. $self->hit_rule_plugin_code($rulename, "header", "last") . '
- }
- }';
-
+ if ($use_rule_subs) {
+ $evalstr2 .= '
+ sub '.$rulename.'_head_test {
+ my($self,$text) = @_;
+ '.$self->hash_line_for_rule($rulename).'
+ while ($text '.$testtype.'~ '.$pat.'g) {
+ $self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
+ '. $self->hit_rule_plugin_code($rulename, "header", "last") . '
+ }
+ }
+ ';
+ }
+ else {
+ # store for use below
+ $testcode{$rulename} = $testtype.'~ '.$pat.'g';
+ }
}

# setup the function to run the rules
@@ -1846,12 +1856,29 @@
my($hdrname, $def) = split(/\t/, $k, 2);
$evalstr .= ' $hval = $self->get(q#'.$hdrname.'#, q#'.$def.'#);';
foreach my $rulename (@{$v}) {
- $evalstr .= '
- if ($scoresptr->{q#'.$rulename.'#}) {
- '.$rulename.'_head_test($self, $hval); # no need for OO calling here (its faster this way)
- '.$self->ran_rule_plugin_code($rulename, "header").'
+ if ($use_rule_subs) {
+ $evalstr .= '
+ if ($scoresptr->{q#'.$rulename.'#}) {
+ '.$rulename.'_head_test($self, $hval);
+ '.$self->ran_rule_plugin_code($rulename, "header").'
+ }
+ ';
+ }
+ else {
+ my $testcode = $testcode{$rulename};
+
+ $evalstr .= '
+ if ($scoresptr->{q#'.$rulename.'#}) {
+ pos $hval = 0;
+ '.$self->hash_line_for_rule($rulename).'
+ while ($hval '.$testcode.') {
+ $self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
+ '.$self->hit_rule_plugin_code($rulename, "header", "last").'
+ }
+ '.$self->ran_rule_plugin_code($rulename, "header").'
+ }
+ ';
}
- ';
}
}

@@ -1916,24 +1943,50 @@
return;
}

+ # caller can set this member of the Mail::SpamAssassin object to
+ # override this; useful for profiling rule runtimes, although I think
+ # the HitFreqsRuleTiming.pm plugin is probably better nowadays anyway
+ my $use_rule_subs = $self->{main}->{use_rule_subs};
+
# build up the eval string...
my $evalstr = $self->start_rules_plugin_code("body");
my $evalstr2 = '';
+ my $loopid = 0;

- while (my($rulename, $pat) = each %{$self->{conf}{body_tests}->{$priority}}) {
- $evalstr .= '
- if ($scoresptr->{q{'.$rulename.'}}) {
- '.$rulename.'_body_test($self,@_);
- '.$self->ran_rule_plugin_code($rulename, "body").'
- }
- ';
+ while (my($rulename, $pat) = each %{$self->{conf}{body_tests}->{$priority}})
+ {
+ if ($use_rule_subs) {
+ $evalstr .= '
+ if ($scoresptr->{q{'.$rulename.'}}) {
+ '.$rulename.'_body_test($self,@_);
+ }
+ ';
+ }
+ else {
+ $loopid++;
+ $evalstr .= '
+ if ($scoresptr->{q{'.$rulename.'}}) {
+ body_'.$loopid.': foreach my $l (@_) {
+ pos $l = 0;
+ '.$self->hash_line_for_rule($rulename).'
+ while ($l =~ '.$pat.'g) {
+ $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
+ '. $self->hit_rule_plugin_code($rulename, "body",
+ "last body_".$loopid) . '
+ }
+ }
+ '.$self->ran_rule_plugin_code($rulename, "body").'
+ }
+ ';
+ }

if ($doing_user_rules) {
next if (!$self->is_user_rule_sub ($rulename.'_body_test'));
}

- $evalstr2 .= '
- sub '.$rulename.'_body_test {
+ if ($use_rule_subs) {
+ $evalstr2 .= '
+ sub '.$rulename.'_body_test {
my $self = shift;
foreach (@_) {
pos = 0;
@@ -1943,8 +1996,9 @@
'. $self->hit_rule_plugin_code($rulename, "body", "return") . '
}
}
+ }
+ ';
}
- ';
}

# clear out a previous version of this fn, if already defined
@@ -2329,35 +2383,59 @@
return;
}

+ my $use_rule_subs = $self->{main}->{use_rule_subs};
+
# otherwise build up the eval string...
my $evalstr = $self->start_rules_plugin_code("uri");
my $evalstr2 = '';
+ my $loopid = 0;

while (my($rulename, $pat) = each %{$self->{conf}{uri_tests}->{$priority}}) {
- $evalstr .= '
- if ($scoresptr->{q{'.$rulename.'}}) {
- '.$rulename.'_uri_test($self, @_);
- '.$self->ran_rule_plugin_code($rulename, "uri").'
- }
- ';
+ if ($use_rule_subs) {
+ $evalstr .= '
+ if ($scoresptr->{q{'.$rulename.'}}) {
+ '.$rulename.'_uri_test($self, @_);
+ '.$self->ran_rule_plugin_code($rulename, "uri").'
+ }
+ ';
+ }
+ else {
+ $loopid++;
+ $evalstr .= '
+ if ($scoresptr->{q{'.$rulename.'}}) {
+ uri_'.$loopid.': foreach my $l (@_) {
+ pos $l = 0;
+ '.$self->hash_line_for_rule($rulename).'
+ while ($l =~ '.$pat.'g) {
+ $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
+ '. $self->hit_rule_plugin_code($rulename, "uri",
+ "last uri_".$loopid) . '
+ }
+ }
+ '.$self->ran_rule_plugin_code($rulename, "uri").'
+ }
+ ';
+ }

if ($doing_user_rules) {
next if (!$self->is_user_rule_sub ($rulename.'_uri_test'));
}

- $evalstr2 .= '
- sub '.$rulename.'_uri_test {
- my $self = shift;
- foreach (@_) {
- pos = 0;
- '.$self->hash_line_for_rule($rulename).'
- while ('.$pat.'g) {
- $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
- '. $self->hit_rule_plugin_code($rulename, "uri", "return") .'
- }
- }
+ if ($use_rule_subs) {
+ $evalstr2 .= '
+ sub '.$rulename.'_uri_test {
+ my $self = shift;
+ foreach (@_) {
+ pos = 0;
+ '.$self->hash_line_for_rule($rulename).'
+ while ('.$pat.'g) {
+ $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
+ '. $self->hit_rule_plugin_code($rulename, "uri", "return") .'
+ }
+ }
+ }
+ ';
}
- ';
}

# clear out a previous version of this fn, if already defined
@@ -2420,35 +2498,59 @@
return;
}

+ my $use_rule_subs = $self->{main}->{use_rule_subs};
+
# build up the eval string...
my $evalstr = $self->start_rules_plugin_code("rawbody");
my $evalstr2 = '';
+ my $loopid = 0;

while (my($rulename, $pat) = each %{$self->{conf}{rawbody_tests}->{$priority}}) {
- $evalstr .= '
- if ($scoresptr->{q{'.$rulename.'}}) {
- '.$rulename.'_rawbody_test($self, @_);
- '.$self->ran_rule_plugin_code($rulename, "rawbody").'
- }
- ';
+ if ($use_rule_subs) {
+ $evalstr .= '
+ if ($scoresptr->{q{'.$rulename.'}}) {
+ '.$rulename.'_rawbody_test($self, @_);
+ '.$self->ran_rule_plugin_code($rulename, "rawbody").'
+ }
+ ';
+ }
+ else {
+ $loopid++;
+ $evalstr .= '
+ if ($scoresptr->{q{'.$rulename.'}}) {
+ rawbody_'.$loopid.': foreach my $l (@_) {
+ pos $l = 0;
+ '.$self->hash_line_for_rule($rulename).'
+ while ($l =~ '.$pat.'g) {
+ $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
+ '. $self->hit_rule_plugin_code($rulename, "rawbody",
+ "last rawbody_".$loopid) . '
+ }
+ }
+ '.$self->ran_rule_plugin_code($rulename, "rawbody").'
+ }
+ ';
+ }

if ($doing_user_rules) {
next if (!$self->is_user_rule_sub ($rulename.'_rawbody_test'));
}

- $evalstr2 .= '
- sub '.$rulename.'_rawbody_test {
- my $self = shift;
- foreach (@_) {
- pos = 0;
- '.$self->hash_line_for_rule($rulename).'
- while ('.$pat.'g) {
- $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
- '. $self->hit_rule_plugin_code($rulename, "rawbody", "return") . '
- }
- }
+ if ($use_rule_subs) {
+ $evalstr2 .= '
+ sub '.$rulename.'_rawbody_test {
+ my $self = shift;
+ foreach (@_) {
+ pos = 0;
+ '.$self->hash_line_for_rule($rulename).'
+ while ('.$pat.'g) {
+ $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
+ '. $self->hit_rule_plugin_code($rulename, "rawbody", "return") . '
+ }
+ }
+ }
+ ';
}
- ';
}

# clear out a previous version of this fn, if already defined