Mailing List Archive

svn commit: r483650 - in /spamassassin/trunk: MANIFEST lib/Mail/SpamAssassin/Conf.pm lib/Mail/SpamAssassin/Conf/Parser.pm lib/Mail/SpamAssassin/PerMsgStatus.pm masses/mass-check masses/plugins/Dumpmem.pm t/duplicates.t t/priorities.t
Author: jm
Date: Thu Dec 7 12:37:31 2006
New Revision: 483650

URL: http://svn.apache.org/viewvc?view=rev&rev=483650
Log:
bug 5206: detect duplicate rules, and silently merge them internally for greater efficiency. This results in about 100-120KB RAM usage saving in current svn trunk's ruleset, detecting lots of duplicate rules -- so is well worth doing. also, change t/priorities.t so it doesn't accidentally confuse itself with duplicate rules

Added:
spamassassin/trunk/masses/plugins/Dumpmem.pm
spamassassin/trunk/t/duplicates.t (with props)
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/masses/mass-check
spamassassin/trunk/t/priorities.t

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?view=diff&rev=483650&r1=483649&r2=483650
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Thu Dec 7 12:37:31 2006
@@ -465,3 +465,4 @@
t/spamd_unix_and_tcp.t
lib/Mail/SpamAssassin/Plugin/VBounce.pm
rules/20_vbounce.cf
+t/duplicates.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?view=diff&rev=483650&r1=483649&r2=483650
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Thu Dec 7 12:37:31 2006
@@ -2629,6 +2629,7 @@
$self->{rawbody_evals} = { };
$self->{meta_tests} = { };
$self->{eval_plugins} = { };
+ $self->{duplicate_rules} = { };

# testing stuff
$self->{regression_tests} = { };

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?view=diff&rev=483650&r1=483649&r2=483650
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Thu Dec 7 12:37:31 2006
@@ -685,6 +685,7 @@

$self->trace_meta_dependencies();
$self->fix_priorities();
+ $self->find_dup_rules(); # must be after fix_priorities()

dbg("conf: finish parsing");

@@ -838,6 +839,51 @@
$pri->{$dep} = $basepri;
}
}
+ }
+}
+
+sub find_dup_rules {
+ my ($self) = @_;
+ my $conf = $self->{conf};
+
+ my %names_for_text = ();
+ my %dups = ();
+ while (my ($name, $text) = each %{$conf->{tests}}) {
+ my $type = $conf->{test_types}->{$name};
+ # ensure similar, but differently-typed, rules are not marked as dups
+ $text = "$type\t$text";
+
+ if (defined $names_for_text{$text}) {
+ $names_for_text{$text} .= " ".$name;
+ $dups{$text} = 1; # found (at least) one
+ } else {
+ $names_for_text{$text} = $name;
+ }
+ }
+
+ foreach my $text (keys %dups) {
+ my $first;
+ my $first_pri;
+ my @names = sort {$a cmp $b} split(' ', $names_for_text{$text});
+ foreach my $name (@names) {
+ my $priority = $conf->{priority}->{$name} || 0;
+
+ if (!defined $first || $priority < $first_pri) {
+ $first_pri = $priority;
+ $first = $name;
+ }
+ }
+ # $first is now the earliest-occurring rule. mark others as dups
+
+ my @dups = ();
+ foreach my $name (@names) {
+ next if $name eq $first;
+ push @dups, $name;
+ delete $conf->{tests}->{$name};
+ }
+
+ dbg("rules: $first has duplicates: ".join(' ', @dups));
+ $conf->{duplicate_rules}->{$first} = \@dups;
}
}


Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?view=diff&rev=483650&r1=483649&r2=483650
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Thu Dec 7 12:37:31 2006
@@ -2115,6 +2115,15 @@
$area,
$params{ruletype},
$self->{conf}->get_description_for_rule($rule) || $rule);
+
+ # take care of duplicate rules, too (bug 5206)
+ my $dups = $self->{conf}->{duplicate_rules}->{$rule};
+ if ($dups && @{$dups}) {
+ foreach my $dup (@{$dups}) {
+ $self->got_hit($dup, $area, %params);
+ }
+ }
+
return 1;
}


Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?view=diff&rev=483650&r1=483649&r2=483650
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Thu Dec 7 12:37:31 2006
@@ -736,8 +736,6 @@
}
}

-# use Mail::SpamAssassin::Util::MemoryDump; Mail::SpamAssassin::Util::MemoryDump::MEMDEBUG(); use Mail::SpamAssassin::Util::MemoryDump; Mail::SpamAssassin::Util::MemoryDump::MEMDEBUG_dump_obj($status); #JMD
-
if (defined $status) { $status->finish(); }
$ma->finish();
undef $ma; # clean 'em up

Added: spamassassin/trunk/masses/plugins/Dumpmem.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/plugins/Dumpmem.pm?view=auto&rev=483650
==============================================================================
--- spamassassin/trunk/masses/plugins/Dumpmem.pm (added)
+++ spamassassin/trunk/masses/plugins/Dumpmem.pm Thu Dec 7 12:37:31 2006
@@ -0,0 +1,24 @@
+package Dumpmem;
+use strict;
+use Mail::SpamAssassin;
+use Mail::SpamAssassin::Plugin;
+our @ISA = qw(Mail::SpamAssassin::Plugin);
+
+use Mail::SpamAssassin::Util::MemoryDump;
+
+sub new {
+ my ($class, $mailsa) = @_;
+ $class = ref($class) || $class;
+ my $self = $class->SUPER::new($mailsa);
+ bless ($self, $class);
+ return $self;
+}
+
+sub per_msg_finish {
+ my ($self, $opts) = @_;
+ Mail::SpamAssassin::Util::MemoryDump::MEMDEBUG();
+ Mail::SpamAssassin::Util::MemoryDump::MEMDEBUG_dump_obj(
+ $opts->{permsgstatus}->{main});
+}
+
+1;

Added: spamassassin/trunk/t/duplicates.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/duplicates.t?view=auto&rev=483650
==============================================================================
--- spamassassin/trunk/t/duplicates.t (added)
+++ spamassassin/trunk/t/duplicates.t Thu Dec 7 12:37:31 2006
@@ -0,0 +1,60 @@
+#!/usr/bin/perl
+
+use lib '.'; use lib 't';
+use SATest; sa_t_init("duplicates");
+use Test; BEGIN { plan tests => 16 };
+
+$ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C'; # a cheat, but we need the patterns to work
+
+# ---------------------------------------------------------------------------
+
+%patterns = (
+
+ q{ FOO1 } => '', # use default names
+ q{ FOO2 } => '',
+ q{ HDR1 } => '',
+ q{ HDR2 } => '',
+ q{ META1 } => '',
+ q{ META2 } => '',
+ q{ META3 } => '',
+ q{ ran body rule FOO1 ======> got hit } => '',
+ q{ ran header rule HDR1 ======> got hit } => '',
+ q{ rules: FOO1 has duplicates: FOO2 } => '',
+ q{ rules: HDR1 has duplicates: HDR2 } => '',
+
+);
+
+%anti_patterns = (
+
+ q{ FOO3 } => '',
+ q{ RAWFOO } => '',
+ q{ ran body rule FOO2 ======> got hit } => '',
+ q{ ran header rule HDR2 ======> got hit } => '',
+
+);
+
+tstprefs (qq{
+
+ $default_cf_lines
+
+ body FOO1 /click here and e= nter your/i
+ body FOO2 /click here and e= nter your/i
+
+ # should not be found, not a dup (/i)
+ body FOO3 /click here and e= nter your/
+
+ # should not be found, not dup since different type
+ rawbody RAWFOO /click here and e= nter your/i
+
+ header HDR1 Subject =~ /stained/
+ header HDR2 Subject =~ /stained/
+
+ meta META1 (1)
+ meta META2 (META1 && META3)
+ meta META3 (1)
+ priority META3 -500
+
+});
+
+sarun ("-L -t -D < data/spam/006 2>&1", \&patterns_run_cb);
+ok ok_all_patterns();

Propchange: spamassassin/trunk/t/duplicates.t
------------------------------------------------------------------------------
svn:executable = *

Modified: spamassassin/trunk/t/priorities.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/priorities.t?view=diff&rev=483650&r1=483649&r2=483650
==============================================================================
--- spamassassin/trunk/t/priorities.t (original)
+++ spamassassin/trunk/t/priorities.t Thu Dec 7 12:37:31 2006
@@ -63,8 +63,8 @@
meta FOO1 (FOO2 && FOO3)
meta FOO2 (1)
meta FOO3 (FOO4 && FOO5)
- meta FOO4 (1)
- meta FOO5 (1)
+ meta FOO4 (2)
+ meta FOO5 (3)
priority FOO5 -23
priority FOO1 -28