Mailing List Archive

svn commit: rev 6570 - in incubator/spamassassin/trunk: . lib/Mail/SpamAssassin rules t t/data/nice
Author: jm
Date: Sat Feb 7 15:47:08 2004
New Revision: 6570

Added:
incubator/spamassassin/trunk/t/data/nice/not_gtube.eml
Modified:
incubator/spamassassin/trunk/MANIFEST
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
incubator/spamassassin/trunk/rules/20_body_tests.cf
incubator/spamassassin/trunk/rules/60_whitelist.cf
incubator/spamassassin/trunk/t/gtube.t
Log:
bug 2898: GTUBE mails should not affect the AWL

Modified: incubator/spamassassin/trunk/MANIFEST
==============================================================================
--- incubator/spamassassin/trunk/MANIFEST (original)
+++ incubator/spamassassin/trunk/MANIFEST Sat Feb 7 15:47:08 2004
@@ -303,3 +303,4 @@
masses/rule-qa/corpus-nightly
masses/rule-qa/corpus-tagtime
tools/desc_length.pl
+t/data/nice/not_gtube.eml

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm Sat Feb 7 15:47:08 2004
@@ -962,6 +962,8 @@
}
}

+ my $awlhits = $self->get_nonlearn_nonuserconf_hits();
+
# Create the AWL object, catching 'die's
my $whitelist;
my $evalok = eval {
@@ -971,11 +973,11 @@
my $meanscore = $whitelist->check_address($_, $origip);
my $delta = 0;

- dbg("AWL active, pre-score: " . $self->{hits} . ", mean: " .
- ($meanscore || 'undef') . ", IP: " . ($origip || 'undef'));
+ dbg("AWL active, pre-score: $self->{hits}, autolearn score: $awlhits, ".
+ "mean: ". ($meanscore || 'undef') .", IP: ". ($origip || 'undef'));

if (defined ($meanscore)) {
- $delta = ($meanscore - $self->{hits}) * $self->{main}->{conf}->{auto_whitelist_factor};
+ $delta = ($meanscore - $awlhits) * $self->{main}->{conf}->{auto_whitelist_factor};
$self->{tag_data}->{AWL} = sprintf("%2.1f",$delta);
# Save this for _AWL_ tag
}
@@ -984,7 +986,7 @@
# early high-scoring messages are reinforced compared to
# later ones. http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=159704
if (!$self->{disable_auto_learning}) {
- $whitelist->add_score($self->{hits});
+ $whitelist->add_score($awlhits);
}

# current AWL score changes with each hit
@@ -997,7 +999,6 @@
$self->{main}->{conf}->{descriptions}->{AWL});
}

- dbg("Post AWL score: ".$self->{hits});
$whitelist->finish();
1;
};
@@ -1007,6 +1008,8 @@
# try an unlock, in case we got that far
eval { $whitelist->finish(); };
}
+
+ dbg("Post AWL score: ".$self->{hits});

# test hit is above
return 0;

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sat Feb 7 15:47:08 2004
@@ -279,14 +279,7 @@
my $new_scoreset = $orig_scoreset & ~2;
dbg ("auto-learn: currently using scoreset $orig_scoreset. recomputing score based on scoreset $new_scoreset.");
$self->{conf}->set_score_set($new_scoreset); # reduce to autolearning scores
- foreach my $test ( @{$self->{test_names_hit}} ) {
- # ignore tests with 0 score in this scoreset or if the test is a learning or userconf test
- next if ( $self->{conf}->{scores}->{$test} == 0 );
- next if ( exists $self->{conf}->{tflags}->{$test} && $self->{conf}->{tflags}->{$test} =~ /\bnoautolearn\b/ );
-
- $hits += $self->{conf}->{scores}->{$test};
- }
- $hits = (sprintf "%0.3f", $hits) + 0;
+ my $hits = $self->get_nonlearn_nonuserconf_hits();
dbg ("auto-learn: original score: ".$self->{hits}.", recomputed score: $hits");
$self->{conf}->set_score_set($orig_scoreset); # return to appropriate scoreset
}
@@ -360,6 +353,26 @@
dbg ("auto-learning failed: $@");
$self->{auto_learn_status} = "failed";
}
+}
+
+sub get_nonlearn_nonuserconf_hits {
+ my ($self) = @_;
+
+ my $scores = $self->{conf}->{scores};
+ my $tflags = $self->{conf}->{tflags};
+ my $hits = 0;
+
+ foreach my $test ( @{$self->{test_names_hit}} )
+ {
+ # ignore tests with 0 score in this scoreset,
+ # or tests whose tflags include 'noautolearn'
+ next if ($scores->{$test} == 0);
+ next if (exists $tflags->{$test} && $tflags->{$test} =~ /\bnoautolearn\b/);
+
+ $hits += $scores->{$test};
+ }
+
+ return (sprintf "%0.3f", $hits) + 0;
}

###########################################################################

Modified: incubator/spamassassin/trunk/rules/20_body_tests.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/20_body_tests.cf (original)
+++ incubator/spamassassin/trunk/rules/20_body_tests.cf Sat Feb 7 15:47:08 2004
@@ -35,7 +35,7 @@
# GTUBE test - the generic test for UBE.
body GTUBE /XJS\*C4JDBQADN1\.NSBN3\*2IDNEN\*GTUBE-STANDARD-ANTI-UBE-TEST-EMAIL\*C\.34X/
describe GTUBE Generic Test for Unsolicited Bulk Email
-tflags GTUBE userconf
+tflags GTUBE userconf noautolearn

###########################################################################
# Message digest tests

Modified: incubator/spamassassin/trunk/rules/60_whitelist.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/60_whitelist.cf (original)
+++ incubator/spamassassin/trunk/rules/60_whitelist.cf Sat Feb 7 15:47:08 2004
@@ -22,19 +22,26 @@

###########################################################################
# Whitelist rules
+#
+# Note that most of these get 'noautolearn'. They should not be
+# considered when deciding whether to auto-learn a message, as a
+# user slip-up could scribble unwanted side-effects into the bayes
+# db -- which is hard to remedy.

-# note: the test name is hard-coded several places, change everywhere
+# TODO: this test name is hard-coded in several places, change everywhere
header AWL eval:check_from_in_auto_whitelist()
describe AWL From: address is in the auto white-list
tflags AWL userconf noautolearn

+# we *do* allow the user to autolearn based on this, because it's hard
+# to accidentally blacklist someone's From address.
header USER_IN_BLACKLIST eval:check_from_in_blacklist()
describe USER_IN_BLACKLIST From: address is in the user's black-list
tflags USER_IN_BLACKLIST userconf

header USER_IN_WHITELIST eval:check_from_in_whitelist()
describe USER_IN_WHITELIST From: address is in the user's white-list
-tflags USER_IN_WHITELIST userconf nice
+tflags USER_IN_WHITELIST userconf nice noautolearn

header USER_IN_DEF_WHITELIST eval:check_from_in_default_whitelist()
describe USER_IN_DEF_WHITELIST From: address is in the default white-list
@@ -42,19 +49,19 @@

header USER_IN_BLACKLIST_TO eval:check_to_in_blacklist()
describe USER_IN_BLACKLIST_TO User is listed in 'blacklist_to'
-tflags USER_IN_BLACKLIST_TO userconf nice
+tflags USER_IN_BLACKLIST_TO userconf nice noautolearn

header USER_IN_WHITELIST_TO eval:check_to_in_whitelist()
describe USER_IN_WHITELIST_TO User is listed in 'whitelist_to'
-tflags USER_IN_WHITELIST_TO userconf nice
+tflags USER_IN_WHITELIST_TO userconf nice noautolearn

header USER_IN_MORE_SPAM_TO eval:check_to_in_more_spam()
describe USER_IN_MORE_SPAM_TO User is listed in 'more_spam_to'
-tflags USER_IN_MORE_SPAM_TO userconf nice
+tflags USER_IN_MORE_SPAM_TO userconf nice noautolearn

header USER_IN_ALL_SPAM_TO eval:check_to_in_all_spam()
describe USER_IN_ALL_SPAM_TO User is listed in 'all_spam_to'
-tflags USER_IN_ALL_SPAM_TO userconf nice
+tflags USER_IN_ALL_SPAM_TO userconf nice noautolearn

###########################################################################
# Default whitelists. These should be addresses which send mail that is often

Added: incubator/spamassassin/trunk/t/data/nice/not_gtube.eml
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/t/data/nice/not_gtube.eml Sat Feb 7 15:47:08 2004
@@ -0,0 +1,10 @@
+Message-ID: <GTUBE1.1010101@example.com>
+Date: Mon, 07 Oct 2002 09:00:00 +0000
+From: Sender <sender@example.com>
+MIME-Version: 1.0
+To: Recipient <recipient@example.com>
+Subject: Not The GTUBE
+Content-Type: text/plain; charset=us-ascii; format=flowed
+Content-Transfer-Encoding: 7bit
+
+What, I sent you the GTUBE? oops!

Modified: incubator/spamassassin/trunk/t/gtube.t
==============================================================================
--- incubator/spamassassin/trunk/t/gtube.t (original)
+++ incubator/spamassassin/trunk/t/gtube.t Sat Feb 7 15:47:08 2004
@@ -2,7 +2,7 @@

use lib '.'; use lib 't';
use SATest; sa_t_init("spam");
-use Test; BEGIN { plan tests => 2 };
+use Test; BEGIN { plan tests => 4 };

# ---------------------------------------------------------------------------

@@ -10,8 +10,24 @@

q{ GTUBE }, 'gtube',

-
);

+tstprefs ("
+ $default_cf_lines
+ use_auto_whitelist 1
+ auto_whitelist_path ./log/awl
+ auto_whitelist_file_mode 0755
+");
+
ok (sarun ("-L -t < data/spam/gtube.eml", \&patterns_run_cb));
ok_all_patterns();
+
+%patterns = (
+
+q{ X-Spam-Status: No }, 'not_marked_as_spam_from_awl_bonus',
+
+);
+
+ok (sarun ("-L -t < data/nice/not_gtube.eml", \&patterns_run_cb));
+ok_all_patterns();
+