Mailing List Archive

svn commit: rev 6839 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules
Author: quinlan
Date: Mon Feb 23 20:19:17 2004
New Revision: 6839

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
incubator/spamassassin/trunk/rules/20_head_tests.cf
incubator/spamassassin/trunk/rules/20_ratware.cf
incubator/spamassassin/trunk/rules/70_testing.cf
Log:
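Some work on test rules: promote several T_* test rules out of 70_testing.cf into 20_head_tests.cf and 20_ratware.cf, simplify check_for_to_in_subject() to take 'address' or 'user', remove check_for_all_relays_near_mxes(), and drop a number of other test rules.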


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm Mon Feb 23 20:19:17 2004
@@ -2722,30 +2722,19 @@
my ($self, $test) = @_;

my $full_to = $self->get('To:addr');
- return 0 unless $full_to; # no To:?
- my $to = $full_to;
- $to =~ s/\@.*$//; # just the username please
+ return 0 unless $full_to;

my $subject = $self->get('Subject');

- my $return = $subject =~ /^\s*\Q$to\E,\S/; # "user,\S" case sensitive
-
- if ( defined $test ) { # test versions
- if ( $test == 1 ) {
- $return = $subject =~ /\b\Q$full_to\E\b/i; # "user@domain.com"
- }
- elsif ( $test == 2 ) {
- $to = ucfirst $to;
- $return = $subject =~ /^\s*\Q$to\E,\S/; # "user,\S" case sensitive (ucfirst)
- }
- elsif ( $test == 3 ) {
- $return = $subject =~ /^\s*\Q$to\E,\S/i; # "user,\S" case insensitive
- }
- elsif ( $test == 4 ) {
- $return = $subject =~ /^\s*\Q$full_to\E\b/i; # "user@domain.com"
- }
+ if ($test eq "address") {
+ return $subject =~ /\b\Q$full_to\E\b/i; # "user@domain.com"
}
- return $return;
+ elsif ($test eq "user") {
+ my $to = $full_to;
+ $to =~ s/\@.*//;
+ return $subject =~ /^\s*\Q$to\E,\S/i; # "user,\S" case insensitive
+ }
+ return 0;
}

###########################################################################
@@ -3260,58 +3249,6 @@
}

dbg ("SPF: query for $sender/$ip/$helo: result: $result, comment: $comment");
-}
-
-###########################################################################
-
-sub check_for_all_relays_near_mxes {
- my ($self) = @_;
-
- return unless $self->is_dns_available();
- return;
-
- # Allow a max 15-second timeout to do this test, looking up all MX and
- # A records.
- # TODO: use the BGSOCK stuff in Dns, and start these along with the RBL
- # queries. May be pointless if the accuracy is poor though.
-
- my $timeout = $self->{conf}->{rbl_timeout};
- my $allmxesnear = 0;
-
- eval {
- local $SIG{ALRM} = sub { die "alarm\n" };
- alarm($timeout);
-
- foreach my $relay (@{$self->{relays_untrusted}}) {
- if (!$self->mx_of_helo_near_ip ($relay->{helo}, $relay->{ip})) {
- dbg ("helo $relay->{helo} is not near $relay->{ip}");
- die "notnear";
- } else {
- dbg ("helo $relay->{helo} is near $relay->{ip}");
- }
- }
-
- $allmxesnear = 1; # completed without dying
-
- };
- alarm(0); # if we die'd above, need to reset here
-
- if ($@) {
- if ($@ =~ /alarm/) {
- dbg ("all-MXes check timed out after $timeout secs.");
- } elsif ($@ =~ /notnear/) {
- # fine! just return
- } else {
- warn ("all-MXes -> check skipped: $! $@");
- }
- return 0;
- }
-
- # note: an empty @{$self->{relays_untrusted}} is fine -- it means
- # either the message originating locally, or the trail was trusted
- # all the way to the source. Both are good news!
-
- return 1;
}

###########################################################################

Modified: incubator/spamassassin/trunk/rules/20_head_tests.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/20_head_tests.cf (original)
+++ incubator/spamassassin/trunk/rules/20_head_tests.cf Mon Feb 23 20:19:17 2004
@@ -530,8 +530,11 @@
header RCVD_FAKE_HELO_DOTCOM Received =~ /^from (?:msn|yahoo|yourwebsite|lycos|excite|cs|aol|localhost|koreanmail|allexecs|mydomain|juno|eudoramail|compuserve|desertmail|excite|caramail)\.com \(/m
describe RCVD_FAKE_HELO_DOTCOM Received contains a faked HELO hostname

-header USERNAME_IN_SUBJECT eval:check_for_to_in_subject()
-describe USERNAME_IN_SUBJECT To: username at front of subject
+header USERNAME_IN_SUBJECT eval:check_for_to_in_subject('user')
+describe USERNAME_IN_SUBJECT To: username listed at front of Subject
+
+header ADDRESS_IN_SUBJECT eval:check_for_to_in_subject('address')
+describe ADDRESS_IN_SUBJECT To: address listed at front of Subject

header LOSE_POUNDS Subject =~ /\bLose .*(?:pounds|lbs|weight)/i
describe LOSE_POUNDS Subject talks about losing pounds
@@ -732,6 +735,9 @@

header X_ORIG_HOST X-Originating-Host =~ /^\[./
describe X_ORIG_HOST Message has X-Originating-Host header
+
+header X_ORIG_IP_NOT_IPV4 X-Originating-IP !~ /\[?(?:\d{1,3}\.){3}\d{1,3}\]?/ [if-unset: 0.0.0.0]
+describe X_ORIG_IP_NOT_IPV4 X-Originating-IP doesn't look like IPv4 address

# Hotmail's DAV interface uses this and it's heavily exploited right now. As
# far as I can tell, it requires an msn.com or hotmail.com X-Originating-Email:

Modified: incubator/spamassassin/trunk/rules/20_ratware.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/20_ratware.cf (original)
+++ incubator/spamassassin/trunk/rules/20_ratware.cf Mon Feb 23 20:19:17 2004
@@ -246,3 +246,12 @@
# http://groups.google.com/groups?selm=atp1ip0n22%40enews3.newsguy.com
rawbody RATWARE_HASH_DASH /[a-z\d]+-([a-z\d]{16}-)+[a-z\d]+(?-i:l)\d+/i
describe RATWARE_HASH_DASH Contains a hashbuster in Send-Safe format
+
+# spammer tool, sometimes has "netIP with HTTP;" in Received: header
+header RATWARE_NETIP Content-Type =~ /boundary="--ALT--[A-Z]{4}\d/
+describe RATWARE_NETIP Bulk email fingerprint (netIP) found
+
+# this is really badly faked. Also the spammer who uses "25250101"
+# for the build is a total hippie.
+header RATWARE_GECKO_BUILD User-Agent =~ /Gecko\/(?!200\d\d\d\d\d)\d/
+describe RATWARE_GECKO_BUILD Bulk email fingerprint (Gecko faked) found

Modified: incubator/spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_testing.cf (original)
+++ incubator/spamassassin/trunk/rules/70_testing.cf Mon Feb 23 20:19:17 2004
@@ -27,34 +27,23 @@
###########################################################################

# http://bugzilla.spamassassin.org/show_bug.cgi?id=2088
+# low hit rate
+# 0.091 0.1174 0.0000 1.000 0.93 0.01 T_RATWARE_MIME_844412
header T_RATWARE_MIME_844412 Content-Type =~ /boundary="[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"/

# http://bugzilla.spamassassin.org/show_bug.cgi?id=2087
+# low hit rate
+# 0.069 0.0895 0.0000 1.000 0.93 0.01 T_RATWARE_OE_DM
header T_RATWARE_OE_DM X-Mailer =~ /^Microsoft Outlook Express [\d\.]+ DM$/

-# a referral ID
-uri T_REF_ID /[\?\&]RefID/
-
-# http://bugzilla.spamassassin.org/show_bug.cgi?id=2089
-header T_DATE_EXTRA_SPACE Date =~ /^..., .\d ... \d\d\d\d \d\d:\d\d:\d\d [\+\-]\d\d\d\d$/
-
-# another one for bug 2089, may be more useful:
-full __END_HASHBUSTER_1 /\n\[[a-z0-9]+\]\n\s*\Z/
-meta T_RATWARE_2089 (NO_REAL_NAME && __END_HASHBUSTER_1)
-
-# this is really badly faked. Also the spammer who uses "25250101"
-# for the build is a total hippie.
-header T_RATWARE_GECKO_BUILD User-Agent =~ /Gecko\/(?!200\d\d\d\d\d)\d/
-
-# another good way to catch mozilla fakery
-header __UA_GECKO User-Agent =~ /Gecko\//
-header __EXISTS_ACCEPT_LANG exists:X-Accept-Language
-meta T_RATWARE_GECKO_NO_LANG (__UA_GECKO && !__EXISTS_ACCEPT_LANG)
-
# reminder: develop these after 2.60
+# low hit rate
+# 0.093 0.1201 0.0000 1.000 0.93 0.01 T_SPRINTF_5X
full T_SPRINTF_5X /[^-][A-F1-9][A-F0-9]{5,7}-[A-F1-9][A-F0-9]{5,7}-[A-F1-9][A-F0-9]{5,7}-[A-F1-9][A-F0-9]{5,7}-[A-F1-9][A-F0-9]{5,7}[^-]/

# (time_t/4444)
+# low hit rate
+# 0.115 0.1174 0.1061 0.525 0.13 0.01 T_TIME_OVER_4444
header T_TIME_OVER_4444 ALL =~ /\D23[67][0-9][0-9][0-9]\D/

# replacements for PORN_4; split out sub-patterns as some are more FP-prone
@@ -70,19 +59,6 @@
uri T_PORN_URL_TEEN /^https?:\/\/[\w\.-]*(?<!thir|four|eigh|nine)(?<!fif|six)(?<!seven)teen(?!th)[\w-]*\./
uri T_PORN_URL_MISC /^https?:\/\/[\w\.-]*(pussy|nympho|porn|hard-?core|taboo|whore|voyeur|lesbian|gurlpages|naughty|lolita|schoolgirl|kooloffer|erotic)[\w-]*\./

-header T_DATE_DOUBLE_DASH Date =~ /:\d\d --\d\d\d\d$/
-
-header __RCVD_IN_SORBS_RHSBL eval:check_rbl_from_host('sorbsrhs', 'rhsbl.sorbs.net.')
-tflags __RCVD_IN_SORBS_RHSBL net
-
-header T_RCVD_IN_SORBS_BADCONF eval:check_rbl_sub('sorbsrhs', '127.0.0.11')
-describe T_RCVD_IN_SORBS_BADCONF SORBS: sender uses invalid DNS A or MX records
-tflags T_RCVD_IN_SORBS_BADCONF net
-
-header T_RCVD_IN_SORBS_NOMAIL eval:check_rbl_sub('sorbsrhs', '127.0.0.12')
-describe T_RCVD_IN_SORBS_NOMAIL SORBS: sender is not expected to send mail
-tflags T_RCVD_IN_SORBS_NOMAIL net
-
# test XBL with -notfirsthop
# Note: can't use check_rbl_sub, but can rely on DNSBL caching to avoid
# duplicate queries of sbl-xbl.
@@ -90,6 +66,11 @@
describe T_RCVD_IN_XBL_NFH Received via a relay in Spamhaus XBL
tflags T_RCVD_IN_XBL_NFH net

+# ugh, is that right?
+header T_RCVD_IN_XBL_NFH_2 eval:check_rbl_txt('xbl-notfirsthop', 'xbl.spamhaus.org.')
+describe T_RCVD_IN_XBL_NFH_2 Received via a relay in Spamhaus XBL
+tflags T_RCVD_IN_XBL_NFH_2 net
+
# SPF support. "pass" is nice, "fail" is bad, "softfail" is bad, but
# not as bad as "fail".
header T_SPF_PASS eval:check_for_spf_pass()
@@ -112,31 +93,6 @@
tflags T_SPF_HELO_SOFTFAIL net
score T_SPF_HELO_SOFTFAIL 0.1

-# Not good, esp. considering how *slow* it runs..
-# 17.640 4.1041 29.0741 0.124 0.69 -0.10 T_ALL_RELAYS_NEAR_MXES
-# However, combined with SBL it might work out useful...
-#header T_ALL_RELAYS_NEAR_MXES eval:check_for_all_relays_near_mxes()
-#tflags T_ALL_RELAYS_NEAR_MXES net nice
-#score T_ALL_RELAYS_NEAR_MXES -0.1
-#describe T_ALL_RELAYS_NEAR_MXES All relays are near to their MXes
-
-# try out new versions of username in subject ...
-# "user@domain.com" in the subject, case insensitive
-header T_USERNAME_IN_SUBJECT1 eval:check_for_to_in_subject('1')
-describe T_USERNAME_IN_SUBJECT1 Full To: address listed in Subject:
-header T_USERNAME_IN_SUBJECT2 eval:check_for_to_in_subject('2')
-describe T_USERNAME_IN_SUBJECT2 To: username listed in Subject: (ucfirst)
-header T_USERNAME_IN_SUBJECT3 eval:check_for_to_in_subject('3')
-describe T_USERNAME_IN_SUBJECT3 Full To: address listed at front of Subject:
-
-# This is hitting nothing. Maybe it's gone again...
-header T_RCVD_IN_PDL rbleval:check_rbl_txt('pdl-notfirsthop', 'dialups.visi.com.')
-describe T_RCVD_IN_PDL Received via a relay in PDL, http://www.pan-am.ca/pdl/
-tflags T_RCVD_IN_PDL net
-
-rawbody T_RNDMX /<rndmx\b/
-describe T_RNDMX Contains 'rndmx' hashbuster code
-
########################################################################
# This ratware always uses a +0000 TZ in the Date header, and has a multiplicity
# of From: header formats. ("From" header samples from Steven Champeon
@@ -291,17 +247,6 @@
header T_ALL_TRUSTED eval:check_all_trusted()
describe T_ALL_TRUSTED Did not pass through any untrusted hosts
tflags T_ALL_TRUSTED nice
-
-# both aspects of same spammer tool
-header T_NETIP_RCVD Received =~ /netIP with HTTP\;/
-header T_NETIP_BOUND Content-Type =~ /boundary="--ALT--[A-Z]{4}\d/
-
-# several variants of same rule idea
-header T_XORIGIP_NOT_IPV4_1 X-Originating-IP !~ /\[?(?:\d{1,3}\.){3}\d{1,3}\]?/ [if-unset: 0.0.0.0]
-header T_XORIGIP_NOT_IPV4_2 X-Originating-IP !~ /^\[?(?:\d{1,3}\.){3}\d{1,3}\]?$/ [if-unset: 0.0.0.0]
-header T_XORIGIP_NOT_IPV4_3 X-Originating-IP !~ /^\s*\[?(?:\d{1,3}\.){3}\d{1,3}\]?\s*$/ [if-unset: 0.0.0.0]
-header T_XORIGIP_NOT_IPV4_4 X-Originating-IP !~ /^[^\d.]*\[?(?:\d{1,3}\.){3}\d{1,3}\]?[^\d.]*$/ [if-unset: 0.0.0.0]
-describe T_XORIGIP_NOT_IPV4 X-Originating-IP doesn't look like IPv4 address

# some tests to catch long lines of random dictionary words
# this could be slow, being a rawbody rule, but if it works well maybe