Mailing List Archive

svn commit: rev 6610 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules
Author: quinlan
Date: Tue Feb 10 21:02:26 2004
New Revision: 6610

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
incubator/spamassassin/trunk/rules/70_testing.cf
Log:
add HTML order rules and some possible HTML_MESSAGE replacements


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm Tue Feb 10 21:02:26 2004
@@ -3397,6 +3397,75 @@
}
}

+# Eval test: check the order in which HTML tags appear in the message.
+#
+# Walks the tag list in $self->{html}{order}; closing tags are the entries
+# that start with "/".  Before the first tag the "previous tag" is the
+# literal string 'undef', so passing 'undef' as $one matches the first tag.
+#
+# $type selects the test:
+#   any    - $two immediately follows $one, considering every tag
+#   ignore - same, with the leading "/" stripped from closing tags
+#   open   - same, considering opening tags only
+#   close  - same, considering closing tags only (names keep the "/")
+#   range  - number of distinct adjacent tag pairs is >= $one and < $two;
+#            the string 'undef' disables either bound
+#
+# Returns true when the test matches; false otherwise, including when no
+# tags were recorded or $type is not recognized.
+sub html_order {
+  my ($self, undef, $type, $one, $two) = @_;
+
+  # defined(@array) is deprecated and fatal on perl 5.22+, so check the
+  # reference and its element count instead
+  my $order = $self->{html}{order};
+  return 0 unless $order && @{$order};
+
+  my $last = 'undef';
+
+  # type test on outside of loop should be faster
+  if ($type eq "any") {
+    for my $tag (@{$order}) {
+      return 1 if $last eq $one && $tag eq $two;
+      $last = $tag;
+    }
+  }
+  elsif ($type eq "ignore") {
+    for (@{$order}) {
+      my $tag = $_;   # copy so the stored tag is not modified
+      $tag =~ s@^/@@;
+      return 1 if $last eq $one && $tag eq $two;
+      $last = $tag;
+    }
+  }
+  elsif ($type eq "open") {
+    for my $tag (@{$order}) {
+      next if substr($tag, 0, 1) eq "/";
+      return 1 if $last eq $one && $tag eq $two;
+      $last = $tag;
+    }
+  }
+  elsif ($type eq "close") {
+    for my $tag (@{$order}) {
+      next if substr($tag, 0, 1) ne "/";
+      return 1 if $last eq $one && $tag eq $two;
+      $last = $tag;
+    }
+  }
+  elsif ($type eq "range") {
+    my %seen;
+    my $count = 0;
+    for my $tag (@{$order}) {
+      # count each (previous tag, tag) pair only the first time it occurs
+      $count++ unless $seen{"$last $tag"}++;
+      $last = $tag;
+    }
+    return (($one eq 'undef' || $count >= $one) &&
+            ($two eq 'undef' || $count < $two));
+  }
+  return 0;
+}
+
###########################################################################

sub check_hashcash_value {

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Tue Feb 10 21:02:26 2004
@@ -159,6 +159,8 @@
$self->{html}{"inside_$tag"} += $num;
$self->{html}{"inside_$tag"} = 0 if $self->{html}{"inside_$tag"} < 0;

+ push @{$self->{html}{order}}, ($num > 0 ? "" : "/") . $tag;
+
if ($tag =~ /^(?:body|table|tr|th|td)$/) {
$self->html_bgcolor($tag, $attr, $num);
}

Modified: incubator/spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_testing.cf (original)
+++ incubator/spamassassin/trunk/rules/70_testing.cf Tue Feb 10 21:02:26 2004
@@ -342,6 +342,11 @@
body T_HTML_BADTAGS_U_80_90 eval:html_range('t_bad_tag_unique_ratio','0.80','0.90')
body T_HTML_BADTAGS_U_90_100 eval:html_range('t_bad_tag_unique_ratio','0.90','1.00')

+# possible HTML_MESSAGE replacements
+body T_HTML_MESSAGE_1 eval:html_eval('html_text','!= undef')
+body T_HTML_MESSAGE_2 eval:html_eval('html_text','ne ""')
+body T_HTML_MESSAGE_3 eval:html_eval('tags','> 0')
+
# more portable replacement for RCVD_NUMERIC_HELO that doesn't rely on
# Received headers using "helo=" prefix
header T_RCVD_NUMERIC_HELO X-Spam-Relays-Untrusted =~ / helo=\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} /
@@ -572,3 +577,176 @@
# same ratware; forges dates as 'with SMTP; Jan, 17 2004 22:05:48 -0300'
header T_DATE_COMMA Received =~ /\bwith \S+\; [A-Z][a-z][a-z],\s+\d\d\s+\d\d\d\d\s/

+########################################################################
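+# slew of HTML order rules, originally autogenerated some months ago by
+# dumping HTML tag orders for spam and ham; the dumps were not separated
+# by message, so some good rules may have been missed.
+# NOTE(review): several rule names below are defined more than once with
+# different arguments -- presumably only one definition of each takes effect.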
+
+body T_RANGE_100_110 eval:html_order('range', '100', '110')
+body T_RANGE_110_120 eval:html_order('range', '110', '120')
+body T_RANGE_120_130 eval:html_order('range', '120', '130')
+body T_RANGE_130_140 eval:html_order('range', '130', '140')
+body T_RANGE_140_150 eval:html_order('range', '140', '150')
+body T_RANGE_150_160 eval:html_order('range', '150', '160')
+body T_RANGE_160_170 eval:html_order('range', '160', '170')
+body T_RANGE_170_180 eval:html_order('range', '170', '180')
+body T_RANGE_180_190 eval:html_order('range', '180', '190')
+body T_RANGE_190_200 eval:html_order('range', '190', '200')
+body T_RANGE_200_210 eval:html_order('range', '200', '210')
+body T_RANGE_210_220 eval:html_order('range', '210', '220')
+body T_RANGE_220_230 eval:html_order('range', '220', '230')
+body T_RANGE_230_240 eval:html_order('range', '230', '240')
+body T_RANGE_240_250 eval:html_order('range', '240', '250')
+body T_RANGE_250_260 eval:html_order('range', '250', '260')
+body T_RANGE_260_270 eval:html_order('range', '260', '270')
+body T_RANGE_270_280 eval:html_order('range', '270', '280')
+body T_RANGE_280_290 eval:html_order('range', '280', '290')
+body T_RANGE_290_300 eval:html_order('range', '290', '300')
+
+body T_ANY_0A_BR eval:html_order('any', 'br', '/a')
+body T_ANY_0BODY_0HTML eval:html_order('any', '/html', '/body')
+body T_ANY_0CENTER_0FONT eval:html_order('any', '/center', '/font')
+body T_ANY_0CENTER_0FONT eval:html_order('any', '/font', '/center')
+body T_ANY_0CENTER_0P eval:html_order('any', '/center', '/p')
+body T_ANY_0CENTER_BR eval:html_order('any', 'br', '/center')
+body T_ANY_0CENTER_IMG eval:html_order('any', '/center', 'img')
+body T_ANY_0CENTER_IMG eval:html_order('any', 'img', '/center')
+body T_ANY_0DIV_IMG eval:html_order('any', '/div', 'img')
+body T_ANY_0DIV_IMG eval:html_order('any', 'img', '/div')
+body T_ANY_0FONT_0CENTER eval:html_order('any', '/center', '/font')
+body T_ANY_0FONT_0CENTER eval:html_order('any', '/font', '/center')
+body T_ANY_0HTML_0BODY eval:html_order('any', '/body', '/html')
+body T_ANY_0HTML_0BODY eval:html_order('any', '/html', '/body')
+body T_ANY_0P_0CENTER eval:html_order('any', '/center', '/p')
+body T_ANY_0P_0P eval:html_order('any', '/p', '/p')
+body T_ANY_0TABLE_0CENTER eval:html_order('any', '/center', '/table')
+body T_ANY_0TABLE_0CENTER eval:html_order('any', '/table', '/center')
+body T_ANY_BODY_A eval:html_order('any', 'a', 'body')
+body T_ANY_BODY_A eval:html_order('any', 'body', 'a')
+body T_ANY_BR_0A eval:html_order('any', '/a', 'br')
+body T_ANY_BR_0A eval:html_order('any', 'br', '/a')
+body T_ANY_BR_0CENTER eval:html_order('any', '/center', 'br')
+body T_ANY_BR_0CENTER eval:html_order('any', 'br', '/center')
+body T_ANY_BR_A eval:html_order('any', 'a', 'br')
+body T_ANY_B_A eval:html_order('any', 'a', 'b')
+body T_ANY_B_CENTER eval:html_order('any', 'b', 'center')
+body T_ANY_B_CENTER eval:html_order('any', 'center', 'b')
+body T_ANY_CENTER_A eval:html_order('any', 'a', 'center')
+body T_ANY_CENTER_A eval:html_order('any', 'center', 'a')
+body T_ANY_CENTER_B eval:html_order('any', 'b', 'center')
+body T_ANY_CENTER_B eval:html_order('any', 'center', 'b')
+body T_ANY_CENTER_DIV eval:html_order('any', 'center', 'div')
+body T_ANY_CENTER_DIV eval:html_order('any', 'div', 'center')
+body T_ANY_CENTER_FONT eval:html_order('any', 'center', 'font')
+body T_ANY_CENTER_FONT eval:html_order('any', 'font', 'center')
+body T_ANY_DIV_CENTER eval:html_order('any', 'center', 'div')
+body T_ANY_DIV_CENTER eval:html_order('any', 'div', 'center')
+body T_ANY_FONT_CENTER eval:html_order('any', 'center', 'font')
+body T_ANY_FONT_CENTER eval:html_order('any', 'font', 'center')
+body T_ANY_HTML_CENTER eval:html_order('any', 'center', 'html')
+body T_ANY_HTML_CENTER eval:html_order('any', 'html', 'center')
+body T_ANY_IMG_0CENTER eval:html_order('any', '/center', 'img')
+body T_ANY_IMG_0CENTER eval:html_order('any', 'img', '/center')
+body T_ANY_IMG_0DIV eval:html_order('any', '/div', 'img')
+body T_ANY_IMG_0DIV eval:html_order('any', 'img', '/div')
+body T_ANY_P_B eval:html_order('any', 'b', 'p')
+body T_ANY_UNDEF_BODY eval:html_order('any', 'body', 'undef')
+body T_ANY_UNDEF_BODY eval:html_order('any', 'undef', 'body')
+
+body T_CLOSE_0A_0CENTER eval:html_order('any', '/a', '/center')
+body T_CLOSE_0A_0CENTER eval:html_order('close', '/center', '/a')
+body T_CLOSE_0BODY_0HTML eval:html_order('close', '/html', '/body')
+body T_CLOSE_0CENTER_0A eval:html_order('any', '/center', '/a')
+body T_CLOSE_0CENTER_0A eval:html_order('close', '/a', '/center')
+body T_CLOSE_0CENTER_0FONT eval:html_order('any', '/center', '/font')
+body T_CLOSE_0CENTER_0FONT eval:html_order('close', '/font', '/center')
+body T_CLOSE_0CENTER_0P eval:html_order('any', '/center', '/p')
+body T_CLOSE_0CENTER_0P eval:html_order('close', '/center', '/p')
+body T_CLOSE_0CENTER_0P eval:html_order('close', '/p', '/center')
+body T_CLOSE_0FONT_0CENTER eval:html_order('any', '/font', '/center')
+body T_CLOSE_0FONT_0CENTER eval:html_order('close', '/center', '/font')
+body T_CLOSE_0P_0CENTER eval:html_order('close', '/center', '/p')
+
+body T_IGNORE_A_BODY eval:html_order('ignore', 'body', 'a')
+body T_IGNORE_A_CENTER eval:html_order('ignore', 'a', 'center')
+body T_IGNORE_A_CENTER eval:html_order('ignore', 'center', 'a')
+body T_IGNORE_A_P eval:html_order('ignore', 'p', 'a')
+body T_IGNORE_BODY_A eval:html_order('ignore', 'a', 'body')
+body T_IGNORE_BODY_A eval:html_order('ignore', 'body', 'a')
+body T_IGNORE_BODY_CENTER eval:html_order('ignore', 'center', 'body')
+body T_IGNORE_BODY_HTML eval:html_order('ignore', 'html', 'body')
+body T_IGNORE_B_CENTER eval:html_order('ignore', 'b', 'center')
+body T_IGNORE_B_CENTER eval:html_order('ignore', 'center', 'b')
+body T_IGNORE_CENTER_A eval:html_order('ignore', 'a', 'center')
+body T_IGNORE_CENTER_A eval:html_order('ignore', 'center', 'a')
+body T_IGNORE_CENTER_B eval:html_order('ignore', 'b', 'center')
+body T_IGNORE_CENTER_B eval:html_order('ignore', 'center', 'b')
+body T_IGNORE_CENTER_BODY eval:html_order('ignore', 'body', 'center')
+body T_IGNORE_CENTER_BR eval:html_order('ignore', 'br', 'center')
+body T_IGNORE_CENTER_DIV eval:html_order('ignore', 'center', 'div')
+body T_IGNORE_CENTER_DIV eval:html_order('ignore', 'div', 'center')
+body T_IGNORE_CENTER_FONT eval:html_order('ignore', 'center', 'font')
+body T_IGNORE_CENTER_FONT eval:html_order('ignore', 'font', 'center')
+body T_IGNORE_CENTER_HTML eval:html_order('ignore', 'center', 'html')
+body T_IGNORE_CENTER_HTML eval:html_order('ignore', 'html', 'center')
+body T_IGNORE_CENTER_IMG eval:html_order('ignore', 'center', 'img')
+body T_IGNORE_CENTER_IMG eval:html_order('ignore', 'img', 'center')
+body T_IGNORE_CENTER_P eval:html_order('ignore', 'p', 'center')
+body T_IGNORE_CENTER_TABLE eval:html_order('ignore', 'center', 'table')
+body T_IGNORE_CENTER_TABLE eval:html_order('ignore', 'table', 'center')
+body T_IGNORE_DIV_CENTER eval:html_order('ignore', 'center', 'div')
+body T_IGNORE_DIV_CENTER eval:html_order('ignore', 'div', 'center')
+body T_IGNORE_DIV_IMG eval:html_order('ignore', 'div', 'img')
+body T_IGNORE_DIV_IMG eval:html_order('ignore', 'img', 'div')
+body T_IGNORE_FONT_CENTER eval:html_order('ignore', 'center', 'font')
+body T_IGNORE_FONT_CENTER eval:html_order('ignore', 'font', 'center')
+body T_IGNORE_FONT_TABLE eval:html_order('ignore', 'table', 'font')
+body T_IGNORE_HTML_CENTER eval:html_order('ignore', 'center', 'html')
+body T_IGNORE_HTML_CENTER eval:html_order('ignore', 'html', 'center')
+body T_IGNORE_IMG_CENTER eval:html_order('ignore', 'center', 'img')
+body T_IGNORE_IMG_CENTER eval:html_order('ignore', 'img', 'center')
+body T_IGNORE_IMG_DIV eval:html_order('ignore', 'div', 'img')
+body T_IGNORE_IMG_DIV eval:html_order('ignore', 'img', 'div')
+body T_IGNORE_P_CENTER eval:html_order('ignore', 'center', 'p')
+body T_IGNORE_TABLE_CENTER eval:html_order('ignore', 'center', 'table')
+body T_IGNORE_TABLE_CENTER eval:html_order('ignore', 'table', 'center')
+body T_IGNORE_TABLE_FONT eval:html_order('ignore', 'font', 'table')
+body T_IGNORE_TABLE_FONT eval:html_order('ignore', 'table', 'font')
+body T_IGNORE_TD_CENTER eval:html_order('ignore', 'center', 'td')
+body T_IGNORE_UNDEF_BODY eval:html_order('ignore', 'body', 'undef')
+body T_IGNORE_UNDEF_BODY eval:html_order('ignore', 'undef', 'body')
+
+body T_OPEN_A_CENTER eval:html_order('open', 'center', 'a')
+body T_OPEN_BODY_A eval:html_order('any', 'body', 'a')
+body T_OPEN_BODY_A eval:html_order('open', 'a', 'body')
+body T_OPEN_BODY_A eval:html_order('open', 'body', 'a')
+body T_OPEN_B_CENTER eval:html_order('any', 'b', 'center')
+body T_OPEN_B_CENTER eval:html_order('open', 'center', 'b')
+body T_OPEN_CENTER_BR eval:html_order('open', 'br', 'center')
+body T_OPEN_CENTER_DIV eval:html_order('open', 'div', 'center')
+body T_OPEN_CENTER_FONT eval:html_order('any', 'center', 'font')
+body T_OPEN_CENTER_FONT eval:html_order('open', 'font', 'center')
+body T_OPEN_CENTER_IMG eval:html_order('any', 'center', 'img')
+body T_OPEN_CENTER_IMG eval:html_order('open', 'center', 'img')
+body T_OPEN_CENTER_IMG eval:html_order('open', 'img', 'center')
+body T_OPEN_CENTER_P eval:html_order('open', 'p', 'center')
+body T_OPEN_HTML_CENTER eval:html_order('any', 'html', 'center')
+body T_OPEN_HTML_CENTER eval:html_order('open', 'center', 'html')
+body T_OPEN_HTML_CENTER eval:html_order('open', 'html', 'center')
+body T_OPEN_IMG_CENTER eval:html_order('any', 'img', 'center')
+body T_OPEN_IMG_CENTER eval:html_order('open', 'center', 'img')
+body T_OPEN_IMG_CENTER eval:html_order('open', 'img', 'center')
+body T_OPEN_IMG_DIV eval:html_order('open', 'div', 'img')
+body T_OPEN_IMG_FONT eval:html_order('open', 'font', 'img')
+body T_OPEN_INPUT_P eval:html_order('any', 'input', 'p')
+body T_OPEN_INPUT_P eval:html_order('open', 'p', 'input')
+body T_OPEN_P_CENTER eval:html_order('any', 'p', 'center')
+body T_OPEN_P_CENTER eval:html_order('open', 'center', 'p')
+body T_OPEN_P_INPUT eval:html_order('open', 'input', 'p')
+body T_OPEN_P_STRONG eval:html_order('open', 'strong', 'p')
+body T_OPEN_UNDEF_BODY eval:html_order('open', 'body', 'undef')
+body T_OPEN_UNDEF_BODY eval:html_order('open', 'undef', 'body')
+
+########################################################################