Mailing List Archive

svn commit: rev 6761 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules
Author: quinlan
Date: Wed Feb 18 20:04:43 2004
New Revision: 6761

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
incubator/spamassassin/trunk/rules/70_testing.cf
Log:
bug 2996: HTML attribute testing


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Wed Feb 18 20:04:43 2004
@@ -46,6 +46,12 @@
# other non-standard tags
$re_other = 'o:\w+/?|x-sigsep|x-tab';

+# attributes: names defined by HTML 4.01 (strict, loose, and frameset DTDs), including deprecated ones
+my $re_attr = 'abbr|accept-charset|accept|accesskey|action|align|alink|alt|archive|axis|background|bgcolor|border|cellpadding|cellspacing|char|charoff|charset|checked|cite|class|classid|clear|code|codebase|codetype|color|cols|colspan|compact|content|coords|data|datetime|declare|defer|dir|disabled|enctype|face|for|frame|frameborder|headers|height|href|hreflang|hspace|http-equiv|id|ismap|label|lang|language|link|longdesc|marginheight|marginwidth|maxlength|media|method|multiple|name|nohref|noresize|noshade|nowrap|object|onblur|onchange|onclick|ondblclick|onfocus|onkeydown|onkeypress|onkeyup|onload|onmousedown|onmousemove|onmouseout|onmouseover|onmouseup|onreset|onselect|onsubmit|onunload|profile|prompt|readonly|rel|rev|rows|rowspan|rules|scheme|scope|scrolling|selected|shape|size|span|src|standby|start|style|summary|tabindex|target|text|title|type|usemap|valign|value|valuetype|version|vlink|vspace|width';
+
+# attributes: extra names outside the list above that we also accept (not counted as bad)
+my $re_attr_extra = 'family|wrap|/';
+
# style attributes
my %ok_attribute = (
text => [qw(body)],
@@ -175,6 +181,17 @@
$self->{html}{"inside_$tag"} += $num;
$self->{html}{"inside_$tag"} = 0 if $self->{html}{"inside_$tag"} < 0;

+ # attributes: tally all and unrecognized attribute names, both in total and by first occurrence
+ for my $name (keys %$attr) {
+ if ($name !~ /^(?:$re_attr|$re_attr_extra)$/io) {
+ $self->{html}{attr_bad}++;
+ $self->{html}{attr_unique_bad}++ if !exists $self->{"attr_seen_$name"};
+ }
+ $self->{html}{attr_all}++;
+ $self->{html}{attr_unique_all}++ if !exists $self->{"attr_seen_$name"};
+ $self->{"attr_seen_$name"} = 1;
+ }
+
# TODO: cover other changes
if ($tag =~ /^(?:body|font|table|tr|th|td|big|small|basefont|marquee)$/) {
$self->text_style($tag, $attr, $num);
@@ -828,7 +845,6 @@
($size =~ /\+(\d+)/ && $1 >= 1));
}
if ($tag eq "font" && exists $attr->{face}) {
- #print STDERR "FONT " . $attr->{face} . "\n";
if ($attr->{face} =~ /[A-Z]{3}/ && $attr->{face} !~ /M[ST][A-Z]|ITC/) {
$self->{html}{font_face_caps} = 1;
}

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm Wed Feb 18 20:04:43 2004
@@ -374,6 +374,12 @@
if (exists $r->{tags} && exists $r->{obfuscation}) {
$r->{obfuscation_ratio} = $r->{obfuscation} / $r->{tags};
}
+ if (exists $r->{attr_bad} && exists $r->{attr_all}) {
+ $r->{attr_bad} = $r->{attr_bad} / $r->{attr_all};
+ }
+ if (exists $r->{attr_unique_bad} && exists $r->{attr_unique_all}) {
+ $r->{attr_unique_bad} = $r->{attr_unique_bad} / $r->{attr_unique_all};
+ }
}
else {
$self->{'rendered_type'} = $self->{'type'};

Modified: incubator/spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_testing.cf (original)
+++ incubator/spamassassin/trunk/rules/70_testing.cf Wed Feb 18 20:04:43 2004
@@ -664,3 +664,15 @@
uri T_URI_HTTP_TO_HEX_IP /^https?:\/\/(?:[^\@]*\@|)0x[0-9a-f]{8}/i
describe T_URI_HTTP_TO_HEX_IP URI contains a link to a hexadecimal IP address

+# bug 2996: HTML attribute testing
+body T_HTML_ATTR_00 eval:html_range('attr_bad','0.0','0.2')
+body T_HTML_ATTR_20 eval:html_range('attr_bad','0.2','0.4')
+body T_HTML_ATTR_40 eval:html_range('attr_bad','0.4','0.6')
+body T_HTML_ATTR_60 eval:html_range('attr_bad','0.6','0.8')
+body T_HTML_ATTR_80 eval:html_range('attr_bad','0.8','1.0')
+
+body T_HTML_ATTR_UNIQUE_00 eval:html_range('attr_unique_bad','0.0','0.2')
+body T_HTML_ATTR_UNIQUE_20 eval:html_range('attr_unique_bad','0.2','0.4')
+body T_HTML_ATTR_UNIQUE_40 eval:html_range('attr_unique_bad','0.4','0.6')
+body T_HTML_ATTR_UNIQUE_60 eval:html_range('attr_unique_bad','0.6','0.8')
+body T_HTML_ATTR_UNIQUE_80 eval:html_range('attr_unique_bad','0.8','1.0')