Mailing List Archive

svn commit: rev 6715 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules
Author: quinlan
Date: Tue Feb 17 21:52:52 2004
New Revision: 6715

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
incubator/spamassassin/trunk/rules/70_testing.cf
Log:
add min/max font size support using new font code
add <basefont> support
add remaining CSS3 colors so that invisible or low-contrast text is not missed
encode HTML and CSS3 colors as hex integers instead of strings (saves about 2k of memory)
merge tr/td handling into one routine
fix some minor bugs with new font code


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Tue Feb 17 21:52:52 2004
@@ -75,11 +75,13 @@
sub html_init {
my ($self) = @_;

+ $self->{basefont} = 3;
+
undef $self->{text_style};
my %default = (tag => "default",
fgcolor => "#000000",
bgcolor => "#ffffff",
- size => 3);
+ size => $self->{basefont});
push @{ $self->{text_style} }, \%default;
}

@@ -103,6 +105,8 @@
$self->{html}{max_shouting} = 0;
$self->{html}{total_comment_ratio} = 0;
$self->{html}{title_index} = -1;
+ $self->{html}{max_size} = 3; # start at default size
+ $self->{html}{min_size} = 3; # start at default size

$self->{html_text} = [];
$self->{html_last_tag} = 0;
@@ -172,7 +176,7 @@
$self->{html}{"inside_$tag"} = 0 if $self->{html}{"inside_$tag"} < 0;

# TODO: cover other changes
- if ($tag =~ /^(?:body|font|table|tr|th|td|big|small)$/) {
+ if ($tag =~ /^(?:body|font|table|tr|th|td|big|small|basefont)$/) {
$self->text_style($tag, $attr, $num);
}

@@ -420,76 +424,194 @@
return ($h, $s, $v);
}

-# HTML 4 defined 16 colors
my %html_color = (
- aqua => '#00ffff',
- black => '#000000',
- blue => '#0000ff',
- fuchsia => '#ff00ff',
- gray => '#808080',
- green => '#008000',
- lime => '#00ff00',
- maroon => '#800000',
- navy => '#000080',
- olive => '#808000',
- purple => '#800080',
- red => '#ff0000',
- silver => '#c0c0c0',
- teal => '#008080',
- white => '#ffffff',
- yellow => '#ffff00',
-);
-
-# popular X11 colors specified in CSS3 color module
-my %name_color = (
- aliceblue => '#f0f8ff',
- cyan => '#00ffff',
- darkblue => '#00008b',
- darkcyan => '#008b8b',
- darkgray => '#a9a9a9',
- darkgreen => '#006400',
- darkred => '#8b0000',
- firebrick => '#b22222',
- gold => '#ffd700',
- lightslategray=> '#778899',
- magenta => '#ff00ff',
- orange => '#ffa500',
- pink => '#ffc0cb',
- whitesmoke => '#f5f5f5',
+ # HTML 4 defined 16 colors
+ aqua => 0x00ffff,
+ black => 0x000000,
+ blue => 0x0000ff,
+ fuchsia => 0xff00ff,
+ gray => 0x808080,
+ green => 0x008000,
+ lime => 0x00ff00,
+ maroon => 0x800000,
+ navy => 0x000080,
+ olive => 0x808000,
+ purple => 0x800080,
+ red => 0xff0000,
+ silver => 0xc0c0c0,
+ teal => 0x008080,
+ white => 0xffffff,
+ yellow => 0xffff00,
+ # X11 colors specified in CSS3 color module
+ aliceblue => 0xf0f8ff,
+ antiquewhite => 0xfaebd7,
+ aqua => 0x00ffff,
+ aquamarine => 0x7fffd4,
+ azure => 0xf0ffff,
+ beige => 0xf5f5dc,
+ bisque => 0xffe4c4,
+ black => 0x000000,
+ blanchedalmond => 0xffebcd,
+ blue => 0x0000ff,
+ blueviolet => 0x8a2be2,
+ brown => 0xa52a2a,
+ burlywood => 0xdeb887,
+ cadetblue => 0x5f9ea0,
+ chartreuse => 0x7fff00,
+ chocolate => 0xd2691e,
+ coral => 0xff7f50,
+ cornflowerblue => 0x6495ed,
+ cornsilk => 0xfff8dc,
+ crimson => 0xdc143c,
+ cyan => 0x00ffff,
+ darkblue => 0x00008b,
+ darkcyan => 0x008b8b,
+ darkgoldenrod => 0xb8860b,
+ darkgray => 0xa9a9a9,
+ darkgreen => 0x006400,
+ darkgrey => 0xa9a9a9,
+ darkkhaki => 0xbdb76b,
+ darkmagenta => 0x8b008b,
+ darkolivegreen => 0x556b2f,
+ darkorange => 0xff8c00,
+ darkorchid => 0x9932cc,
+ darkred => 0x8b0000,
+ darksalmon => 0xe9967a,
+ darkseagreen => 0x8fbc8f,
+ darkslateblue => 0x483d8b,
+ darkslategray => 0x2f4f4f,
+ darkslategrey => 0x2f4f4f,
+ darkturquoise => 0x00ced1,
+ darkviolet => 0x9400d3,
+ deeppink => 0xff1493,
+ deepskyblue => 0x00bfff,
+ dimgray => 0x696969,
+ dimgrey => 0x696969,
+ dodgerblue => 0x1e90ff,
+ firebrick => 0xb22222,
+ floralwhite => 0xfffaf0,
+ forestgreen => 0x228b22,
+ fuchsia => 0xff00ff,
+ gainsboro => 0xdcdcdc,
+ ghostwhite => 0xf8f8ff,
+ gold => 0xffd700,
+ goldenrod => 0xdaa520,
+ gray => 0x808080,
+ green => 0x008000,
+ greenyellow => 0xadff2f,
+ grey => 0x808080,
+ honeydew => 0xf0fff0,
+ hotpink => 0xff69b4,
+ indianred => 0xcd5c5c,
+ indigo => 0x4b0082,
+ ivory => 0xfffff0,
+ khaki => 0xf0e68c,
+ lavender => 0xe6e6fa,
+ lavenderblush => 0xfff0f5,
+ lawngreen => 0x7cfc00,
+ lemonchiffon => 0xfffacd,
+ lightblue => 0xadd8e6,
+ lightcoral => 0xf08080,
+ lightcyan => 0xe0ffff,
+ lightgoldenrodyellow => 0xfafad2,
+ lightgray => 0xd3d3d3,
+ lightgreen => 0x90ee90,
+ lightgrey => 0xd3d3d3,
+ lightpink => 0xffb6c1,
+ lightsalmon => 0xffa07a,
+ lightseagreen => 0x20b2aa,
+ lightskyblue => 0x87cefa,
+ lightslategray => 0x778899,
+ lightslategrey => 0x778899,
+ lightsteelblue => 0xb0c4de,
+ lightyellow => 0xffffe0,
+ lime => 0x00ff00,
+ limegreen => 0x32cd32,
+ linen => 0xfaf0e6,
+ magenta => 0xff00ff,
+ maroon => 0x800000,
+ mediumaquamarine => 0x66cdaa,
+ mediumblue => 0x0000cd,
+ mediumorchid => 0xba55d3,
+ mediumpurple => 0x9370db,
+ mediumseagreen => 0x3cb371,
+ mediumslateblue => 0x7b68ee,
+ mediumspringgreen => 0x00fa9a,
+ mediumturquoise => 0x48d1cc,
+ mediumvioletred => 0xc71585,
+ midnightblue => 0x191970,
+ mintcream => 0xf5fffa,
+ mistyrose => 0xffe4e1,
+ moccasin => 0xffe4b5,
+ navajowhite => 0xffdead,
+ navy => 0x000080,
+ oldlace => 0xfdf5e6,
+ olive => 0x808000,
+ olivedrab => 0x6b8e23,
+ orange => 0xffa500,
+ orangered => 0xff4500,
+ orchid => 0xda70d6,
+ palegoldenrod => 0xeee8aa,
+ palegreen => 0x98fb98,
+ paleturquoise => 0xafeeee,
+ palevioletred => 0xdb7093,
+ papayawhip => 0xffefd5,
+ peachpuff => 0xffdab9,
+ peru => 0xcd853f,
+ pink => 0xffc0cb,
+ plum => 0xdda0dd,
+ powderblue => 0xb0e0e6,
+ purple => 0x800080,
+ red => 0xff0000,
+ rosybrown => 0xbc8f8f,
+ royalblue => 0x4169e1,
+ saddlebrown => 0x8b4513,
+ salmon => 0xfa8072,
+ sandybrown => 0xf4a460,
+ seagreen => 0x2e8b57,
+ seashell => 0xfff5ee,
+ sienna => 0xa0522d,
+ silver => 0xc0c0c0,
+ skyblue => 0x87ceeb,
+ slateblue => 0x6a5acd,
+ slategray => 0x708090,
+ slategrey => 0x708090,
+ snow => 0xfffafa,
+ springgreen => 0x00ff7f,
+ steelblue => 0x4682b4,
+ tan => 0xd2b48c,
+ teal => 0x008080,
+ thistle => 0xd8bfd8,
+ tomato => 0xff6347,
+ turquoise => 0x40e0d0,
+ violet => 0xee82ee,
+ wheat => 0xf5deb3,
+ white => 0xffffff,
+ whitesmoke => 0xf5f5f5,
+ yellow => 0xffff00,
+ yellowgreen => 0x9acd32,
);

sub name_to_rgb {
- return $html_color{$_[0]} || $name_color{$_[0]} || $_[0];
-}
-
-# this might not be quite right, may need to pay attention to table nesting
-sub close_tag_tr {
- my ($self) = @_;
-
- # don't close if never opened
- return if !grep { $_->{tag} eq "tr" } @{ $self->{text_style} };
-
- my $tag;
- while (@{ $self->{text_style} } && ($tag = $self->{text_style}[-1]->{tag})) {
- if ($tag =~ /^(?:font|td|tr)$/) {
- pop @{ $self->{text_style} };
- }
- else {
- last;
- }
+ my $color = lc $_[0];
+ if (my $hex = $html_color{$color}) {
+ return sprintf("#%06x", $hex);
}
+ return $color;
}

# this might not be quite right, may need to pay attention to table nesting
-sub close_tag_td {
- my ($self) = @_;
+sub close_table_tag {
+ my ($self, $tag) = @_;

# don't close if never opened
- return if !grep { $_->{tag} eq "td" } @{ $self->{text_style} };
+ return unless grep { $_->{tag} eq $tag } @{ $self->{text_style} };

- my $tag;
- while (@{ $self->{text_style} } && ($tag = $self->{text_style}[-1]->{tag})) {
- if ($tag =~ /^(?:font|td)$/) {
+ my $top;
+ while (@{ $self->{text_style} } && ($top = $self->{text_style}[-1]->{tag})) {
+ if (($tag eq "td" && $top =~ /^(?:font|td)$/) ||
+ ($tag eq "tr" && $top =~ /^(?:font|td|tr)$/))
+ {
pop @{ $self->{text_style} };
}
else {
@@ -511,7 +633,6 @@
}

# body, font, table, tr, th, td, big, small
-# TODO: implement <basefont> support
sub text_style {
my ($self, $tag, $attr, $num) = @_;

@@ -526,9 +647,16 @@
# TODO: skip if we've already seen body
}

+ # change basefont (only change size)
+ if ($tag eq "basefont" &&
+ exists $attr->{size} && $attr->{size} =~ /^\s*(\d+)/)
+ {
+ $self->{basefont} = $1;
+ return;
+ }
+
# close elements with optional end tags
- $self->close_tag_tr() if $tag eq "tr";
- $self->close_tag_td() if $tag eq "td";
+ $self->close_table_tag($tag) if ($tag eq "td" || $tag eq "tr");

# copy current text state
my %new = %{ $self->{text_style}[-1] };
@@ -553,18 +681,19 @@
next unless (grep { $_ eq $tag } @{ $ok_attribute{$name} });
if ($name =~ /^(?:text|color)$/) {
# two different names for text color
- $new{fgcolor} = name_to_rgb(lc($attr->{$name}));
- $self->html_font_color_tests($attr->{$name});
+ my $color = name_to_rgb(lc($attr->{$name}));
+ $new{fgcolor} = $color;
+ $self->html_font_color_tests($color);
}
elsif ($name eq "size" && $attr->{size} =~ /^\s*([+-]\d+)/) {
# relative font size
- $new{size} += $1;
+ $new{size} = $self->{basefont} + $1;
}
else {
# overwrite
if ($name eq "bgcolor") {
$attr->{bgcolor} = name_to_rgb(lc($attr->{bgcolor}));
- # one test
+ # one test (text tests are done elsewhere)
if ($tag eq "body" && $attr->{bgcolor} !~ /^\#?ffffff$/) {
$self->{html}{bgcolor_nonwhite} = 1;
}
@@ -577,6 +706,12 @@
$new{$name} = $attr->{$name};
}
}
+ if ($new{size} > $self->{html}{max_size}) {
+ $self->{html}{max_size} = $new{size};
+ }
+ elsif ($new{size} < $self->{html}{min_size}) {
+ $self->{html}{min_size} = $new{size};
+ }
}
push @{ $self->{text_style} }, \%new;
}
@@ -590,18 +725,15 @@
}

sub html_font_color_tests {
- my ($self, $color) = @_;
-
- my $bg = $self->{text_style}[-1]->{fgcolor};
- my $fg = lc($color);
+ my ($self, $c) = @_;

- if ($fg =~ /^\#?[0-9a-f]{6}$/ && $fg !~ /^\#?(?:00|33|66|80|99|cc|ff){3}$/) {
+ if ($c =~ /^\#?[0-9a-f]{6}$/ && $c !~ /^\#?(?:00|33|66|80|99|cc|ff){3}$/) {
$self->{html}{font_color_unsafe} = 1;
}
- if ($fg !~ /^\#?[0-9a-f]{6}$/ && !exists $html_color{$fg}) {
+ if ($c !~ /^\#?[0-9a-f]{6}$/ && !exists $html_color{$c}) {
$self->{html}{font_color_name} = 1;
}
- if ($fg =~ /^\#?([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})$/) {
+ if ($c =~ /^\#?([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})$/) {
my ($h, $s, $v) = rgb_to_hsv(hex($1), hex($2), hex($3));
if (!defined($h)) {
$self->{html}{font_gray} = 1 unless ($v == 0 || $v == 255);

Modified: incubator/spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_testing.cf (original)
+++ incubator/spamassassin/trunk/rules/70_testing.cf Tue Feb 17 21:52:52 2004
@@ -561,5 +561,19 @@
body T_HTML_TAG_BALANCE_DIV_2 eval:html_tag_balance('div', '> 0')
describe T_HTML_TAG_BALANCE_DIV_2 HTML is missing "div" tags

+body T_HTML_FONT_SMALL_SIZE eval:html_eval('min_size', '< 3')
+body T_HTML_FONT_SMALL_SIZE_0 eval:html_eval('min_size', '< 2')
+body T_HTML_FONT_SMALL_SIZE_1 eval:html_eval('min_size', '< 1')
+body T_HTML_FONT_SMALL_SIZE_2 eval:html_eval('min_size', '< 0')
+body T_HTML_FONT_SMALL_SIZE_3 eval:html_eval('min_size', '< -1')
+body T_HTML_FONT_SMALL_SIZE_4 eval:html_eval('min_size', '< -2')
+
+body T_HTML_FONT_LARGE_SIZE eval:html_eval('max_size', '> 3')
+body T_HTML_FONT_LARGE_SIZE_0 eval:html_eval('max_size', '> 4')
+body T_HTML_FONT_LARGE_SIZE_1 eval:html_eval('max_size', '> 5')
+body T_HTML_FONT_LARGE_SIZE_2 eval:html_eval('max_size', '> 6')
+body T_HTML_FONT_LARGE_SIZE_3 eval:html_eval('max_size', '> 7')
+body T_HTML_FONT_LARGE_SIZE_4 eval:html_eval('max_size', '> 8')
+
# bug 1985
body T_URGENT_BIZ /urgent.{0,16}(?:assistance|business|buy|confidential|notice|proposal|reply|request|response)/i