Mailing List Archive

svn commit: rev 6260 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin
Author: felicity
Date: Sat Jan 24 10:36:47 2004
New Revision: 6260

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Log:
bug 2956: URI tests weren't matching where they should when URIs were improperly encoded. We now correctly re-encode the URIs we found and check the re-encoded forms too.

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sat Jan 24 10:36:47 2004
@@ -1442,6 +1442,8 @@
sub get_uri_list {
my ($self) = @_;

+ $self->{found_bad_uri_encoding} = 0;
+
my $textary = $self->get_decoded_body_text_array();
my ($rulename, $pat, @uris);
local ($_);
@@ -1491,6 +1493,15 @@

#warn("Got URI: $uri\n");
push @uris, $uri;
+ }
+ }
+
+ # Make sure we catch bad encoding tricks ...
+ foreach my $uri ( @uris ) {
+ my $nuri = Mail::SpamAssassin::Util::URLEncode($uri);
+ if ( $nuri ne $uri ) {
+ push(@uris, $nuri);
+ $self->{found_bad_uri_encoding} = 1;
}
}


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Sat Jan 24 10:36:47 2004
@@ -584,12 +584,40 @@
# Get the type out ...
$ct =~ s/;.*$//; # strip everything after first semi-colon
$ct =~ s@^([^/]+(?:/[^/]*)?).*$@$1@; # only something/something ...
- $ct =~ tr!\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135!!d; # strip inappropriate chars
+ $ct =~ tr/\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135//d; # strip inappropriate chars

return wantarray ? ($ct,$boundary) : $ct;
}

###########################################################################
+
+sub URLEncode { # Normalize the %XX escapes in $url; returns the re-encoded string
+ my($url)=@_;
+ my(@characters)=split(/(\%[0-9a-fA-F]{2})/,$url); # capturing split keeps each %XX escape as its own element
+
+ foreach(@characters) {
+ if ( /\%[0-9a-fA-F]{2}/ ) { # Element is a single %XX escape
+ # Keep the escape as-is when it encodes 0x00-0x20 or 0x7f-0xff,
+ # or one of the characters < > " # % ; / ? : @ = &
+ # (0x22 0x23 0x25 0x26 0x2f 0x3a-0x3f 0x40).
+ # Every other escape in 0x21-0x7e is decoded to its literal character.
+ unless ( /(20|7f|[0189a-fA-F][0-9a-fA-F])/i
+ || /2[2356fF]|3[a-fA-F]|40/i )
+ {
+ s/\%([2-7][0-9a-fA-F])/sprintf "%c",hex($1)/e;
+ }
+ }
+ else { # Literal text between escapes
+ # Percent-encode 0x00-0x20, 0x7f-0xff, <, > and " as lowercase-hex %xx.
+ s/([\000-\040\177-\377\074\076\042])
+ /sprintf "%%%02x",unpack("C",$1)/egx;
+ }
+ }
+ return join("",@characters);
+}
+
+###########################################################################
+
sub dbg { Mail::SpamAssassin::dbg (@_); }

1;