Mailing List Archive

svn commit: rev 6282 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin
Author: felicity
Date: Sun Jan 25 12:06:33 2004
New Revision: 6282

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Log:
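Potentially mark when a bad URI encoding occurs. URLEncode now also returns,
in list context, the characters it unescaped and the characters it escaped;
mailto: URIs are skipped, and setting found_bad_uri_encoding is commented out
for now.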

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sun Jan 25 12:06:33 2004
@@ -1414,7 +1414,7 @@
sub get_uri_list {
my ($self) = @_;

- $self->{found_bad_uri_encoding} = 0;
+ #$self->{found_bad_uri_encoding} = 0;

my $textary = $self->get_decoded_body_text_array();
my ($rulename, $pat, @uris);
@@ -1470,10 +1470,17 @@

# Make sure we catch bad encoding tricks ...
foreach my $uri ( @uris ) {
- my $nuri = Mail::SpamAssassin::Util::URLEncode($uri);
+ next if ( $uri =~ /^mailto:/i );
+
+ my($nuri, $unencoded, $encoded) = Mail::SpamAssassin::Util::URLEncode($uri);
if ( $nuri ne $uri ) {
push(@uris, $nuri);
- $self->{found_bad_uri_encoding} = 1;
+
+ # allow some unencodings to be ok ...
+ # This is essentially HTTP_EXCESSIVE_ESCAPES ...
+ #if ( $unencoded =~ /[a-zA-Z0-9\/]/ ) {
+ # $self->{found_bad_uri_encoding} = 1;
+ #}
}
}


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Sun Jan 25 12:06:33 2004
@@ -594,6 +594,8 @@
sub URLEncode {
my($url)=@_;
my(@characters)=split(/(\%[0-9a-fA-F]{2})/,$url);
+ my(@unencoded) = ();
+ my(@encoded) = ();

foreach(@characters) {
if ( /\%[0-9a-fA-F]{2}/ ) { # Escaped character set ...
@@ -605,15 +607,21 @@
|| /2[2356fF]|3[a-fA-F]|40/i )
{
s/\%([2-7][0-9a-fA-F])/sprintf "%c",hex($1)/e;
+ push(@unencoded, $_);
}
}
else { # Other stuff
# 0x00-0x20, 0x7f-0xff, <, >, and " ... "
s/([\000-\040\177-\377\074\076\042])
- /sprintf "%%%02x",unpack("C",$1)/egx;
+ /push(@encoded,$1) && sprintf "%%%02x",unpack("C",$1)/egx;
}
}
- return join("",@characters);
+ if (wantarray) {
+ return(join("",@characters), join("",@unencoded), join("",@encoded));
+ }
+ else {
+ return join("",@characters);
+ }
}

###########################################################################