Mailing List Archive

svn commit: rev 6514 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin
Author: felicity
Date: Thu Feb 5 09:50:11 2004
New Revision: 6514

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Log:
Found spam trying to use blank MIME boundaries, in violation of the RFC. It's not clear whether we should accept or ignore the blank boundary (MUAs are all over the board with respect to how they handle it), so this commit will have us accept the blank boundary. We can easily change it to ignore the blank boundary with a 2-line comment change if we later decide to do things differently.

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm Thu Feb 5 09:50:11 2004
@@ -158,7 +158,11 @@
# Figure out the simple content-type, or set it to text/plain
my $type = $_msg->header('Content-Type') || 'text/plain; charset=us-ascii';

- if ( $type =~ /^multipart\//i ) {
+ # multipart sections are required to have a boundary set ... If this
+ # one doesn't, assume it's malformed and send it to be parsed as a
+ # non-multipart section
+ #
+ if ( $type =~ /^multipart\//i && defined $boundary ) {
# Treat an initial multipart parse differently. This will keep the tree:
# obj(multipart->[ part1, part2 ]) instead of
# obj(obj(multipart ...))
@@ -187,11 +191,10 @@
sub _parse_multipart {
my($self, $msg, $_msg, $boundary, $body) = @_;

- $boundary ||= '';
- dbg("parsing multipart, got boundary: $boundary");
+ dbg("parsing multipart, got boundary: ".(defined $boundary ? $boundary : ''));

# ignore preamble per RFC 1521, unless there's no boundary ...
- if ( $boundary ) {
+ if ( defined $boundary ) {
my $line;
my $tmp_line = @{$body};
for ($line=0; $line < $tmp_line; $line++) {
@@ -214,7 +217,7 @@
my $line_count = @{$body};
foreach ( @{$body} ) {
# if we're on the last body line, or we find a boundary marker, deal with the mime part
- if ( --$line_count == 0 || ($boundary && /^\-\-\Q$boundary\E/) ) {
+ if ( --$line_count == 0 || (defined $boundary && /^\-\-\Q$boundary\E/) ) {
my $line = $_; # remember the last line

# per rfc 1521, the CRLF before the boundary is part of the boundary:
@@ -232,11 +235,11 @@
my($p_boundary);
($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
$p_boundary ||= $boundary;
- dbg("found part of type ".$part_msg->{'type'}.", boundary: ".$p_boundary);
+ dbg("found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
$self->_parse_body( $msg, $part_msg, $p_boundary, $part_array, 0 );
}

- last if ($boundary && $line =~ /^\-\-\Q${boundary}\E\-\-$/);
+ last if (defined $boundary && $line =~ /^\-\-\Q${boundary}\E\-\-$/);

# make sure we start with a new clean node
$in_body = 0;
@@ -297,6 +300,10 @@

$part_msg->{'type'} =
Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+
+ # multipart sections are required to have a boundary set ... If this
+ # one doesn't, assume it's malformed and revert to text/plain
+ $part_msg->{'type'} = 'text/plain' if ( $part_msg->{'type'} =~ /^multipart\//i && !defined $boundary );

# attempt to figure out a name for this attachment if there is one ...
my $disp = $part_msg->header('content-disposition') || '';

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Thu Feb 5 09:50:11 2004
@@ -576,9 +576,15 @@
# white space, the white space must be presumed to have been added by
# a gateway, and must be deleted.)"
#
- my ($boundary) = $ct =~ m!\bboundary\s*=\s*("[^"]*[^"\s]"|[^";\s]+)!i;
- $boundary =~ tr/"//d if ( defined $boundary ); # remove the double quotes ...
+ ## Be more conservative and require non-blank boundaries?
+ #my($boundary) = $ct =~ m!\bboundary\s*=("[^"]*[^"\s]"|[^";\s]+)!i;
+ ## Be a little more liberal and accept blank boundaries?
+ my($boundary) = $ct =~ m!\bboundary\s*=("[^"]*"|[^";\s]*)!i;

+ # If there are double-quotes in the boundary, get rid of them.
+ $boundary =~ tr/"//d if ( defined $boundary );
+
+ # Parse out the charset and name, if they exist.
my($charset) = $ct =~ /\bcharset\s*=\s*["']?(.*?)["']?(?:;|$)/i;
my($name) = $ct =~ /name\s*=\s*["']?(.*?)["']?(?:;|$)/i;