Mailing List Archive

svn commit: r109812 - /spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm /spamassassin/trunk/t/missing_hb_separator.t
Author: felicity
Date: Sat Dec 4 09:05:23 2004
New Revision: 109812

URL: http://svn.apache.org/viewcvs?view=rev&rev=109812
Log:
found more cases where the message parser would do the wrong thing wrt missing header/body separator. built out the test a bit more, redid the header parsing routine a bit to deal with all of it. moved the mbox/mbx separator bit out of the loop since it should only ever hit on the first message line.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
spamassassin/trunk/t/missing_hb_separator.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?view=diff&rev=109812&p1=spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm&r1=109811&p2=spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm&r2=109812
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Sat Dec 4 09:05:23 2004
@@ -125,111 +125,105 @@
@message = split ( /^/m, $message );
}

+ return $self unless @message;
+
+ # Pull off mbox and mbx separators
+ if ( $message[0] =~ /^From\s/ ) {
+ # mbox formated mailbox
+ $self->{'mbox_sep'} = shift @message;
+ } elsif ($message[0] =~ MBX_SEPARATOR) {
+ $_ = shift @message;
+
+ # Munge the mbx message separator into mbox format as a sort of
+ # de facto portability standard in SA's internals. We need to
+ # to this so that Mail::SpamAssassin::Util::parse_rfc822_date
+ # can parse the date string...
+ if (/([\s|\d]\d)-([a-zA-Z]{3})-(\d{4})\s(\d{2}):(\d{2}):(\d{2})/) {
+ # $1 = day of month
+ # $2 = month (text)
+ # $3 = year
+ # $4 = hour
+ # $5 = min
+ # $6 = sec
+ my @arr = localtime(timelocal($6,$5,$4,$1,$MONTH{lc($2)}-1,$3));
+ my $address;
+ foreach (@message) {
+ if (/From:\s[^<]+<([^>]+)>/) {
+ $address = $1;
+ last;
+ } elsif (/From:\s([^<^>]+)/) {
+ $address = $1;
+ last;
+ }
+ }
+ $self->{'mbox_sep'} = "From $address $DAY_OF_WEEK[$arr[6]] $2 $1 $4:$5:$6 $3\n";
+ }
+ }
+
# Go through all the headers of the message
my $header = '';
my $boundary;
while ( my $current = shift @message ) {
- if ( $current =~ /^From\s/ ) {
- # mbox formated mailbox
- $self->{'mbox_sep'} = $current;
- next;
- } elsif ($current =~ MBX_SEPARATOR) {
- # Munge the mbx message separator into mbox format as a sort of
- # de facto portability standard in SA's internals. We need to
- # to this so that Mail::SpamAssassin::Util::parse_rfc822_date
- # can parse the date string...
- if (/([\s|\d]\d)-([a-zA-Z]{3})-(\d{4})\s(\d{2}):(\d{2}):(\d{2})/o) {
- # $1 = day of month
- # $2 = month (text)
- # $3 = year
- # $4 = hour
- # $5 = min
- # $6 = sec
- my @arr = localtime(timelocal($6,$5,$4,$1,$MONTH{lc($2)}-1,$3));
- my $address;
- foreach (@message) {
- if (/From:\s[^<]+<([^>]+)>/) {
- $address = $1;
- last;
- } elsif (/From:\s([^<^>]+)/) {
- $address = $1;
- last;
- }
- }
- $self->{'mbox_sep'} = "From $address $DAY_OF_WEEK[$arr[6]] $2 $1 $4:$5:$6 $3\n";
- next;
- }
- }
-
- # Store the non-modified headers in a scalar
- # if missing_head_body_separator is set, this is the last run through, to
- # deal with the last header in the message.
unless ($self->{'missing_head_body_separator'}) {
$self->{'pristine_headers'} .= $current;
-
- # Check for missing head/body separator and deal appropriately
- if (!@message || (defined $boundary && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/)) {
- # No body or no separator before mime boundary is invalid
- $self->{'missing_head_body_separator'} = 1;
- unshift(@message, "\n");
- }
}

# NB: Really need to figure out special folding rules here!
- if ( $current =~ /^[ \t]+/ ) { # if its a continuation
+ if ( $current =~ /^[ \t]+\S/ ) {
+ # append continuations if there's a header in process
if ($header) {
- $header .= $current; # fold continuations
-
- # If we're currently dealing with a content-type header, and there's a
- # boundary defined, use it. Since there could be multiple
- # content-type headers in a message, the last one will be the one we
- # should use, so just keep updating as they come in.
- if ($header =~ /^content-type:\s*(\S.*)$/is) {
- my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($1);
- $boundary = $temp_boundary if (defined $temp_boundary);
- }
+ $header .= $current;
}
+ }
+ else {
+ # Ok, there's a header here, let's go ahead and add it in.
+ if ($header) {
+ # Yes, the /s is needed to match \n too.
+ my ($key, $value) = split (/:\s*(?=.)/s, $header, 2);

- # we'll just ignore if there's no header already set, for now.
+ # If it's not a valid header (aka: not in the form "foo: bar"), skip it.
+ if (defined $value) {
+ # limit the length of the pairs we store
+ if (length($key) > MAX_HEADER_KEY_LENGTH) {
+ $key = substr($key, 0, MAX_HEADER_KEY_LENGTH);
+ $self->{'truncated_header'} = 1;
+ }
+ if (length($value) > MAX_HEADER_VALUE_LENGTH) {
+ $value = substr($value, 0, MAX_HEADER_VALUE_LENGTH);
+ $self->{'truncated_header'} = 1;
+ }
+ $self->header($key, $value);
+ }
+ }

- next;
+ # not a continuation...
+ $header = $current;
}

- # Ok, there's a header here, let's go ahead and add it in.
if ($header) {
- # Yes, the /s is needed to match \n too.
- my ($key, $value) = split (/:\s*(?=.)/s, $header, 2);
-
- # If it's not a valid header (aka: not in the form "foo: bar"), skip it.
- if (defined $value) {
- # limit the length of the pairs we store
- if (length($key) > MAX_HEADER_KEY_LENGTH) {
- $key = substr($key, 0, MAX_HEADER_KEY_LENGTH);
- $self->{'truncated_header'} = 1;
- }
- if (length($value) > MAX_HEADER_VALUE_LENGTH) {
- $value = substr($value, 0, MAX_HEADER_VALUE_LENGTH);
- $self->{'truncated_header'} = 1;
- }
- $self->header($key, $value);
-
+ if ($header =~ /^\r?$/) {
+ last;
+ }
+ else {
# If we're currently dealing with a content-type header, and there's a
# boundary defined, use it. Since there could be multiple
# content-type headers in a message, the last one will be the one we
# should use, so just keep updating as they come in.
- if (lc $key eq 'content-type') {
- my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($value);
+ if ($header =~ /^content-type:\s*(\S.*)$/is) {
+ my($type,$temp_boundary) = Mail::SpamAssassin::Util::parse_content_type($1);
$boundary = $temp_boundary if (defined $temp_boundary);
}
+
+ # Check for missing head/body separator
+ if (!@message || (defined $boundary && $message[0] =~ /^--\Q$boundary\E(?:--|\s*$)/)) {
+ # No body or no separator before mime boundary is invalid
+ $self->{'missing_head_body_separator'} = 1;
+ unshift(@message, "\n");
+ }
}
}
-
- # Ok, we found the header/body blank line ...
- last if ($current =~ /^\r?$/);
-
- # not a continuation...
- $header = $current;
}
+ undef $header;

# Store the pristine body for later -- store as a copy since @message
# will get modified below

Modified: spamassassin/trunk/t/missing_hb_separator.t
Url: http://svn.apache.org/viewcvs/spamassassin/trunk/t/missing_hb_separator.t?view=diff&rev=109812&p1=spamassassin/trunk/t/missing_hb_separator.t&r1=109811&p2=spamassassin/trunk/t/missing_hb_separator.t&r2=109812
==============================================================================
--- spamassassin/trunk/t/missing_hb_separator.t (original)
+++ spamassassin/trunk/t/missing_hb_separator.t Sat Dec 4 09:05:23 2004
@@ -20,7 +20,7 @@
use SATest; sa_t_init("missing_hb_separator");
use Mail::SpamAssassin;

-plan tests => 3;
+plan tests => 8;

# initialize SpamAssassin
my $sa = Mail::SpamAssassin->new({
@@ -42,7 +42,7 @@

# make sure we catch w/out body, and that we catch the last header

-@msg = ("Content-Type: text/plain; boundary=--foo\n","X-Message-Info: foo\n");
+@msg = ("Content-Type: text/plain; boundary=foo\n","X-Message-Info: foo\n");
$mail = $sa->parse(\@msg, 1);
$status = $sa->check($mail);

@@ -52,6 +52,7 @@
}

ok ( $result == 2 );
+ok ( $mail->{pristine_body} eq "" );

$status->finish();
$mail->finish();
@@ -61,7 +62,7 @@
# we should also catch no separator before the mime part boundary, and the
# last header

-@msg = ("Content-Type: text/plain;\n"," boundary=--foo\n","X-Message-Info: foo\n","--foo\n");
+@msg = ("Content-Type: text/plain;\n"," boundary=foo\n","X-Message-Info: foo\n","--foo\n");
$mail = $sa->parse(\@msg, 1);
$status = $sa->check($mail);

@@ -71,15 +72,34 @@
}

ok ( $result == 2 );
+ok ( $mail->{pristine_body} eq "--foo\n" );

$status->finish();
$mail->finish();

#####

+@msg = ("X-Message-Info: foo\n", "Content-Type: text/plain; boundary=foo\n","--foo\n");
+$mail = $sa->parse(\@msg, 1);
+$status = $sa->check($mail);
+
+$result = 0;
+foreach (@{$status->{test_names_hit}}) {
+ $result++ if ($_ eq 'MISSING_HB_SEP' || $_ eq 'X_MESSAGE_INFO');
+}
+
+ok ( $result == 2 );
+ok ( $mail->{pristine_body} eq "--foo\n" );
+
+$status->finish();
+$mail->finish();
+
+
+#####
+
# A normal message, should not trigger

-@msg = ("Content-Type: text/plain; boundary=--foo\n","\n","--foo\n");
+@msg = ("Content-Type: text/plain;\n", " boundary=foo\n","\n","--foo\n");
$mail = $sa->parse(\@msg, 1);
$status = $sa->check($mail);

@@ -88,7 +108,8 @@
$result = 0 if ($_ eq 'MISSING_HB_SEP');
}

-ok ( $result );
+ok ( $result && $mail->{pristine_body} eq "--foo\n" );
+ok ( $mail->{pristine_body} eq "--foo\n" );

$status->finish();
$mail->finish();