Mailing List Archive

svn commit: rev 6249 - in incubator/spamassassin/trunk: . lib/Mail lib/Mail/SpamAssassin lib/Mail/SpamAssassin/MIME masses spamd
Author: felicity
Date: Wed Jan 21 13:49:00 2004
New Revision: 6249

Added:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
- copied, changed from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
- copied, changed from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm
Removed:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/NoMailAudit.pm
Modified:
incubator/spamassassin/trunk/MANIFEST
incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
incubator/spamassassin/trunk/masses/mass-check
incubator/spamassassin/trunk/spamassassin.raw
incubator/spamassassin/trunk/spamd/spamd.raw
Log:
work towards merging new parser into other areas ...

Modified: incubator/spamassassin/trunk/MANIFEST
==============================================================================
--- incubator/spamassassin/trunk/MANIFEST (original)
+++ incubator/spamassassin/trunk/MANIFEST Wed Jan 21 13:49:00 2004
@@ -38,11 +38,10 @@
lib/Mail/SpamAssassin/HTML.pm
lib/Mail/SpamAssassin/Locales.pm
lib/Mail/SpamAssassin/Locker.pm
-lib/Mail/SpamAssassin/MIME.pm
-lib/Mail/SpamAssassin/MIME/Parser.pm
+lib/Mail/SpamAssassin/MsgContainer.pm
+lib/Mail/SpamAssassin/MsgParser.pm
lib/Mail/SpamAssassin/MailingList.pm
lib/Mail/SpamAssassin/NetSet.pm
-lib/Mail/SpamAssassin/NoMailAudit.pm
lib/Mail/SpamAssassin/PerMsgLearner.pm
lib/Mail/SpamAssassin/PerMsgStatus.pm
lib/Mail/SpamAssassin/PersistentAddrList.pm

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm Wed Jan 21 13:49:00 2004
@@ -63,7 +63,7 @@

=head1 SYNOPSIS

- my $mail = Mail::SpamAssassin::NoMailAudit->new();
+ my $mail = Mail::SpamAssassin::MsgParser->parse();

my $spamtest = Mail::SpamAssassin->new();
my $status = $spamtest->check ($mail);
@@ -111,7 +111,7 @@
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::ConfSourceSQL;
use Mail::SpamAssassin::PerMsgStatus;
-use Mail::SpamAssassin::NoMailAudit;
+use Mail::SpamAssassin::MsgParser;
use Mail::SpamAssassin::Bayes;

use File::Basename;
@@ -636,7 +636,7 @@
sub check_message_text {
my $self = shift;
my @lines = split (/^/m, $_[0]);
- my $mail_obj = Mail::SpamAssassin::NoMailAudit->new ('data' => \@lines);
+ my $mail_obj = Mail::SpamAssassin::MsgParser->parse (\@lines);
return $self->check ($mail_obj);
}

@@ -682,7 +682,7 @@

# Let's make sure the markup was removed first ...
my @msg = split (/^/m, $self->remove_spamassassin_markup($mail));
- $mail = Mail::SpamAssassin::NoMailAudit->new ('data' => \@msg);
+ $mail = Mail::SpamAssassin::MsgParser->parse (\@msg);

# learn as spam if enabled
if ( $self->{conf}->{bayes_learn_during_report} ) {
@@ -726,7 +726,7 @@

# Let's make sure the markup was removed first ...
my @msg = split (/^/m, $self->remove_spamassassin_markup($mail));
- $mail = Mail::SpamAssassin::NoMailAudit->new ('data' => \@msg);
+ $mail = Mail::SpamAssassin::MsgParser->parse (\@msg);

# learn as nonspam
$self->learn ($mail, undef, 0, 0);
@@ -1116,7 +1116,7 @@
dbg ("ignore: test message to precompile patterns and load modules");
$self->init($use_user_prefs);

- my $mail = Mail::SpamAssassin::NoMailAudit->new(data => \@testmsg);
+ my $mail = Mail::SpamAssassin::MsgParser->parse(\@testmsg);
my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
{ disable_auto_learning => 1 } );
$status->word_is_in_dictionary("aba"); # load triplets.txt into memory
@@ -1159,7 +1159,7 @@
$self->init(1);
$self->{syntax_errors} += $self->{conf}->{errors};

- my $mail = Mail::SpamAssassin::NoMailAudit->new(data => \@testmsg);
+ my $mail = Mail::SpamAssassin::MsgParser->parse(\@testmsg);
my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
{ disable_auto_learning => 1 } );
$status->check();

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm Wed Jan 21 13:49:00 2004
@@ -64,7 +64,7 @@

use Mail::SpamAssassin;
use Mail::SpamAssassin::ArchiveIterator;
-use Mail::SpamAssassin::NoMailAudit;
+use Mail::SpamAssassin::MsgParser;
use Mail::SpamAssassin::PerMsgLearner;

use Getopt::Long;
@@ -334,13 +334,13 @@
{ die 'HITLIMIT'; }

$messagecount++;
- my $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref);
+ my $ma = Mail::SpamAssassin::MsgParser->parse ($dataref);

if ($ma->get ("X-Spam-Checker-Version")) {
my $newtext = $spamtest->remove_spamassassin_markup($ma);
my @newtext = split (/^/m, $newtext);
$dataref = \@newtext;
- $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref);
+ $ma = Mail::SpamAssassin::MsgParser->parse ($dataref);
}

$ma->{noexit} = 1;

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm Wed Jan 21 13:49:00 2004
@@ -3623,8 +3623,8 @@
sub _multipart_alternative_difference {
my($self) = @_;

- my @ma = $self->{msg}->{mime_parts}->find_parts(qr@^multipart/alternative\b@i);
- my @content = $self->{msg}->{mime_parts}->content_summary();
+ my @ma = $self->{msg}->find_parts(qr@^multipart/alternative\b@i);
+ my @content = $self->{msg}->content_summary();

$self->{madiff} = 0;


Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm (from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm Wed Jan 21 13:49:00 2004
@@ -59,7 +59,7 @@
# University of Illinois, Urbana-Champaign.
# </@LICENSE>

-package Mail::SpamAssassin::MIME;
+package Mail::SpamAssassin::MsgContainer;
use strict;
use MIME::Base64;
use Mail::SpamAssassin;
@@ -72,6 +72,7 @@
sub new {
my $class = shift;
$class = ref($class) || $class;
+ my %opts = @_;

my $self = {
headers => {},
@@ -80,6 +81,10 @@
header_order => [],
};

+ foreach ( 'noexit' ) {
+ $self->{$_} = $opts{$_} if ( exists $opts{$_} );
+ }
+
bless($self,$class);

$self;
@@ -367,6 +372,91 @@
return $header;
}

+
+sub get_pristine_header {
+ my ($self, $hdr) = @_;
+
+ return $self->{pristine_headers} unless $hdr;
+ my(@ret) = $self->{pristine_headers} =~ /^(?:$hdr:[ ]+(.*\n(?:\s+\S.*\n)*))/mig;
+ if (@ret) {
+ return wantarray ? @ret : $ret[-1];
+ }
+ else {
+ return $self->get_header($hdr);
+ }
+}
+
+#sub get { shift->get_header(@_); }
+sub get_header {
+ my ($self, $hdr, $raw) = @_;
+ $raw ||= 0;
+
+ # And now pick up all the entries into a list
+ # This is assumed to include a newline at the end ...
+ # This is also assumed to have removed continuation bits ...
+ my @hdrs;
+ if ( $raw ) {
+ @hdrs = map { s/\r?\n\s+/ /g; $_; } $self->raw_header($hdr);
+ }
+ else {
+ @hdrs = map { "$_\n" } $self->header($hdr);
+ }
+
+ if (wantarray) {
+ return @hdrs;
+ }
+ else {
+ return $hdrs[-1];
+ }
+}
+
+#sub header { shift->get_all_headers(@_); }
+sub get_all_headers {
+ my ($self, $raw) = @_;
+ $raw ||= 0;
+
+ my %cache = ();
+ my @lines = ();
+
+ foreach ( @{$self->{header_order}} ) {
+ push(@lines, "$_: ".($self->get_header($_,$raw))[$cache{$_}++]);
+ }
+
+ if (wantarray) {
+ return @lines;
+ } else {
+ return join ('', @lines);
+ }
+}
+
+#sub body { return shift->get_body(@_); }
+sub get_body {
+ my ($self) = @_;
+ my @ret = split(/^/m, $self->{pristine_body});
+ return \@ret;
+}
+
+# ---------------------------------------------------------------------------
+
+sub get_pristine {
+ my ($self) = @_;
+ return $self->{pristine_headers} . $self->{pristine_body};
+}
+
+sub get_pristine_body {
+ my ($self) = @_;
+ return $self->{pristine_body};
+}
+
+sub as_string {
+ my ($self) = @_;
+ return $self->get_all_headers(1) . "\n" . $self->{pristine_body};
+}
+
+sub ignore {
+ my ($self) = @_;
+ exit (0) unless $self->{noexit};
+}

sub dbg { Mail::SpamAssassin::dbg (@_); }


Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm (from rev 6247, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MIME/Parser.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm Wed Jan 21 13:49:00 2004
@@ -1,6 +1,6 @@
=head1 NAME

-Mail::SpamAssassin::MIME::Parser - parse, decode, and render MIME body parts
+Mail::SpamAssassin::MsgParser - parse, decode, and render MIME body parts

=head1 SYNOPSIS

@@ -17,21 +17,21 @@

=cut

-package Mail::SpamAssassin::MIME::Parser;
+package Mail::SpamAssassin::MsgParser;
use strict;

use Mail::SpamAssassin;
-use Mail::SpamAssassin::MIME;
+use Mail::SpamAssassin::MsgContainer;

=item parse()

-Unlike most modules, Mail::SpamAssassin::MIME::Parser will not return an
-object of the same type, but rather a Mail::SpamAssassin::MIME object.
-To use it, simply call C<Mail::SpamAssassin::MIME::Parser->parse($msg)>,
+Unlike most modules, Mail::SpamAssassin::MsgParser will not return an
+object of the same type, but rather a Mail::SpamAssassin::MsgContainer object.
+To use it, simply call C<Mail::SpamAssassin::MsgParser->parse($msg)>,
where $msg is a scalar with the entire contents of the message.

The procedure used to parse a message is recursive and ends up generating
-a tree of M::SA::MIME objects. parse() will generate the parent node
+a tree of M::SA::MsgContainer objects. parse() will generate the parent node
of the tree, then pass the body of the message to _parse_body() which begins
the recursive process.

@@ -41,6 +41,7 @@

sub parse {
my($self,$message) = @_;
+ $message ||= \*STDIN;

dbg("---- MIME PARSER START ----");

@@ -64,7 +65,7 @@
shift @message if ( @message > 0 && $message[0] =~ /^From\s/ );

# Generate the main object and parse the appropriate MIME-related headers into it.
- my $msg = Mail::SpamAssassin::MIME->new();
+ my $msg = Mail::SpamAssassin::MsgContainer->new();
my $header = '';

# Go through all the headers of the message
@@ -180,7 +181,7 @@
# Else, there's no boundary, so leave the whole part...
}

- my $part_msg = Mail::SpamAssassin::MIME->new(); # prepare a new tree node
+ my $part_msg = Mail::SpamAssassin::MsgContainer->new(); # prepare a new tree node
my $in_body = 0;
my $header;
my $part_array;
@@ -214,7 +215,7 @@

# make sure we start with a new clean node
$in_body = 0;
- $part_msg = Mail::SpamAssassin::MIME->new();
+ $part_msg = Mail::SpamAssassin::MsgContainer->new();
undef $part_array;
undef $header;


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgLearner.pm Wed Jan 21 13:49:00 2004
@@ -67,7 +67,7 @@
'rules_filename' => '/etc/spamassassin.rules',
'userprefs_filename' => $ENV{HOME}.'/.spamassassin.cf'
});
- my $mail = Mail::SpamAssassin::NoMailAudit->new();
+ my $mail = Mail::SpamAssassin::MsgParser->parse();

my $status = $spamtest->learn ($mail);
...

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Jan 21 13:49:00 2004
@@ -67,7 +67,7 @@
'rules_filename' => '/etc/spamassassin.rules',
'userprefs_filename' => $ENV{HOME}.'/.spamassassin.cf'
});
- my $mail = Mail::SpamAssassin::NoMailAudit->new();
+ my $mail = Mail::SpamAssassin::MsgParser->parse();

my $status = $spamtest->check ($mail);
if ($status->is_spam()) {
@@ -101,6 +101,7 @@
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Received;
use Mail::SpamAssassin::Util;
+use Mail::SpamAssassin::MsgParser;

use constant MAX_BODY_LINE_LENGTH => 2048;

@@ -750,7 +751,7 @@
EOM

my @lines = split (/^/m, $newmsg);
- return Mail::SpamAssassin::NoMailAudit->new(data => \@lines);
+ return Mail::SpamAssassin::MsgParser->parse(\@lines);
}

sub rewrite_headers {
@@ -792,7 +793,7 @@
}

push(@pristine_headers, "\n", split (/^/m, $self->{msg}->get_pristine_body()));
- return Mail::SpamAssassin::NoMailAudit->new(data => \@pristine_headers);
+ return Mail::SpamAssassin::MsgParser->parse(\@pristine_headers);
}

sub _process_header {
@@ -1295,7 +1296,7 @@
my $getraw = ($hdrname eq 'ALL' || $hdrname =~ s/:raw$//);

if ($hdrname eq 'ALL') {
- $_ = $self->{msg}->get_all_headers();
+ $_ = $self->{msg}->get_all_headers($getraw);
}
# EnvelopeFrom: the SMTP MAIL FROM: addr
elsif ($hdrname eq 'EnvelopeFrom') {
@@ -1306,22 +1307,22 @@
}
# ToCc: the combined recipients list
elsif ($hdrname eq 'ToCc') {
- $_ = join ("\n", $self->{msg}->get_header ('To'));
+ $_ = join ("\n", $self->{msg}->get_header ('To', $getraw));
if ($_ ne '') {
chop $_;
$_ .= ", " if /\S/;
}
- $_ .= join ("\n", $self->{msg}->get_header ('Cc'));
+ $_ .= join ("\n", $self->{msg}->get_header ('Cc', $getraw));
undef $_ if $_ eq '';
}
# MESSAGEID: handle lists which move the real message-id to another
# header for resending.
elsif ($hdrname eq 'MESSAGEID') {
$_ = join ("\n", grep { defined($_) && length($_) > 0 }
- $self->{msg}->get_header ('X-Message-Id'),
- $self->{msg}->get_header ('Resent-Message-Id'),
- $self->{msg}->get_header ('X-Original-Message-ID'), # bug 2122
- $self->{msg}->get_header ('Message-Id'));
+ $self->{msg}->get_header ('X-Message-Id', $getraw),
+ $self->{msg}->get_header ('Resent-Message-Id', $getraw),
+ $self->{msg}->get_header ('X-Original-Message-ID', $getraw), # bug 2122
+ $self->{msg}->get_header ('Message-Id', $getraw));
}
# untrusted relays list, as string
elsif ($hdrname eq 'X-Spam-Relays-Untrusted') {
@@ -1333,7 +1334,7 @@
}
# a conventional header
else {
- my @hdrs = $self->{msg}->get_header ($hdrname);
+ my @hdrs = $self->{msg}->get_header ($hdrname, $getraw);
if ($#hdrs >= 0) {
$_ = join ('', @hdrs);
}
@@ -1355,9 +1356,6 @@
s/^[\'\"]*(.*?)[\'\"]*\s*<.+>\s*$/$1/g # Foo Blah <jm@foo>
or s/^.+\s\((.*?)\)\s*$/$1/g; # jm@foo (Foo Blah)
}
- elsif (!$getraw) {
- $_ = $self->mime_decode_header ($_);
- }
}
$self->{hdr_cache}->{$request} = $_;
}
@@ -2372,8 +2370,8 @@
# cannot trust any Envelope-From headers, since they're likely to be
# incorrect fetchmail guesses.

- if ($self->get ("X-Sender")) {
- my $rcvd = $self->get ("Received");
+ if ($self->get ("X-Sender", 1)) {
+ my $rcvd = $self->get ("Received", 1);
if ($rcvd =~ /\(fetchmail/) {
dbg ("X-Sender and fetchmail signatures found, cannot trust envelope-from");
return undef;
@@ -2381,13 +2379,13 @@
}

# procmailrc notes this, amavisd are adding it, we recommend it
- if ($envf = $self->get ("X-Envelope-From")) { goto ok; }
+ if ($envf = $self->get ("X-Envelope-From", 1)) { goto ok; }

# qmail, new-inject(1)
- if ($envf = $self->get ("Envelope-Sender")) { goto ok; }
+ if ($envf = $self->get ("Envelope-Sender", 1)) { goto ok; }

# Postfix, sendmail, also mentioned in RFC821
- if ($envf = $self->get ("Return-Path")) { goto ok; }
+ if ($envf = $self->get ("Return-Path", 1)) { goto ok; }

# give up.
return undef;

Modified: incubator/spamassassin/trunk/masses/mass-check
==============================================================================
--- incubator/spamassassin/trunk/masses/mass-check (original)
+++ incubator/spamassassin/trunk/masses/mass-check Wed Jan 21 13:49:00 2004
@@ -120,7 +120,7 @@
eval "use bytes";
use Mail::SpamAssassin::ArchiveIterator;
use Mail::SpamAssassin;
-use Mail::SpamAssassin::NoMailAudit;
+use Mail::SpamAssassin::MsgParser;
use Getopt::Long;
use POSIX qw(strftime);
use constant HAS_TIME_PARSEDATE => eval { require Time::ParseDate; };
@@ -286,7 +286,7 @@
my ($id, $time, $dataref) = @_;
my $out;

- my $ma = Mail::SpamAssassin::NoMailAudit->new('data' => $dataref);
+ my $ma = Mail::SpamAssassin::MsgParser->parse($dataref);
$ma->{noexit} = 1;

# remove SpamAssassin markup, if present and the mail was spam
@@ -295,7 +295,7 @@
my $newtext = $spamtest->remove_spamassassin_markup($ma);
my @newtext = split (/^/m, $newtext);
$dataref = \@newtext;
- $ma = Mail::SpamAssassin::NoMailAudit->new ('data' => $dataref);
+ $ma = Mail::SpamAssassin::MsgParser->parse ($dataref);
}

my $status = $spamtest->check($ma);

Modified: incubator/spamassassin/trunk/spamassassin.raw
==============================================================================
--- incubator/spamassassin/trunk/spamassassin.raw (original)
+++ incubator/spamassassin/trunk/spamassassin.raw Wed Jan 21 13:49:00 2004
@@ -64,7 +64,7 @@

eval {
require Mail::SpamAssassin;
- require Mail::SpamAssassin::NoMailAudit;
+ require Mail::SpamAssassin::MsgParser;

# gnu_getopt is not available in Getopt::Long 2.24, see bug 732
# gnu_compat neither.
@@ -123,9 +123,8 @@

my $mail;

- use Mail::SpamAssassin::NoMailAudit;
if (!$opt{'lint'} && !$doing_address_only_whitelisting) {
- $mail = Mail::SpamAssassin::NoMailAudit->new ();
+ $mail = Mail::SpamAssassin::MsgParser->parse ();
}

# create the tester factory

Modified: incubator/spamassassin/trunk/spamd/spamd.raw
==============================================================================
--- incubator/spamassassin/trunk/spamd/spamd.raw (original)
+++ incubator/spamassassin/trunk/spamd/spamd.raw Wed Jan 21 13:49:00 2004
@@ -25,7 +25,7 @@
use IO::Pipe;

use Mail::SpamAssassin;
-use Mail::SpamAssassin::NoMailAudit;
+use Mail::SpamAssassin::MsgParser;
use Mail::SpamAssassin::NetSet;

use Getopt::Long;
@@ -731,9 +731,7 @@
"."
);

- my $mail = Mail::SpamAssassin::NoMailAudit->new (
- data => \@msglines
- );
+ my $mail = Mail::SpamAssassin::MsgParser->parse (\@msglines);

# Check length if we're supposed to
if($expected_length && ($actual_length != $expected_length)) {