Mailing List Archive

svn commit: r485836 - in /spamassassin/branches/3.1: lib/Mail/SpamAssassin/ArchiveIterator.pm sa-learn.raw spamassassin.raw
Author: dos
Date: Mon Dec 11 11:37:16 2006
New Revision: 485836

URL: http://svn.apache.org/viewvc?view=rev&rev=485836
Log:
bug 5145: better deal with STDIN in spamassassin and sa-learn, since ArchiveIterator doesn't deal with it so well

Modified:
spamassassin/branches/3.1/lib/Mail/SpamAssassin/ArchiveIterator.pm
spamassassin/branches/3.1/sa-learn.raw
spamassassin/branches/3.1/spamassassin.raw

Modified: spamassassin/branches/3.1/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=485836&r1=485835&r2=485836
==============================================================================
--- spamassassin/branches/3.1/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/3.1/lib/Mail/SpamAssassin/ArchiveIterator.pm Mon Dec 11 11:37:16 2006
@@ -279,18 +279,21 @@
C<mbox> an mbox formatted file, or C<mbx> for an mbx formatted directory.

C<detect> can also be used. This assumes C<mbox> for any file whose path
-contains the pattern C</\.mbox/i>, C<file> for STDIN and anything that is
-not a directory, or C<directory> otherwise.
+contains the pattern C</\.mbox/i>, C<file> for anything that is not a
+directory, or C<directory> otherwise.

=item raw_location

-Path to file or directory. Can be "-" for STDIN. File globbing is allowed
-using the standard csh-style globbing (see C<perldoc -f glob>). C<~> at the
-front of the value will be replaced by the C<HOME> environment variable.
-Escaped whitespace is protected as well.
+Path to file or directory. File globbing is allowed using the
+standard csh-style globbing (see C<perldoc -f glob>). C<~> at the
+front of the value will be replaced by the C<HOME> environment
+variable. Escaped whitespace is protected as well.

B<NOTE:> C<~user> is not allowed.

+B<NOTE 2:> C<-> is not allowed as a raw location. To have
+ArchiveIterator deal with STDIN, generate a temp file.
+
=back

=cut
@@ -727,6 +730,11 @@
next;
}

+ if ($rawloc eq '-') {
+ warn 'archive-iterator: raw location "-" is not supported';
+ next;
+ }
+
# use ham by default, things like "spamassassin" can't specify the type
$class = substr($class, 0, 1) || 'h';

@@ -741,8 +749,7 @@
# filename indicates mbox
$method = \&scan_mailbox;
}
- elsif ($location eq '-' || !(-d $location)) {
- # stdin is considered a file if not passed as mbox
+ elsif (!(-d $location)) {
$method = \&scan_file;
}
else {
@@ -970,7 +977,7 @@
my ($self, $class, $folder) = @_;
my @files;

- if ($folder ne '-' && -d $folder) {
+ if (-d $folder) {
# passed a directory of mboxes
$folder =~ s/\/\s*$//; #Remove trailing slash, if there
if (!opendir(DIR, $folder)) {
@@ -1071,7 +1078,7 @@
my ($self, $class, $folder) = @_;
my (@files, $fp);

- if ($folder ne '-' && -d $folder) {
+ if (-d $folder) {
# got passed a directory full of mbx folders.
$folder =~ s/\/\s*$//; # remove trailing slash, if there is one
if (!opendir(DIR, $folder)) {

Modified: spamassassin/branches/3.1/sa-learn.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/sa-learn.raw?view=diff&rev=485836&r1=485835&r2=485836
==============================================================================
--- spamassassin/branches/3.1/sa-learn.raw (original)
+++ spamassassin/branches/3.1/sa-learn.raw Mon Dec 11 11:37:16 2006
@@ -368,29 +368,44 @@
close(F);
}

- # add leftover args as targets
- foreach (@ARGV) { target($_); }
+ ###########################################################################
+ # Deal with the target listing, and STDIN -> tempfile

- #No arguments means they want stdin:
- if ( $#targets < 0 ) {
- target('-');
- }
+ my $tempfile; # will be defined if stdin -> tempfile
+ push(@targets, @ARGV);
+ @targets = ('-') unless @targets;
+
+ for(my $elem = 0; $elem <= $#targets; $elem++) {
+ # ArchiveIterator doesn't really like STDIN, so if "-" is specified
+ # as a target, make it a temp file instead.
+ if ( $targets[$elem] =~ /(?:^|:)-$/ ) {
+ if (defined $tempfile) {
+ # uh-oh, stdin specified multiple times?
+ warn "skipping extra stdin target (".$targets[$elem].")\n";
+ splice @targets, $elem, 1;
+ $elem--; # go back to this element again
+ next;
+ }
+ else {
+ my $handle;

- # mbox and mbx doesn't deal with STDIN, so make a temp file if they want STDIN.
- # do it here since they may specify "-" on the commandline
- #
- my $tempfile;
- if ( $targets[0] =~ /:mbo?x:-$/ ) {
- my $handle;
-
- local $/ = undef; # go into slurp mode
- ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
- print {$handle} <STDIN>;
- close $handle;
+ local $/ = undef; # go into slurp mode
+ ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
+ print {$handle} <STDIN>;
+ close $handle;

- # re-aim the targets at the tempfile instead of STDIN
- $targets[0] =~ s/:-$/:$tempfile/;
+ # re-aim the targets at the tempfile instead of STDIN
+ $targets[$elem] =~ s/-$/$tempfile/;
+ }
+ }
+
+ # make sure the target list is in the normal AI format
+ if ($targets[$elem] !~ /^[^:]*:[a-z]+:/) {
+ $targets[$elem] = target($targets[$elem]);
+ }
}
+
+ ###########################################################################

my $iter = new Mail::SpamAssassin::ArchiveIterator(
{

Modified: spamassassin/branches/3.1/spamassassin.raw
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/spamassassin.raw?view=diff&rev=485836&r1=485835&r2=485836
==============================================================================
--- spamassassin/branches/3.1/spamassassin.raw (original)
+++ spamassassin/branches/3.1/spamassassin.raw Mon Dec 11 11:37:16 2006
@@ -304,6 +304,44 @@
}

###########################################################################
+# Deal with the target listing, and STDIN -> tempfile
+
+my $tempfile; # will be defined if stdin -> tempfile
+push(@targets, @ARGV);
+@targets = ('-') unless @targets;
+
+for(my $elem = 0; $elem <= $#targets; $elem++) {
+ # ArchiveIterator doesn't really like STDIN, so if "-" is specified
+ # as a target, make it a temp file instead.
+ if ( $targets[$elem] =~ /(?:^|:)-$/ ) {
+ if (defined $tempfile) {
+ # uh-oh, stdin specified multiple times?
+ warn "skipping extra stdin target (".$targets[$elem].")\n";
+ splice @targets, $elem, 1;
+ $elem--; # go back to this element again
+ next;
+ }
+ else {
+ my $handle;
+
+ local $/ = undef; # go into slurp mode
+ ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
+ print {$handle} <STDIN>;
+ close $handle;
+
+ # re-aim the targets at the tempfile instead of STDIN
+ $targets[$elem] =~ s/-$/$tempfile/;
+ }
+ }
+
+ # make sure the target list is in the normal AI format
+ if ($targets[$elem] !~ /^[^:]*:[a-z]+:/) {
+ my $format = $opt{'format'} || 'detect';
+ $targets[$elem] = join ( ":", '', $format, $targets[$elem] );
+ }
+}
+
+###########################################################################

# Everything below here needs ArchiveIterator ...
my $iter = new Mail::SpamAssassin::ArchiveIterator(
@@ -316,28 +354,6 @@
);

$iter->set_functions( \&wanted, \&result );
-
-# add leftover args as targets
-# no arguments means they want stdin:
-push ( @ARGV, '-' ) if ( !@ARGV );
-@targets = map { join ( ":", '', $opt{'format'}, $_ ) } @ARGV;
-
-# mbox and mbx handling doesn't deal with STDIN, so make a temp file if they
-# want STDIN. do it here since they may specify "-" on the commandline
-# instead of leaving it as the default.
-#
-my $tempfile;
-if ( $targets[0] =~ /:mbo?x:-$/ ) {
- my $handle;
-
- local $/ = undef; # go into slurp mode
- ( $tempfile, $handle ) = Mail::SpamAssassin::Util::secure_tmpfile();
- print {$handle} <STDIN>;
- close $handle;
-
- # re-aim the targets at the tempfile instead of STDIN
- $targets[0] =~ s/:-$/:$tempfile/;
-}

# Go run the messages!
# bug 4930: use a temp variable since "||=" decides whether or not to set the