Mailing List Archive

svn commit: r202274 - in /spamassassin/trunk/masses: mass-check-results-to-mbox mboxget
Author: jm
Date: Tue Jun 28 13:08:15 2005
New Revision: 202274

URL: http://svn.apache.org/viewcvs?rev=202274&view=rev
Log:
masses consolidation: refactor mass-check-results-to-mbox functionality into mboxget. Note: this means that mboxget now annotates its output with the original message's mass-check ID in a new 'X-Mass-Check-Id:' header; use the '-noannotate' switch to avoid that. Otherwise it is perfectly compatible with both scripts.

Modified:
spamassassin/trunk/masses/mass-check-results-to-mbox
spamassassin/trunk/masses/mboxget

Modified: spamassassin/trunk/masses/mass-check-results-to-mbox
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/mass-check-results-to-mbox?rev=202274&r1=202273&r2=202274&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check-results-to-mbox (original)
+++ spamassassin/trunk/masses/mass-check-results-to-mbox Tue Jun 28 13:08:15 2005
@@ -1,148 +1,2 @@
#!/usr/bin/perl
-#
-# very handy for e.g.:
-#
-# grep SUBJECT_FREQ spam.log | ./mass-check-results-to-mbox | grep Subject:
-#
-# <@LICENSE>
-# Copyright 2004 Apache Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# </@LICENSE>
-
-my $grep = undef;
-my $annotate = 1;
-while ($#ARGV >= 0) {
- $_ = $ARGV[0]; shift;
- if ($_ eq '-grep') { $grep = $ARGV[0]; shift; }
- if ($_ eq '-noannotate') { $annotate = 0; }
-}
-
-while (<>) {
- s/^[^\s:]+://; # filenames, from "grep foo *"
-
- next if /^#/;
- /^.\s+-?\d+\s+(\S+) / or next;
- my $mail = $1;
-
- if ($mail =~ /^(\S+):</) {
- my $msgp = find_in_mailbox ($mail);
- if (defined $msgp) {
- $annotate and unshift (@$msgp, "X-Mass-Check-Id: $mail\n");
- handle ($msgp);
- } else {
- mywarn ("failed to find message for $mail\n");
- }
-
- } else {
- if ($mail =~ /\.gz$/) {
- open (IN, "gunzip -cd $mail |") or mywarn ("gunzip $mail failed: $@");
- } elsif ($mail =~ /\.bz2$/) {
- open (IN, "bzip2 -cd $mail |") or mywarn ("bunzip2 $mail failed: $@");
- } else {
- open (IN, "<$mail") or mywarn ("open $mail failed: $@");
- }
- my @msg = (<IN>); close IN;
-
- while (scalar @msg > 0 &&
- $msg[0] =~ /^(?:From|X-Mass-Check-Id:) /)
- {
- shift @msg;
- }
- $annotate and unshift (@msg, "X-Mass-Check-Id: $mail\n");
-
- handle (\@msg);
- }
-}
-
-###########################################################################
-
-sub find_in_mailbox {
- my ($mail) = @_;
- $mail =~ /^(\S+):</;
- $folder = $1; my $wantid = $_;
-
- if (defined $CURRENT_MBOX_OPEN && $folder eq $CURRENT_MBOX_OPEN) {
- # try from current position first
- my $msgp = mbox_search($mail, $folder);
- if (defined ($msgp->[0])) { return $msgp; }
- }
-
- # failed. have to (re-|)open.
- if ($folder =~ /\.gz$/) {
- open (MBOX, "gunzip -cd $folder |") or mywarn ("gunzip $folder failed: $@");
- } elsif ($folder =~ /\.bz2$/) {
- open (MBOX, "bzip2 -cd $folder |") or mywarn ("bunzip2 $folder failed: $@");
- } else {
- open (MBOX, "<$folder") or mywarn ("open $folder failed: $@");
- }
-
- $CURRENT_MBOX_OPEN = $folder;
- while (<MBOX>) { /^From \S+ +... ... / and last; }
- my $msgp = mbox_search($mail, $folder);
- return $msgp;
-}
-
-sub mbox_search {
- my ($mail, $folder) = @_;
- my $wantid = $mail;
-
- my $count = 0;
- my $host = $ENV{'HOSTNAME'} || $ENV{'HOST'} || `hostname` || 'localhost';
-
- while (!eof MBOX) {
- my @msg = ();
- my $msgid = undef;
- my $in_header = 1;
- $count++;
-
- while (<MBOX>) {
- if (/^$/ && $in_header) {
- $in_header = 0 ;
-
- if (!defined ($msgid)) {
- $msgid = sprintf('<no-msgid-in-msg-%06d@%s.masses.spamassasin.org>', $count, $host);
- push (@msg, "Message-Id: $msgid\n");
- }
- }
- if ($in_header) {
- /^Message-Id: (.*)\s*$/i and $msgid = $1;
- }
-
- /^From \S+ +... ... / and last;
- push (@msg, $_);
- }
-
- $msgid = "$folder:$msgid"; # so we can find it again
- $msgid =~ s/\s/_/gs; # make safe
-
- # print "JMD $wantid $msgid\n";
-
- if ($wantid ne $msgid) { next; }
- return \@msg;
- }
-
- close MBOX; $CURRENT_MBOX_OPEN = undef;
-}
-
-###########################################################################
-
-sub handle {
- my $msgp = shift;
- print STDOUT "From nobody\@nowhere Wed Aug 21 12:41:07 2002\n", @$msgp, "\n";
-}
-
-sub mywarn {
- warn @_;
- if ($annotate) { print "X-Mass-Check-Warning: ".join ('',@_)."\n"; }
-}
+exec("./mboxget", @ARGV);

Modified: spamassassin/trunk/masses/mboxget
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/mboxget?rev=202274&r1=202273&r2=202274&view=diff
==============================================================================
--- spamassassin/trunk/masses/mboxget (original)
+++ spamassassin/trunk/masses/mboxget Tue Jun 28 13:08:15 2005
@@ -1,8 +1,12 @@
#!/usr/bin/perl -w

-# mboxget - get a message from a mailbox
+# mboxget - get a message from a mailbox or maildir, from mass-check output
#
-# usage: mboxget [mass-check-mbox-id ...]
+# usage: mboxget [-noannotate] [mass-check-mbox-or-file-id ...]
+#
+# example:
+#
+# grep SUBJECT_FREQ spam.log | ./mboxget | grep Subject:
#
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
@@ -25,38 +29,89 @@
my $prog = $0;
$prog =~ s@.*/@@;

+sub mywarn;
+
+my $annotate = 1;
+while ($#ARGV >= 0) {
+ $_ = $ARGV[0]; shift;
+ if ($_ eq '-noannotate') { $annotate = 0; }
+ else { unshift @ARGV, $_; last; }
+}
+
my @inputs;
push @inputs, @ARGV;

if (!@inputs) {
while (<STDIN>) {
- if (/^[Y.]\s+-?\d+\s+(\S+)\s+\S+/) {
+ s/^[^\s:]+://; # filenames, from "grep foo *"
+
+ if (/^[Y\.]\s+-?\d+\s+(\S+)\s+\S+/) {
# mass-check format
- push @inputs, $1;
+ handle_input($1);
}
else {
next if /^#/;
chomp;
- push @inputs, $_;
+ handle_input($_);
}
}
}
+exit;

-foreach my $where (@inputs) {
+sub handle_input {
+ my $where = shift;
my ($file, $offset) = ($where =~ m/(.*?)(?:\.(\d+))?$/);
- open(INPUT, $file) || die("$prog: open $file failed: $!\n");
+
+ if ($file =~ /\.gz$/) {
+ open (INPUT, "gunzip -cd $file |") or mywarn "gunzip $file failed: $!";
+ } elsif ($file =~ /\.bz2$/) {
+ open (INPUT, "bzip2 -cd $file |") or mywarn "bunzip2 $file failed: $!";
+ } else {
+ open (INPUT, "<$file") or mywarn "open $file failed: $!";
+ }
+
if ($offset) {
- seek(INPUT, $offset, 0) || die("$prog: seek $offset failed: $!\n");
+ # TODO: steal open-file caching code from old revisions of
+ # mass-check-results-to-mbox
+ if (!seek(INPUT, $offset, 0)) {
+ mywarn "$prog: seek $offset failed: $!\n";
+ close INPUT;
+ return;
+ }
}
+
+ # read the message into @msg
my $past = 0;
+ my @msg = ();
while (<INPUT>) {
- if ($past) {
+ if ($past && $offset) {
+ # only do this for mboxes
last if substr($_,0,5) eq "From ";
}
else {
$past = 1;
}
- print $_;
+ push (@msg, $_);
}
close INPUT;
+
+ # now chop off the leading headers that may have come from a previous
+ # run, or will interfere with insertion of the X-Mass-Check-Id hdr
+ my $fromline = "From nobody\@nowhere Wed Jan 1 00:00:00 2000\n";
+ while (scalar @msg > 0 &&
+ $msg[0] =~ /^(?:From|X-Mass-Check-Id:) /)
+ {
+ if ($msg[0] =~ /^From /) { $fromline = $msg[0]; }
+ shift @msg;
+ }
+
+ # and output
+ $annotate and unshift (@msg, "X-Mass-Check-Id: $where\n");
+ print $fromline, @msg, "\n";
+}
+
+sub mywarn {
+ warn @_;
+ if ($annotate) { print "X-Mass-Check-Warning: ".join ('',@_)."\n"; }
}
+