Mailing List Archive

svn commit: r497030 - in /spamassassin/trunk: ./ MANIFEST spamc/libspamc.c spamc/libspamc.h spamc/spamc.c spamc/spamc.pod spamd/PROTOCOL spamd/spamd.raw t/spamc_headers.t
Author: jm
Date: Wed Jan 17 06:52:17 2007
New Revision: 497030

URL: http://svn.apache.org/viewvc?view=rev&rev=497030
Log:
bug 5296: add spamc --headers switch, which scans messages and transmits back just rewritten headers. This is more bandwidth-efficient than the normal mode of scanning, but only works for 'report_safe 0'. Bump spamc/spamd's protocol version to 1.4, to reflect new HEADERS verb. update spamd/PROTOCOL for current protocol. add 'sa-compile' to the SVN ignored-files list.

Added:
spamassassin/trunk/t/spamc_headers.t (with props)
Modified:
spamassassin/trunk/ (props changed)
spamassassin/trunk/MANIFEST
spamassassin/trunk/spamc/libspamc.c
spamassassin/trunk/spamc/libspamc.h
spamassassin/trunk/spamc/spamc.c
spamassassin/trunk/spamc/spamc.pod
spamassassin/trunk/spamd/PROTOCOL
spamassassin/trunk/spamd/spamd.raw

Propchange: spamassassin/trunk/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Wed Jan 17 06:52:17 2007
@@ -17,6 +17,7 @@
sa-filter
sa-learn
sa-update
+sa-compile
site_perl
spamassassin
testmails

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Wed Jan 17 06:52:17 2007
@@ -478,3 +478,4 @@
t/basic_obj_api.t
t/tainted_msg.t
t/data/taintcheckplugin.pm
+t/spamc_headers.t

Modified: spamassassin/trunk/spamc/libspamc.c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/libspamc.c?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/libspamc.c (original)
+++ spamassassin/trunk/spamc/libspamc.c Wed Jan 17 06:52:17 2007
@@ -139,7 +139,7 @@
*/

/* Set the protocol version that this spamc speaks */
-static const char *PROTOCOL_VERSION = "SPAMC/1.3";
+static const char *PROTOCOL_VERSION = "SPAMC/1.4";

/* "private" part of struct message.
* we use this instead of the struct message directly, so that we
@@ -148,6 +148,7 @@
struct libspamc_private_message
{
int flags; /* copied from "flags" arg to message_read() */
+ int alloced_size; /* allocated space for the "out" buffer */
};

int libspamc_timeout = 0;
@@ -730,6 +731,7 @@
return EX_OSERR;
}
m->priv->flags = flags;
+ m->priv->alloced_size = 0;

if (flags & SPAMC_PING) {
_clear_message(m);
@@ -1065,6 +1067,55 @@
#endif
}

+int
+_append_original_body (struct message *m, int flags)
+{
+ char *cp, *cpend, *bodystart;
+ int bodylen, outspaceleft, towrite;
+
+ /* at this stage, m->out now contains the rewritten headers.
+ * find and append the raw message's body, up to m->priv->alloced_size
+ * bytes.
+ */
+
+#define CRNLCRNL "\r\n\r\n"
+#define CRNLCRNL_LEN 4
+#define NLNL "\n\n"
+#define NLNL_LEN 2
+
+ cpend = m->raw + m->raw_len;
+ bodystart = NULL;
+
+ for (cp = m->raw; cp < cpend; cp++) {
+ if (*cp == '\r' && cpend - cp >= CRNLCRNL_LEN &&
+ !strncmp(cp, CRNLCRNL, CRNLCRNL_LEN))
+ {
+ bodystart = cp + CRNLCRNL_LEN;
+ break;
+ }
+ else if (*cp == '\n' && cpend - cp >= NLNL_LEN &&
+ !strncmp(cp, NLNL, NLNL_LEN))
+ {
+ bodystart = cp + NLNL_LEN;
+ break;
+ }
+ }
+
+ if (bodystart == NULL) {
+ libspamc_log(flags, LOG_ERR, "failed to find end-of-headers");
+ return EX_SOFTWARE;
+ }
+
+ bodylen = cpend - bodystart;
+ outspaceleft = (m->priv->alloced_size-1) - m->out_len;
+ towrite = (bodylen < outspaceleft ? bodylen : outspaceleft);
+
+ /* copy in the body; careful not to overflow */
+ strncpy (m->out + m->out_len, bodystart, towrite);
+ m->out_len += towrite;
+ return EX_OK;
+}
+
int message_filter(struct transport *tp, const char *username,
int flags, struct message *m)
{
@@ -1118,7 +1169,8 @@
}

m->is_spam = EX_TOOBIG;
- if ((m->outbuf = malloc(m->max_len + EXPANSION_ALLOWANCE + 1)) == NULL) {
+ m->priv->alloced_size = m->max_len + EXPANSION_ALLOWANCE + 1;
+ if ((m->outbuf = malloc(m->priv->alloced_size)) == NULL) {
failureval = EX_OSERR;
goto failure;
}
@@ -1136,6 +1188,8 @@
strcpy(buf, "SYMBOLS ");
else if (flags & SPAMC_PING)
strcpy(buf, "PING ");
+ else if (flags & SPAMC_HEADERS)
+ strcpy(buf, "HEADERS ");
else
strcpy(buf, "PROCESS ");

@@ -1293,20 +1347,17 @@

if (flags & SPAMC_USE_SSL) {
len = full_read_ssl(ssl, (unsigned char *) m->out + m->out_len,
- m->max_len + EXPANSION_ALLOWANCE + 1 -
- m->out_len,
- m->max_len + EXPANSION_ALLOWANCE + 1 -
- m->out_len);
+ m->priv->alloced_size - m->out_len,
+ m->priv->alloced_size - m->out_len);
}
else {
len = full_read(sock, 0, m->out + m->out_len,
- m->max_len + EXPANSION_ALLOWANCE + 1 - m->out_len,
- m->max_len + EXPANSION_ALLOWANCE + 1 -
- m->out_len);
+ m->priv->alloced_size - m->out_len,
+ m->priv->alloced_size - m->out_len);
}


- if (len + m->out_len > m->max_len + EXPANSION_ALLOWANCE) {
+ if (len + m->out_len > (m->priv->alloced_size-1)) {
failureval = EX_TOOBIG;
goto failure;
}
@@ -1326,6 +1377,12 @@
goto failure;
}

+ if (flags & SPAMC_HEADERS) {
+ if (_append_original_body(m, flags) != EX_OK) {
+ goto failure;
+ }
+ }
+
return EX_OK;

failure:
@@ -1344,7 +1401,6 @@
return failureval;
}

-
int message_process(struct transport *trans, char *username, int max_size,
int in_fd, int out_fd, const int flags)
{
@@ -1426,7 +1482,8 @@
}

m->is_spam = EX_TOOBIG;
- if ((m->outbuf = malloc(m->max_len + EXPANSION_ALLOWANCE + 1)) == NULL) {
+ m->priv->alloced_size = m->max_len + EXPANSION_ALLOWANCE + 1;
+ if ((m->outbuf = malloc(m->priv->alloced_size)) == NULL) {
failureval = EX_OSERR;
goto failure;
}

Modified: spamassassin/trunk/spamc/libspamc.h
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/libspamc.h?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/libspamc.h (original)
+++ spamassassin/trunk/spamc/libspamc.h Wed Jan 17 06:52:17 2007
@@ -125,6 +125,9 @@
/* Nov 30, 2006 jm: add -z, zlib support */
#define SPAMC_USE_ZLIB (1<<16)

+/* Jan 16, 2007 jm: get markup headers from spamd */
+#define SPAMC_HEADERS (1<<15)
+
#define SPAMC_MESSAGE_CLASS_SPAM 1
#define SPAMC_MESSAGE_CLASS_HAM 2


Modified: spamassassin/trunk/spamc/spamc.c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/spamc.c?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/spamc.c (original)
+++ spamassassin/trunk/spamc/spamc.c Wed Jan 17 06:52:17 2007
@@ -182,6 +182,7 @@
usg(" -r, --full-spam Print full report for messages identified as\n"
" spam.\n");
usg(" -R, --full Print full report for all messages.\n");
+ usg(" --headers Rewrite only the message headers.\n");
usg(" -E, --exitcode Filter as normal, and set an exit code.\n");

usg(" -x, --no-safe-fallback\n"
@@ -214,9 +215,9 @@
struct transport *ptrn)
{
#ifndef _WIN32
- const char *opts = "-BcrRd:e:fyp:t:s:u:L:C:xzSHU:ElhVKF:0:1:";
+ const char *opts = "-BcrRd:e:fyp:t:s:u:L:C:xzSHU:ElhVKF:0:1:2";
#else
- const char *opts = "-BcrRd:fyp:t:s:u:L:C:xzSHElhVKF:0:1:";
+ const char *opts = "-BcrRd:fyp:t:s:u:L:C:xzSHElhVKF:0:1:2";
#endif
int opt;
int ret = EX_OK;
@@ -241,6 +242,7 @@
{ "tests", no_argument, 0, 'y' },
{ "full-spam", no_argument, 0, 'r' },
{ "full", no_argument, 0, 'R' },
+ { "headers", no_argument, 0, 2 },
{ "exitcode", no_argument, 0, 'E' },
{ "no-safe-fallback", no_argument, 0, 'x' },
{ "log-to-stderr", no_argument, 0, 'l' },
@@ -457,6 +459,11 @@
ptrn->retry_sleep = atoi(spamc_optarg);
break;
}
+ case 2:
+ {
+ flags |= SPAMC_HEADERS;
+ break;
+ }
}
}

@@ -473,7 +480,7 @@
ret = EX_USAGE;
}
if (flags & SPAMC_PING) {
- libspamc_log(flags, LOG_ERR, "Learning excludes ping");
+ libspamc_log(flags, LOG_ERR, "Learning excludes ping");
ret = EX_USAGE;
}
if (flags & SPAMC_REPORT_IFSPAM) {

Modified: spamassassin/trunk/spamc/spamc.pod
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/spamc.pod?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/spamc.pod (original)
+++ spamassassin/trunk/spamc/spamc.pod Wed Jan 17 06:52:17 2007
@@ -229,6 +229,17 @@
C<Compress::Zlib> perl module on the server side; an error will be returned
otherwise.

+=item B<--headers>
+
+Perform a scan, but instead of allowing any part of the message (header and
+body) to be rewritten, limit rewriting to only the message headers. This is
+much more efficient in bandwidth usage, since the response message transmitted
+back from the spamd server will not include the body.
+
+Note that this only makes sense if you are using C<report_safe 0> in the
+scanning configuration on the remote end; with C<report_safe 1>, it is
+likely to result in corrupt messages.
+
=back

=head1 CONFIGURATION FILE

Modified: spamassassin/trunk/spamd/PROTOCOL
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamd/PROTOCOL?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamd/PROTOCOL (original)
+++ spamassassin/trunk/spamd/PROTOCOL Wed Jan 17 06:52:17 2007
@@ -25,7 +25,7 @@
After each side is done writing, it shuts down its side of the connection.

The first line from spamc is the command for spamd to execute (PROCESS a
-message is the command in protocol<=1.2) followed by the protocol version.
+message is the command in protocol<=1.3) followed by the protocol version.

There may be additional headers following the command, which are as yet
undefined. Servers should ignore these, and keep looking for headers which
@@ -45,7 +45,7 @@
Commands
--------

-The following commands are defined as of protocol 1.2:
+The following commands are defined as of protocol 1.4:

CHECK -- Just check if the passed message is spam or not and reply as
described below
@@ -70,6 +70,9 @@
with that message. This includes setting or removing a local
or a remote database (learning, reporting, forgetting, revoking).

+HEADERS -- Same as PROCESS, but return only modified headers, not body
+ (new in protocol 1.4)
+

CHECK command returns just a header (terminated by "\r\n\r\n") with the first
line as for PROCESS (ie a response code and message), and then a header called
@@ -94,7 +97,8 @@
rule names, separated by commas. Note that some versions of the protocol
terminate this line with "\r\n", and some do not, due to an oversight; so
clients should be flexible on whether or not a CR-LF pair follows
-the symbol text, and how many CR-LFs there are.
+the symbol text, and how many CR-LFs there are. Protocol version 1.3
+onwards will never terminate the line with "\r\n".


REPORT command returns the same as CHECK, followed immediately by the report
@@ -122,7 +126,7 @@
SKIP) no additional input is expected. It returns a simple confirmation
response, like this:

- SPAMD/1.2 0 PONG\r\n
+ SPAMD/1.4 0 PONG\r\n

This facility may be useful for monitoring programs which wish to check that
the daemon is alive and providing at least a basic response within a reasonable
@@ -153,15 +157,22 @@
Set: local\r\n
Remove: remote\r\n

+HEADERS returns the same as PROCESS, up to and including the double-newline
+separator between message headers and body -- but stops there. It was
+added in SpamAssassin 3.2.0. Note that this requires protocol version
+1.4.
+
+
+
Headers
-------

-The following optional headers are defined as of protocol 1.2:
+The following optional headers are defined as of protocol 1.4:

Content-length

Length of a request or response body, in bytes (generally a requirement
- as of protocol 1.2)
+ as of protocol version 1.2 onwards)

Spam

@@ -177,9 +188,10 @@

An optional header, sent by the client to the server, whose value may
consist of the string "zlib", indicating that the message body transmitted
- by the client is compressed using Zlib compression. This is new in
- SpamAssassin 3.2.0.
+ by the client is compressed using Zlib compression. (This is new in
+ SpamAssassin 3.2.0.)

As-yet-undefined headers should not be treated as errors, and instead
-should be ignored.
+should be ignored. Multiple headers can appear in requests and responses
+(this was not clearly defined until protocol version 1.3).


Modified: spamassassin/trunk/spamd/spamd.raw
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamd/spamd.raw?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamd/spamd.raw (original)
+++ spamassassin/trunk/spamd/spamd.raw Wed Jan 17 06:52:17 2007
@@ -1252,14 +1252,15 @@
# If we get the PROCESS command, the client is going to send a
# message that we need to filter.

- elsif (/(PROCESS|CHECK|SYMBOLS|REPORT|REPORT_IFSPAM) SPAMC\/(.*)/) {
+ elsif (/(PROCESS|CHECK|SYMBOLS|REPORT|HEADERS|REPORT_IFSPAM) SPAMC\/(.*)/) {
my $method = $1;
+ my $version = $2;
eval {
Mail::SpamAssassin::Util::trap_sigalrm_fully(sub {
die "child processing timeout";
});
alarm $timeout_child if ($timeout_child);
- check($method, $2, $start, $remote_hostname, $remote_hostaddr);
+ check($method, $version, $start, $remote_hostname, $remote_hostaddr);
};
alarm 0;

@@ -1300,7 +1301,7 @@
# Looks like a client is just seeing if we're alive.

elsif (/PING SPAMC\/(.*)/) {
- syswrite( $client, "SPAMD/1.2 $resphash{EX_OK} PONG\r\n" );
+ syswrite( $client, "SPAMD/1.4 $resphash{EX_OK} PONG\r\n" );
}

# If it was none of the above, then we don't know what it was.
@@ -1517,14 +1518,21 @@

my $spamhdr = "Spam: $response_spam_status ; $msg_score / $msg_threshold";

- if ( $method eq 'PROCESS' ) {
+ if ( $method eq 'PROCESS' || $method eq 'HEADERS' ) {

$status->set_tag('REMOTEHOSTNAME', $remote_hostname);
$status->set_tag('REMOTEHOSTADDR', $remote_hostaddr);

# Build the message to send back and measure it
my $msg_resp = $status->rewrite_mail();
+
+ if ($method eq 'HEADERS') {
+ # just the headers; delete everything after first \r\n\r\n
+ $msg_resp =~ s/(\r?\n\r?\n).*$/$1/gs;
+ }
+
my $msg_resp_length = length($msg_resp);
+
if ( $version >= 1.3 ) # Spamc protocol 1.3 means multi hdrs are OK
{
syswrite_full_buffer( $client, "SPAMD/1.1 $resphash{$resp} $resp\r\n" .

Added: spamassassin/trunk/t/spamc_headers.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/spamc_headers.t?view=auto&rev=497030
==============================================================================
--- spamassassin/trunk/t/spamc_headers.t (added)
+++ spamassassin/trunk/t/spamc_headers.t Wed Jan 17 06:52:17 2007
@@ -0,0 +1,28 @@
+#!/usr/bin/perl
+
+use lib '.'; use lib 't';
+use SATest; sa_t_init("spamc_A");
+
+use Test; plan tests => ($NO_SPAMC_EXE ? 0 : 5);
+exit if $NO_SPAMC_EXE;
+
+# ---------------------------------------------------------------------------
+
+%patterns = (
+
+ q{ Message-Id: <78w08.t365th3y6x7h@yahoo.com> } => 'msgid',
+ q{ X-Spam-Status: Yes, } => 'xss',
+ q{ TEST_NOREALNAME}, 'noreal',
+ q{ subscription cancelable at anytime } => 'body',
+
+);
+
+%anti_patterns = (
+
+);
+
+start_spamd("-L --cf='report_safe 0'");
+ok (spamcrun ("-A < data/spam/009", \&patterns_run_cb));
+ok_all_patterns();
+stop_spamd();
+

Propchange: spamassassin/trunk/t/spamc_headers.t
------------------------------------------------------------------------------
svn:executable = *