Mailing List Archive

svn commit: r1879700 - in /spamassassin/branches/3.4/lib/Mail/SpamAssassin: Message/Metadata/Received.pm PerMsgStatus.pm
Author: hege
Date: Thu Jul 9 10:47:48 2020
New Revision: 1879700

URL: http://svn.apache.org/viewvc?rev=1879700&view=rev
Log:
Backport EnvelopeFrom fixes from trunk (Revision 1844628,1864383) (Bug 7834)

Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Metadata/Received.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Metadata/Received.pm?rev=1879700&r1=1879699&r2=1879700&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Metadata/Received.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message/Metadata/Received.pm Thu Jul 9 10:47:48 2020
@@ -53,7 +53,7 @@ use Mail::SpamAssassin::Constants qw(:ip
# ---------------------------------------------------------------------------

sub parse_received_headers {
- my ($self, $permsgstatus, $msg) = @_;
+ my ($self, $pms, $msg) = @_;

# a caller may assert that a message is coming from inside or from an
# authenticated roaming users; this info may not be available in mail
@@ -88,11 +88,11 @@ sub parse_received_headers {
$self->{allow_mailfetch_markers} = 1; # This needs to be set for the
# first Received: header
# now figure out what relays are trusted...
- my $trusted = $permsgstatus->{main}->{conf}->{trusted_networks};
- my $internal = $permsgstatus->{main}->{conf}->{internal_networks};
- my $msa = $permsgstatus->{main}->{conf}->{msa_networks};
- my $did_user_specify_trust = $permsgstatus->{main}->{conf}->{trusted_networks_configured};
- my $did_user_specify_internal = $permsgstatus->{main}->{conf}->{internal_networks_configured};
+ my $trusted = $pms->{main}->{conf}->{trusted_networks};
+ my $internal = $pms->{main}->{conf}->{internal_networks};
+ my $msa = $pms->{main}->{conf}->{msa_networks};
+ my $did_user_specify_trust = $pms->{main}->{conf}->{trusted_networks_configured};
+ my $did_user_specify_internal = $pms->{main}->{conf}->{internal_networks_configured};
my $in_trusted = 1;
my $in_internal = 1;
my $found_msa = 0;
@@ -126,8 +126,7 @@ sub parse_received_headers {
# Now add the single line headers like X-Originating-IP. (bug 5680)
# we convert them into synthetic "Received" headers so we can share
# code below.
- for my $header (@{$permsgstatus->{main}->{conf}->{originating_ip_headers}})
- {
+ foreach my $header (@{$pms->{main}->{conf}->{originating_ip_headers}}) {
my $str = $msg->get_header($header);
next unless ($str && $str =~ m/($IP_ADDRESS)/);
push @hdrs, "from X-Originating-IP: $1\n";
@@ -335,7 +334,7 @@ sub parse_received_line {
my $by = '';
my $id = '';
my $ident = '';
- my $envfrom = '';
+ my $envfrom = undef;
my $mta_looked_up_dns = 0;
my $IP_ADDRESS = IP_ADDRESS;
my $IP_PRIVATE = IP_PRIVATE;
@@ -1211,8 +1210,11 @@ sub parse_received_line {
# details of the handover described here, it's just qmail-scanner
# logging a little more.
if (/^\S+ by \S+ \(.{0,100}\) with qmail-scanner/) {
- $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
- $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+ if (defined $envfrom) {
+ $envfrom =~ s/^\s*<*//gs;
+ $envfrom =~ s/>*\s*$//gs;
+ $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+ }
$self->{qmail_scanner_env_from} = $envfrom; # hack!
return 0;
}
@@ -1367,19 +1369,23 @@ enough:
# (only handles 'alternative form', not 'preferred form' - to be improved)
$ip =~ s/^0*:0*:(?:0*:)*ffff:(\d+\.\d+\.\d+\.\d+)$/$1/i;

- $envfrom =~ s/^\s*<*//gs; $envfrom =~ s/>*\s*$//gs;
$by =~ s/\;$//;

# ensure invalid chars are stripped. Replace with '!' to flag their
# presence, though. NOTE: this means "[1.2.3.4]" IP addr HELO
# strings, which are legit by RFC-2821, look like "!1.2.3.4!".
# still useful though.
- $ip =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $rdns =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $helo =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $by =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $ident =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
- $envfrom =~ s/[\s\000\#\[\]\(\)\<\>\|]/!/gs;
+ my $strip_chars = qr/[\s\000\#\[\]\(\)\<\>\|]/;
+ $ip =~ s/$strip_chars/!/gs;
+ $rdns =~ s/$strip_chars/!/gs;
+ $helo =~ s/$strip_chars/!/gs;
+ $by =~ s/$strip_chars/!/gs;
+ $ident =~ s/$strip_chars/!/gs;
+ if (defined $envfrom) {
+ $envfrom =~ s/^\s*<*//gs;
+ $envfrom =~ s/>*\s*$//gs;
+ $envfrom =~ s/$strip_chars/!/gs;
+ }

my $relay = {
ip => $ip,
@@ -1426,7 +1432,10 @@ sub make_relay_as_string {
# of entries must be preserved, so that regexps that assume that
# e.g. "ip" comes before "helo" will still work.
#
- my $asstr = "[. ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$relay->{envfrom} intl=0 id=$relay->{id} auth=$relay->{auth} msa=0 ]";
+
+ # we could mark envfrom as "undef" if missing? dunno if needed?
+ my $envfrom = $relay->{envfrom} || '';
+ my $asstr = "[. ip=$relay->{ip} rdns=$relay->{rdns} helo=$relay->{helo} by=$relay->{by} ident=$relay->{ident} envfrom=$envfrom intl=0 id=$relay->{id} auth=$relay->{auth} msa=0 ]";
dbg("received-header: parsed as $asstr");
$relay->{as_string} = $asstr;
}

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1879700&r1=1879699&r2=1879700&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm Thu Jul 9 10:47:48 2020
@@ -2928,11 +2928,13 @@ sub _test_log_line {

###########################################################################

-# helper for get(). Do not call directly, as get() caches its results
-# and this does not!
+# helper for get()
sub get_envelope_from {
my ($self) = @_;

+ # Cached?
+ return $self->{envelopefrom} if exists $self->{envelopefrom};
+
# bug 2142:
# Get the SMTP MAIL FROM:, aka. the "envelope sender", if our
# calling app has helpfully marked up the source message
@@ -2948,17 +2950,22 @@ sub get_envelope_from {
# make sure we get the most recent copy - there can be only one EnvelopeSender.
$envf = $self->get($self->{conf}->{envelope_sender_header}.":addr",undef);
# ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)
- goto ok if defined $envf && ($envf =~ /\@/ || $envf eq '');
+ if (defined $envf && (index($envf, '@') > 0 || $envf eq '')) {
+ dbg("message: using envelope_sender_header '%s' as EnvelopeFrom: '%s'",
+ $self->{conf}->{envelope_sender_header}, $envf);
+ $self->{envelopefrom} = $envf;
+ return $envf;
+ }
# Warn them if it's configured, but not there or not usable.
if (defined $envf) {
- chomp $envf;
- dbg("message: envelope_sender_header '%s: %s' is not an FQDN, ignoring",
+ dbg("message: envelope_sender_header '%s': '%s' is not valid, ignoring",
$self->{conf}->{envelope_sender_header}, $envf);
} else {
dbg("message: envelope_sender_header '%s' not found in message",
$self->{conf}->{envelope_sender_header});
}
# Couldn't get envelope-sender using the configured header.
+ $self->{envelopefrom} = undef;
return;
}

@@ -2968,18 +2975,21 @@ sub get_envelope_from {
# if possible... use the last untrusted header, in case there's
# trusted headers.
my $lasthop = $self->{relays_untrusted}->[0];
+ my $lasthop_str = 'last untrusted';
if (!defined $lasthop) {
# no untrusted headers? in that case, the message is ALL_TRUSTED.
# use the first trusted header (ie. the oldest, originating one).
$lasthop = $self->{relays_trusted}->[-1];
+ $lasthop_str = 'first trusted';
}

if (defined $lasthop) {
$envf = $lasthop->{envfrom};
- # TODO FIXME: Received.pm puts both null senders and absence-of-sender
- # into the relays array as '', so we can't distinguish them :(
- if ($envf && ($envf =~ /\@/)) {
- goto ok;
+ # ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)
+ if (defined $envf && (index($envf, '@') > 0 || $envf eq '')) {
+ dbg("message: using $lasthop_str relay envelope-from as EnvelopeFrom: '$envf'");
+ $self->{envelopefrom} = $envf;
+ return $envf;
}
}

@@ -2991,34 +3001,40 @@ sub get_envelope_from {
# lines, we cannot trust any Envelope-From headers, since they're likely to
# be incorrect fetchmail guesses.

- if ($self->get("X-Sender") =~ /\@/) {
+ if (index($self->get("X-Sender"), '@') != -1) {
my $rcvd = join(' ', $self->get("Received"));
- if ($rcvd =~ /\(fetchmail/) {
+ if (index($rcvd, '(fetchmail') != -1) {
dbg("message: X-Sender and fetchmail signatures found, cannot trust envelope-from");
+ $self->{envelopefrom} = undef;
return;
}
}

# procmailrc notes this (we now recommend adding it to Received instead)
- if ($envf = $self->get("X-Envelope-From")) {
+ if (defined($envf = $self->get("X-Envelope-From:addr",undef))) {
# heuristic: this could have been relayed via a list which then used
# a *new* Envelope-from. check
if ($self->get("ALL") =~ /^Received:.*?^X-Envelope-From:/smi) {
dbg("message: X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from");
+ $self->{envelopefrom} = undef;
return;
} else {
- goto ok;
+ dbg("message: using X-Envelope-From header as EnvelopeFrom: '$envf'");
+ $self->{envelopefrom} = $envf;
+ return $envf;
}
}

# qmail, new-inject(1)
- if ($envf = $self->get("Envelope-Sender")) {
+ if (defined($envf = $self->get("Envelope-Sender:addr",undef))) {
# heuristic: this could have been relayed via a list which then used
# a *new* Envelope-from. check
if ($self->get("ALL") =~ /^Received:.*?^Envelope-Sender:/smi) {
dbg("message: Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from");
} else {
- goto ok;
+ dbg("message: using Envelope-Sender header as EnvelopeFrom: '$envf'");
+ $self->{envelopefrom} = $envf;
+ return $envf;
}
}

@@ -3029,24 +3045,21 @@ sub get_envelope_from {
# data. This use of return-path is required; mail systems MUST support
# it. The return-path line preserves the information in the <reverse-
# path> from the MAIL command.
- if ($envf = $self->get("Return-Path")) {
+ if (defined($envf = $self->get("Return-Path:addr",undef))) {
# heuristic: this could have been relayed via a list which then used
# a *new* Envelope-from. check
if ($self->get("ALL") =~ /^Received:.*?^Return-Path:/smi) {
dbg("message: Return-Path header found after 1 or more Received lines, cannot trust envelope-from");
} else {
- goto ok;
+ dbg("message: using Return-Path header as EnvelopeFrom: '$envf'");
+ $self->{envelopefrom} = $envf;
+ return $envf;
}
}

# give up.
+ $self->{envelopefrom} = undef;
return;
-
-ok:
- $envf =~ s/^<*//s; # remove <
- $envf =~ s/>*\s*\z//s; # remove >, whitespace, newlines
-
- return $envf;
}

###########################################################################