Mailing List Archive

svn commit: r1885696 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Message/Node.pm t/data/nice/unicode2
Author: jhardin
Date: Tue Jan 19 19:01:27 2021
New Revision: 1885696

URL: http://svn.apache.org/viewvc?rev=1885696&view=rev
Log:
Bug 7880 - fix null-reference weakness in the UTF-16 branch of _normalize(): for some UTF-16 data detect_utf16() returns no decoder, and the "failed decoding" debug message then called ->name on the undefined decoder; modify test to cover that condition; modify detect_utf16() to skip the data scan if a BOM is present (for efficiency, as Perl's UTF-16 decoder will figure out the endianness from the BOM)

Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
spamassassin/trunk/t/data/nice/unicode2

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1885696&r1=1885695&r2=1885696&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Tue Jan 19 19:01:27 2021
@@ -388,6 +388,12 @@ sub detect_utf16 {
my $sum_l_o = 0;
my $decoder = undef;

+ # avoid scan if BOM present
+ if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
+ dbg( "message: detect_utf16: found BOM" );
+ return undef; # let perl figure it out from the BOM
+ }
+
my @msg_h = unpack 'H' x length( $data ), $data;
my @msg_l = unpack 'h' x length( $data ), $data;

@@ -518,20 +524,22 @@ sub _normalize {
# https://bz.apache.org/SpamAssassin/show_bug.cgi?id=7252

my $decoder = detect_utf16( $_[0] );
- if (eval { $rv = $decoder->decode($_[0], 1|8); defined $rv }) {
- dbg("message: decoded as charset %s, declared %s",
- $decoder->name, $charset_declared);
- return $_[0] if !$return_decoded;
- $rv .= $data_taint; # carry taintedness over, avoid Encode bug
- return $rv; # decoded
- } else {
- my $err = '';
- if ($@) {
- $err = $@; $err =~ s/\s+/ /gs; $err =~ s/(.*) at .*/$1/;
- $err = " ($err)";
+ if (defined $decoder) {
+ if (eval { $rv = $decoder->decode($_[0], 1|8); defined $rv }) {
+ dbg("message: decoded as charset %s, declared %s",
+ $decoder->name, $charset_declared);
+ return $_[0] if !$return_decoded;
+ $rv .= $data_taint; # carry taintedness over, avoid Encode bug
+ return $rv; # decoded
+ } else {
+ my $err = '';
+ if ($@) {
+ $err = $@; $err =~ s/\s+/ /gs; $err =~ s/(.*) at .*/$1/;
+ $err = " ($err)";
+ }
+ dbg("message: failed decoding as charset %s, declared %s%s",
+ $decoder->name, $charset_declared, $err);
}
- dbg("message: failed decoding as charset %s, declared %s%s",
- $decoder->name, $charset_declared, $err);
};
} else {
# try decoding as a declared character set

Modified: spamassassin/trunk/t/data/nice/unicode2
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/nice/unicode2?rev=1885696&r1=1885695&r2=1885696&view=diff
==============================================================================
--- spamassassin/trunk/t/data/nice/unicode2 (original)
+++ spamassassin/trunk/t/data/nice/unicode2 Tue Jan 19 19:01:27 2021
@@ -1,4 +1,4 @@
-From: test
+From: =?UTF-16?B?//492Enc?= test
To: test
Message-ID: <123@test.example.com>
Date: Thu, 16 Jun 2016 00:41:19 (UTC)