Mailing List Archive

svn commit: r1915475 - /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm
Author: gbechis
Date: Tue Jan 30 09:32:17 2024
New Revision: 1915475

URL: http://svn.apache.org/viewvc?rev=1915475&view=rev
Log:
Extract more URIs from PDF files

Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm?rev=1915475&r1=1915474&r2=1915475&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm Tue Jan 30 09:32:17 2024
@@ -328,8 +328,14 @@ sub _get_pdf_details {
}

# XXX some pdf have uris but are stored inside binary data
- if (keys %uris < 20 && $line =~ /(?:\/S\s{0,2}\/URI\s{0,2}|^\s*)\/URI\s{0,2}( \( .*? (?<!\\) \) | < [^>]* > )/x) {
- my $location = _parse_string($1);
+ if (keys %uris < 20 && $line =~ /(?:\/S\s{0,2}\/URI\s{0,2}|^\s*)\/URI\s{0,2}( \( .*? (?<!\\) \) | < [^>]* > )|\((https?:\/\/.{8,256})\)>>/x) {
+ my $location;
+ if (defined $1 and (index($1, '.') > 0)) {
+ $location = _parse_string($1);
+ }
+ if (not defined($location) or index($location, '.') <= 0) {
+ $location = _parse_string($2);
+ }
next unless index($location, '.') > 0; # ignore some binary mess
next if $location =~ /\0/; # ignore urls with NUL characters
if (!exists $uris{$location}) {