Mailing List Archive

svn commit: r168069 - in /spamassassin/trunk: lib/Mail/SpamAssassin/PerMsgStatus.pm t/uri_text.t
Author: quinlan
Date: Tue May 3 21:14:21 2005
New Revision: 168069

URL: http://svn.apache.org/viewcvs?rev=168069&view=rev
Log:
more URI extraction tweaks for a new corner case

Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/t/uri_text.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=168069&r1=168068&r2=168069&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue May 3 21:14:21 2005
@@ -1789,9 +1789,14 @@
# $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;
# $domain = qq<$label(?:\.$label)*>;
# length($host) <= 255 && $host =~ /^($domain)$/
-# massively simplified from grammar, only matches known TLDs, a single
-# dot at end of TLD works, skip ones that will match as email addresses
-my $schemelessRE = qr/(?<!.\@)\b[a-z\d]
+# changes:
+# massively simplified from grammar, only matches known TLDs, a single
+# dot at end of TLD works
+# negative look-behinds:
+# (?<![a-z\d][.-]) = don't let there be more hostname behind, but
+# don't miss ".....www.bar.com" or "-----www.foo.com"
+# (?<!.\@) = this will be caught by the email address regular expression
+my $schemelessRE = qr/(?<![a-z\d][.-])(?<!.\@)\b[a-z\d]
[a-z\d.-]{0,251}
\.${tldsRE}\.?\b
(?![a-z\d.-])

Modified: spamassassin/trunk/t/uri_text.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/uri_text.t?rev=168069&r1=168068&r2=168069&view=diff
==============================================================================
--- spamassassin/trunk/t/uri_text.t (original)
+++ spamassassin/trunk/t/uri_text.t Tue May 3 21:14:21 2005
@@ -57,13 +57,13 @@
# run patterns and anti-patterns
my $failures = 0;
for my $pattern (keys %patterns) {
- if ($error !~ /\Q${pattern}\E/) {
+ if ($error !~ /${pattern}/) {
print "did not find $pattern\n";
$failures++;
}
}
for my $anti_pattern (keys %anti_patterns) {
- if ($error =~ /\Q${anti_pattern}\E/) {
+ if ($error =~ /${anti_pattern}/) {
print "did find $anti_pattern\n";
$failures++;
}
@@ -85,6 +85,8 @@

EOF
while (<DATA>) {
+ chomp;
+ next if /^#/;
if (/^(.*?)\t+(.*?)\s*$/) {
my $string = $1;
my @patterns = split(' ', $2);
@@ -145,3 +147,5 @@
xyz..geifoza0.com !geifoza0

joe@koja3fui.koja3fui !koja3fui
+
+<xuq@dsj.x.thriyi.com> mailto:xuq@dsj.x.thriyi.com !http\S*thriyi