Mailing List Archive

svn commit: r439873 - /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
Author: felicity
Date: Sun Sep 3 16:36:38 2006
New Revision: 439873

URL: http://svn.apache.org/viewvc?view=rev&rev=439873
Log:
Try to simplify URIDNSBL a bit: store the per-scan data directly on the PerMsgStatus (PMS) object instead of in a separate hash that carried multiple redundant reference pointers, etc.

Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm?view=diff&rev=439873&r1=439872&r2=439873
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Sun Sep 3 16:36:38 2006
@@ -161,31 +161,25 @@
return;
}

- $self->{scanner} = $scanner;
- my $scanstate = $scanner->{uribl_scanstate} = {
- self => $self,
- scanner => $scanner,
- activerules => { },
- hits => { }
- };
+ $scanner->{'uridnsbl_activerules'} = { };
+ $scanner->{'uridnsbl_hits'} = { };
+ $scanner->{'uridnsbl_seen_domain'} = { };

# only hit DNSBLs for active rules (defined and score != 0)
- $scanstate->{active_rules_rhsbl} = { };
- $scanstate->{active_rules_revipbl} = { };
+ $scanner->{'uridnsbl_active_rules_rhsbl'} = { };
+ $scanner->{'uridnsbl_active_rules_revipbl'} = { };
+
foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) {
next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename));

- my $rulecf = $scanstate->{scanner}->{conf}->{uridnsbls}->{$rulename};
+ my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
if ($rulecf->{is_rhsbl}) {
- $scanstate->{active_rules_rhsbl}->{$rulename} = 1;
+ $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
} else {
- $scanstate->{active_rules_revipbl}->{$rulename} = 1;
+ $scanner->{uridnsbl_active_rules_revipbl}->{$rulename} = 1;
}
}

- $self->setup ($scanstate);
-
-
# get all domains in message

# don't keep dereferencing this
@@ -247,7 +241,8 @@
# at this point, @uri_ordered is an ordered array of uri hashes

my %domlist = ();
- while (keys %domlist < $scanner->{main}->{conf}->{uridnsbl_max_domains} && @uri_ordered) {
+ my $umd = $scanner->{main}->{conf}->{uridnsbl_max_domains};
+ while (keys %domlist < $umd && @uri_ordered) {
my $array = shift @uri_ordered;
next unless $array;

@@ -256,7 +251,7 @@
next unless @domains;

# the new domains are all useful, just add them in
- if (keys(%domlist) + @domains <= $scanner->{main}->{conf}->{uridnsbl_max_domains}) {
+ if (keys(%domlist) + @domains <= $umd) {
foreach (@domains) {
$domlist{$_} = 1;
}
@@ -264,7 +259,7 @@
else {
# trim down to a limited number - pick randomly
my $i;
- while (@domains && keys %domlist < $scanner->{main}->{conf}->{uridnsbl_max_domains}) {
+ while (@domains && keys %domlist < $umd) {
my $r = int rand (scalar @domains);
$domlist{splice (@domains, $r, 1)} = 1;
}
@@ -274,7 +269,7 @@
# and query
dbg("uridnsbl: domains to query: ".join(' ',keys %domlist));
foreach my $dom (keys %domlist) {
- $self->query_domain ($scanstate, $dom);
+ $self->query_domain ($scanner, $dom);
}

return 1;
@@ -386,22 +381,15 @@

# ---------------------------------------------------------------------------

-sub setup {
- my ($self, $scanstate) = @_;
- $scanstate->{seen_domain} = { };
-}
-
-# ---------------------------------------------------------------------------
-
sub query_domain {
- my ($self, $scanstate, $dom) = @_;
+ my ($self, $scanner, $dom) = @_;

#warn "uridnsbl: domain $dom\n";
#return;

$dom = lc $dom;
- return if $scanstate->{seen_domain}->{$dom};
- $scanstate->{seen_domain}->{$dom} = 1;
+ return if $scanner->{uridnsbl_seen_domain}->{$dom};
+ $scanner->{uridnsbl_seen_domain}->{$dom} = 1;
$self->log_dns_result("querying domain $dom");

my $obj = {
@@ -415,7 +403,7 @@
my $IP_PRIVATE = IP_PRIVATE;
# only look up the IP if it is public and valid
if ($dom =~ /^$IPV4_ADDRESS$/ && $dom !~ /^$IP_PRIVATE$/) {
- $self->lookup_dnsbl_for_ip($scanstate, $obj, $dom);
+ $self->lookup_dnsbl_for_ip($scanner, $obj, $dom);
# and check the IP in RHSBLs too
if ($dom =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
$dom = "$4.$3.$2.$1";
@@ -429,46 +417,46 @@

if ($single_dnsbl) {
# look up the domain in the RHSBL subset
- my $cf = $scanstate->{active_rules_rhsbl};
+ my $cf = $scanner->{uridnsbl_active_rules_rhsbl};
foreach my $rulename (keys %{$cf}) {
- my $rulecf = $scanstate->{scanner}->{conf}->{uridnsbls}->{$rulename};
- $self->lookup_single_dnsbl($scanstate, $obj, $rulename,
+ my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
+ $self->lookup_single_dnsbl($scanner, $obj, $rulename,
$dom, $rulecf->{zone}, $rulecf->{type});

# see comment below
- $scanstate->{scanner}->register_async_rule_start($rulename);
+ $scanner->register_async_rule_start($rulename);
}

# perform NS, A lookups to look up the domain in the non-RHSBL subset
if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/) {
- $self->lookup_domain_ns($scanstate, $obj, $dom);
+ $self->lookup_domain_ns($scanner, $obj, $dom);
}
}

# note that these rules are now underway. important: unless the
# rule hits, in the current design, these will not be considered
# "finished" until harvest_dnsbl_queries() completes
- my $cf = $scanstate->{active_rules_revipbl};
+ my $cf = $scanner->{uridnsbl_active_rules_revipbl};
foreach my $rulename (keys %{$cf}) {
- $scanstate->{scanner}->register_async_rule_start($rulename);
+ $scanner->register_async_rule_start($rulename);
}
}

# ---------------------------------------------------------------------------

sub lookup_domain_ns {
- my ($self, $scanstate, $obj, $dom) = @_;
+ my ($self, $scanner, $obj, $dom) = @_;

my $key = "NS:".$dom;
- return if $scanstate->{scanner}->{async}->get_lookup($key);
+ return if $scanner->{async}->get_lookup($key);

# dig $dom ns
- my $ent = $self->start_lookup ($scanstate, 'NS', $self->res_bgsend($scanstate, $dom, 'NS'), $key);
+ my $ent = $self->start_lookup ($scanner, 'NS', $self->res_bgsend($scanner, $dom, 'NS'), $key);
$ent->{obj} = $obj;
}

sub complete_ns_lookup {
- my ($self, $scanstate, $ent, $dom) = @_;
+ my ($self, $scanner, $ent, $dom) = @_;

my $packet = $ent->{response_packet};
my @answer = $packet->answer;
@@ -488,11 +476,11 @@
$nsmatch =~ s/\.$//;
# only look up the IP if it is public and valid
if ($nsmatch =~ /^$IPV4_ADDRESS$/ && $nsmatch !~ /^$IP_PRIVATE$/) {
- $self->lookup_dnsbl_for_ip($scanstate, $ent->{obj}, $nsmatch);
+ $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $nsmatch);
}
}
else {
- $self->lookup_a_record($scanstate, $ent->{obj}, $nsmatch);
+ $self->lookup_a_record($scanner, $ent->{obj}, $nsmatch);
}
}
}
@@ -501,25 +489,25 @@
# ---------------------------------------------------------------------------

sub lookup_a_record {
- my ($self, $scanstate, $obj, $hname) = @_;
+ my ($self, $scanner, $obj, $hname) = @_;

my $key = "A:".$hname;
- return if $scanstate->{scanner}->{async}->get_lookup($key);
+ return if $scanner->{async}->get_lookup($key);

# dig $hname a
- my $ent = $self->start_lookup ($scanstate, 'A', $self->res_bgsend($scanstate, $hname, 'A'), $key);
+ my $ent = $self->start_lookup ($scanner, 'A', $self->res_bgsend($scanner, $hname, 'A'), $key);
$ent->{obj} = $obj;
}

sub complete_a_lookup {
- my ($self, $scanstate, $ent, $hname) = @_;
+ my ($self, $scanner, $ent, $hname) = @_;

foreach my $rr ($ent->{response_packet}->answer) {
my $str = $rr->string;
$self->log_dns_result ("A for NS $hname: $str");

if ($str =~ /IN\s+A\s+(\S+)/) {
- $self->lookup_dnsbl_for_ip($scanstate, $ent->{obj}, $1);
+ $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $1);
}
}
}
@@ -527,39 +515,38 @@
# ---------------------------------------------------------------------------

sub lookup_dnsbl_for_ip {
- my ($self, $scanstate, $obj, $ip) = @_;
+ my ($self, $scanner, $obj, $ip) = @_;

$ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
my $revip = "$4.$3.$2.$1";

- my $cf = $scanstate->{active_rules_revipbl};
+ my $cf = $scanner->{uridnsbl_active_rules_revipbl};
foreach my $rulename (keys %{$cf}) {
- my $rulecf = $scanstate->{scanner}->{conf}->{uridnsbls}->{$rulename};
- $self->lookup_single_dnsbl($scanstate, $obj, $rulename,
+ my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
+ $self->lookup_single_dnsbl($scanner, $obj, $rulename,
$revip, $rulecf->{zone}, $rulecf->{type});
}
}

sub lookup_single_dnsbl {
- my ($self, $scanstate, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
+ my ($self, $scanner, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;

my $key = "DNSBL:".$dnsbl.":".$lookupstr;
- return if $scanstate->{scanner}->{async}->get_lookup($key);
+ return if $scanner->{async}->get_lookup($key);
my $item = $lookupstr.".".$dnsbl;

# dig $ip txt
- my $ent = $self->start_lookup ($scanstate, 'DNSBL',
- $self->res_bgsend($scanstate, $item, $qtype), $key);
+ my $ent = $self->start_lookup ($scanner, 'DNSBL',
+ $self->res_bgsend($scanner, $item, $qtype), $key);
$ent->{obj} = $obj;
$ent->{rulename} = $rulename;
$ent->{zone} = $dnsbl;
}

sub complete_dnsbl_lookup {
- my ($self, $scanstate, $ent, $dnsblip) = @_;
+ my ($self, $scanner, $ent, $dnsblip) = @_;

- my $scan = $scanstate->{scanner};
- my $conf = $scan->{conf};
+ my $conf = $scanner->{conf};
my @subtests = ();
my $rulename = $ent->{rulename};
my $rulecf = $conf->{uridnsbls}->{$rulename};
@@ -583,7 +570,7 @@
$packet->header->id." rr=".$rr->string);
next;
}
- $self->got_dnsbl_hit($scanstate, $ent, $rdatastr, $dom, $rulename);
+ $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $rulename);
}
else {
foreach my $subtest (keys (%{$uridnsbl_subs}))
@@ -591,14 +578,14 @@
my $subrulename = $uridnsbl_subs->{$subtest}->{rulename};

if ($subtest eq $rdatastr) {
- $self->got_dnsbl_hit($scanstate, $ent, $rdatastr, $dom, $subrulename);
+ $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
}
# bitmask
elsif ($subtest =~ /^\d+$/) {
if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ &&
Mail::SpamAssassin::Util::my_inet_aton($rdatastr) & $subtest)
{
- $self->got_dnsbl_hit($scanstate, $ent, $rdatastr, $dom, $subrulename);
+ $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
}
}
}
@@ -607,35 +594,34 @@
}

sub got_dnsbl_hit {
- my ($self, $scanstate, $ent, $str, $dom, $rulename) = @_;
+ my ($self, $scanner, $ent, $str, $dom, $rulename) = @_;

$str =~ s/\s+/ /gs; # long whitespace => short
dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");

- if (!defined $scanstate->{hits}->{$rulename}) {
- $scanstate->{hits}->{$rulename} = { };
+ if (!defined $scanner->{uridnsbl_hits}->{$rulename}) {
+ $scanner->{uridnsbl_hits}->{$rulename} = { };
};
- $scanstate->{hits}->{$rulename}->{$dom} = 1;
+ $scanner->{uridnsbl_hits}->{$rulename}->{$dom} = 1;

- my $scan = $scanstate->{scanner};
- if ($scanstate->{active_rules_revipbl}->{$rulename}
- || $scanstate->{active_rules_rhsbl}->{$rulename})
+ if ($scanner->{uridnsbl_active_rules_revipbl}->{$rulename}
+ || $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename})
{
# TODO: this needs to handle multiple domain hits per rule
- $scan->clear_test_state();
- my $uris = join (' ', keys %{$scanstate->{hits}->{$rulename}});
- $scan->test_log ("URIs: $uris");
- $scan->got_hit ($rulename, "");
+ $scanner->clear_test_state();
+ my $uris = join (' ', keys %{$scanner->{uridnsbl_hits}->{$rulename}});
+ $scanner->test_log ("URIs: $uris");
+ $scanner->got_hit ($rulename, "");

# note that this rule has completed (since it got at least 1 hit)
- $scanstate->{scanner}->register_async_rule_finish($rulename);
+ $scanner->register_async_rule_finish($rulename);
}
}

# ---------------------------------------------------------------------------

sub start_lookup {
- my ($self, $scanstate, $type, $id, $key) = @_;
+ my ($self, $scanner, $type, $id, $key) = @_;

my $ent = {
key => $key,
@@ -643,27 +629,27 @@
id => $id,
completed_callback => sub {
my $ent = shift;
- $self->completed_lookup_callback ($scanstate, $ent);
+ $self->completed_lookup_callback ($scanner, $ent);
}
};
- $scanstate->{scanner}->{async}->start_lookup($ent);
+ $scanner->{async}->start_lookup($ent);
return $ent;
}

sub completed_lookup_callback {
- my ($self, $scanstate, $ent) = @_;
+ my ($self, $scanner, $ent) = @_;
my $type = $ent->{type};
my $key = $ent->{key};
$key =~ /:(\S+?)$/; my $val = $1;

if ($type eq 'URI-NS') {
- $self->complete_ns_lookup ($scanstate, $ent, $val);
+ $self->complete_ns_lookup ($scanner, $ent, $val);
}
elsif ($type eq 'URI-A') {
- $self->complete_a_lookup ($scanstate, $ent, $val);
+ $self->complete_a_lookup ($scanner, $ent, $val);
}
elsif ($type eq 'URI-DNSBL') {
- $self->complete_dnsbl_lookup ($scanstate, $ent, $val);
+ $self->complete_dnsbl_lookup ($scanner, $ent, $val);
my $totalsecs = (time - $ent->{obj}->{querystart});
dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ".
$totalsecs." seconds to look up ($val)");
@@ -673,12 +659,12 @@
# ---------------------------------------------------------------------------

sub res_bgsend {
- my ($self, $scanstate, $host, $type) = @_;
+ my ($self, $scanner, $host, $type) = @_;

return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
my $pkt = shift;
my $id = shift;
- $scanstate->{scanner}->{async}->set_response_packet($id, $pkt);
+ $scanner->{async}->set_response_packet($id, $pkt);
});
}