Mailing List Archive

svn commit: rev 6575 - incubator/spamassassin/trunk/tools
Author: jm
Date: Sat Feb 7 18:15:13 2004
New Revision: 6575

Added:
incubator/spamassassin/trunk/tools/bayes_dump_to_trusted_networks (contents, props changed)
Log:
added tool to generate 'trusted_networks' lines from a Bayes db dump

Added: incubator/spamassassin/trunk/tools/bayes_dump_to_trusted_networks
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/tools/bayes_dump_to_trusted_networks Sat Feb 7 18:15:13 2004
@@ -0,0 +1,139 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+bayes_dump_to_trusted_networks - generate configuration from Bayes database
+
+=head1 SYNOPSIS
+
+sa-learn --dump | bayes_dump_to_trusted_networks [opts] > trust.cf
+bayes_dump_to_trusted_networks bayes.dump [opts] > trust.cf
+
+options:
+
+ --minham n
+ --rdns
+
+=head1 DESCRIPTION
+
+This tool uses a dump of your Bayes database to determine which
+IP addresses are 'trustworthy', and therefore should be listed in
+'trusted_networks' lines in your configuration.
+
+This will reduce unnecessary DNSBL lookups, will whitelist mails from
+trustworthy sources, and allows several SpamAssassin rules to operate more
+effectively.
+
+A 'trustworthy' IP is one that is trusted not to B<forge> emails; in other
+words, it's not a subverted machine running a proxy, or one under spammer
+control.
+
+As such, any IP that has relayed 3 or more ham mails is considered
+trustworthy. It doesn't matter if it has ever relayed spam mails to you, since
+large ISP smarthost relays will have done so -- relaying both ham and spam from
+their customer pool. (The important thing is that it relayed the mail without
+forging sender address information.)
+
+=head1 OPTIONS
+
+=over 4
+
+=item --minham n
+
+Require C<n> or more ham messages before considering an IP a candidate
+for trust. Default: 3.
+
+=item --rdns
+
+Annotate with reverse-DNS for that IP address. Slows things down,
+but easier to read.
+
+=back
+
+=cut
+
+# Package globals holding the two precompiled patterns that the main loop
+# matches each Bayes token against.
+use vars qw{
+ $IP_ADDRESS $IP_IN_RESERVED_RANGE
+};
+
+# an IP address, in IPv4, IPv4-mapped-in-IPv6, or IPv6 format. NOTE: cannot
+# just refer to $IPV4_ADDRESS, due to perl bug reported in nesting qr//s. :(
+#
+# The pattern is case-insensitive and written in extended (x) form. It accepts
+# an optional literal "IPv6:" tag, followed by either
+#   - a dotted quad whose four octets are each limited to 0-255, optionally
+#     preceded by an IPv4-mapped prefix such as 0:0:ffff: ; or
+#   - six or more colon-separated groups of zero to four hex digits.
+# Both ends sit on a word boundary. Every group is non-capturing, so a caller
+# that wants the matched text has to wrap the pattern in its own parentheses.
+#
+$IP_ADDRESS = qr/\b (?:IPv6:|) (?: (?:0*:0*:ffff:(?:0*:|)|) # IPv4-mapped-in-IPv6
+ (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
+ (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
+ (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
+ (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)
+ | # an IPv6 address, seems to always be at least 6 words
+ [a-f0-9]{0,4} \:[a-f0-9]{0,4}
+ \:[a-f0-9]{0,4} \:[a-f0-9]{0,4}
+ \:[a-f0-9]{0,4} \:[a-f0-9]{0,4} (?:\:[a-f0-9]{0,4})*
+ )\b/ix;
+
+# Matches, at the start of a dotted-quad IPv4 address, any network that is not
+# globally routable unicast space and therefore cannot be a genuine external
+# relay worth listing in 'trusted_networks'.
+#
+# Only special-purpose blocks are listed. Ordinary /8s that used to be marked
+# as unallocated (1, 2, 5, 23, 27, 31, 37, 39, 41, 42, 58-60, 70-79, 90-126,
+# 197, 222, 223) have since been handed out to the regional registries and
+# carry real mail, so treating them as reserved would silently drop
+# legitimate relays.
+#
+# The pattern is anchored at the start of the string and ends on the dot that
+# follows the last listed octet, so "10" matches 10.1.2.3 but not 100.1.2.3.
+$IP_IN_RESERVED_RANGE = qr{^(?:
+  0|                                    # 0/8: "this network"
+  10|                                   # 10/8: Private Use
+  100\.(?:6[4-9]|[7-9][0-9]|1[01][0-9]|12[0-7])|  # 100.64/10: Shared Address Space
+  127|                                  # 127/8: Loopback
+  169\.254|                             # 169.254/16: Link Local
+  172\.(?:1[6-9]|2[0-9]|3[01])|         # 172.16/12: Private Use
+  192\.0\.[02]|                         # 192.0.0/24 and 192.0.2/24 (TEST-NET-1)
+  192\.168|                             # 192.168/16: Private Use
+  198\.1[89]|                           # 198.18/15: Benchmarking
+  198\.51\.100|                         # 198.51.100/24: TEST-NET-2
+  203\.0\.113|                          # 203.0.113/24: TEST-NET-3
+  2(?:2[4-9]|3[0-9])|                   # 224/4: Multicast
+  2(?:4[0-9]|5[0-5])                    # 240/4: Reserved, including broadcast
+)\.}x;
+
+use Getopt::Long;
+
+# Abort the program with a short usage summary.  The message ends in a
+# newline, so perl does not append its "at ... line ..." suffix.
+sub usage {
+  die "\nusage: bayes_dump_to_trusted_networks [--minham n] [--rdns] [file]\n";
+}
+
+# Command-line switches, kept as package globals so the main loop can see them.
+use vars qw($opt_minham $opt_rdns $opt_help);
+
+# --minham takes an optional integer; --rdns and --help are plain flags.
+my $parsed_ok = GetOptions(
+  'minham:i' => \$opt_minham,
+  'rdns'     => \$opt_rdns,
+  'help'     => \$opt_help,
+);
+
+# a bad switch and an explicit --help both end in the usage message
+usage() unless $parsed_ok;
+usage() if $opt_help;
+
+# defaults: no reverse-DNS annotation, and at least 3 ham mails per IP
+# (an omitted or zero --minham also falls back to 3)
+$opt_rdns   = 0 unless $opt_rdns;
+$opt_minham = 3 unless $opt_minham;
+
+# Read the Bayes dump (named file, or stdin) one record at a time and print a
+# 'trusted_networks' line for every relay IP that qualifies.
+while (<>) {
+  # records are whitespace-separated; the token text is the fifth field
+  my ($prob, $nspam, $nham, $atime, $tok) = split;
+  next unless defined $tok;             # blank or short line: nothing to test
+
+  # only select IP-address Received tokens
+  next if ($tok !~ /^H\*r:ip\*(${IP_ADDRESS})/o);
+  my $ip = $1;
+
+  next unless ($nham >= $opt_minham);   # has relayed >= n ham mails
+
+  # Reduce the match to a bare address: drop the "IPv6:" tag, and turn an
+  # IPv4-mapped form into the plain dotted quad.  Without this a mapped
+  # private address slips past the reserved-range test below, which is
+  # anchored on a leading IPv4 octet.
+  $ip =~ s/^IPv6://i;
+  $ip =~ s/^[0-9a-f:]*:(?=(?:\d+\.){3}\d+\z)//i;
+
+  next if ($ip =~ /$IP_IN_RESERVED_RANGE/o);
+
+  # sanitise!  $ip is about to be handed to a shell.  Hex digits have to
+  # survive, otherwise every IPv6 address is mangled into underscores.
+  $ip =~ s/[^0-9a-fA-F\.\:]/_/gs;
+
+  my $rdns = '';
+  if ($opt_rdns) {
+    my $answer = `host $ip 2>&1`;
+    $answer = '' unless defined $answer;    # the command could not be run
+
+    # keep just the pointer names (minus the trailing root dot) when there
+    # are any; otherwise keep whatever 'host' printed, e.g. an error message
+    my @ptr_names = ($answer =~ /^.* domain name pointer (\S+?)\.?\s*$/gm);
+    my $note = @ptr_names ? join(', ', @ptr_names) : $answer;
+
+    # the note becomes a trailing comment, so it must stay on a single line;
+    # several PTR records would otherwise spill uncommented text into the
+    # generated configuration
+    $note =~ s/\s+/ /g;
+    $note =~ s/^ //;
+    $note =~ s/ $//;
+    $rdns = "\t# $note" if length $note;
+  }
+
+  print "trusted_networks $ip$rdns\n";
+}
+
+