Mailing List Archive

svn commit: r314756 - in /spamassassin/trunk: build/ rules/ rulescode/
Author: jm
Date: Tue Oct 11 19:52:19 2005
New Revision: 314756

URL: http://svn.apache.org/viewcvs?rev=314756&view=rev
Log:
move all rule source out of 'rules' directory, and update build scripts to deal with this

Added:
spamassassin/trunk/rulescode/
spamassassin/trunk/rulescode/10_default_prefs.cf
- copied unchanged from r312984, spamassassin/trunk/rules/10_default_prefs.cf
spamassassin/trunk/rulescode/20_dnsbl_tests.cf
- copied unchanged from r312984, spamassassin/trunk/rules/20_dnsbl_tests.cf
spamassassin/trunk/rulescode/20_html_tests.cf
- copied unchanged from r312984, spamassassin/trunk/rules/20_html_tests.cf
spamassassin/trunk/rulescode/20_net_tests.cf
- copied unchanged from r312984, spamassassin/trunk/rules/20_net_tests.cf
spamassassin/trunk/rulescode/23_bayes.cf
- copied unchanged from r312984, spamassassin/trunk/rules/23_bayes.cf
spamassassin/trunk/rulescode/25_accessdb.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_accessdb.cf
spamassassin/trunk/rulescode/25_antivirus.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_antivirus.cf
spamassassin/trunk/rulescode/25_dcc.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_dcc.cf
spamassassin/trunk/rulescode/25_domainkeys.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_domainkeys.cf
spamassassin/trunk/rulescode/25_hashcash.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_hashcash.cf
spamassassin/trunk/rulescode/25_pyzor.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_pyzor.cf
spamassassin/trunk/rulescode/25_razor2.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_razor2.cf
spamassassin/trunk/rulescode/25_spf.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_spf.cf
spamassassin/trunk/rulescode/25_textcat.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_textcat.cf
spamassassin/trunk/rulescode/25_uribl.cf
- copied unchanged from r312984, spamassassin/trunk/rules/25_uribl.cf
spamassassin/trunk/rulescode/60_awl.cf
- copied unchanged from r312984, spamassassin/trunk/rules/60_awl.cf
spamassassin/trunk/rulescode/60_whitelist.cf
- copied unchanged from r312984, spamassassin/trunk/rules/60_whitelist.cf
spamassassin/trunk/rulescode/60_whitelist_subject.cf
- copied unchanged from r312984, spamassassin/trunk/rules/60_whitelist_subject.cf
spamassassin/trunk/rulescode/70_broken_rules.cf
- copied unchanged from r312984, spamassassin/trunk/rules/70_broken_rules.cf
spamassassin/trunk/rulescode/70_testing.cf
- copied unchanged from r312984, spamassassin/trunk/rules/70_testing.cf
spamassassin/trunk/rulescode/70_uribl.cf
- copied unchanged from r312984, spamassassin/trunk/rules/70_uribl.cf
spamassassin/trunk/rulescode/STATISTICS-set0.txt
- copied unchanged from r312984, spamassassin/trunk/rules/STATISTICS-set0.txt
spamassassin/trunk/rulescode/STATISTICS-set1.txt
- copied unchanged from r312984, spamassassin/trunk/rules/STATISTICS-set1.txt
spamassassin/trunk/rulescode/STATISTICS-set2.txt
- copied unchanged from r312984, spamassassin/trunk/rules/STATISTICS-set2.txt
spamassassin/trunk/rulescode/STATISTICS-set3.txt
- copied unchanged from r312984, spamassassin/trunk/rules/STATISTICS-set3.txt
spamassassin/trunk/rulescode/init.pre
- copied unchanged from r312984, spamassassin/trunk/rules/init.pre
spamassassin/trunk/rulescode/languages
- copied unchanged from r312984, spamassassin/trunk/rules/languages
spamassassin/trunk/rulescode/local.cf
- copied unchanged from r312984, spamassassin/trunk/rules/local.cf
spamassassin/trunk/rulescode/name-triplets.txt
- copied unchanged from r312984, spamassassin/trunk/rules/name-triplets.txt
spamassassin/trunk/rulescode/regression_tests.cf
- copied unchanged from r312984, spamassassin/trunk/rules/regression_tests.cf
spamassassin/trunk/rulescode/triplets.txt
- copied unchanged from r312984, spamassassin/trunk/rules/triplets.txt
spamassassin/trunk/rulescode/user_prefs.template
- copied unchanged from r312984, spamassassin/trunk/rules/user_prefs.template
spamassassin/trunk/rulescode/v310.pre
- copied unchanged from r312984, spamassassin/trunk/rules/v310.pre
Removed:
spamassassin/trunk/rules/10_default_prefs.cf
spamassassin/trunk/rules/20_dnsbl_tests.cf
spamassassin/trunk/rules/20_html_tests.cf
spamassassin/trunk/rules/20_net_tests.cf
spamassassin/trunk/rules/23_bayes.cf
spamassassin/trunk/rules/25_accessdb.cf
spamassassin/trunk/rules/25_antivirus.cf
spamassassin/trunk/rules/25_dcc.cf
spamassassin/trunk/rules/25_domainkeys.cf
spamassassin/trunk/rules/25_hashcash.cf
spamassassin/trunk/rules/25_pyzor.cf
spamassassin/trunk/rules/25_razor2.cf
spamassassin/trunk/rules/25_spf.cf
spamassassin/trunk/rules/25_textcat.cf
spamassassin/trunk/rules/25_uribl.cf
spamassassin/trunk/rules/60_awl.cf
spamassassin/trunk/rules/60_whitelist.cf
spamassassin/trunk/rules/60_whitelist_subject.cf
spamassassin/trunk/rules/70_broken_rules.cf
spamassassin/trunk/rules/70_testing.cf
spamassassin/trunk/rules/70_uribl.cf
spamassassin/trunk/rules/STATISTICS-set0.txt
spamassassin/trunk/rules/STATISTICS-set1.txt
spamassassin/trunk/rules/STATISTICS-set2.txt
spamassassin/trunk/rules/STATISTICS-set3.txt
spamassassin/trunk/rules/init.pre
spamassassin/trunk/rules/languages
spamassassin/trunk/rules/local.cf
spamassassin/trunk/rules/name-triplets.txt
spamassassin/trunk/rules/regression_tests.cf
spamassassin/trunk/rules/triplets.txt
spamassassin/trunk/rules/user_prefs.template
spamassassin/trunk/rules/v310.pre
Modified:
spamassassin/trunk/build/mkrules

Modified: spamassassin/trunk/build/mkrules
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/mkrules?rev=314756&r1=314755&r2=314756&view=diff
==============================================================================
--- spamassassin/trunk/build/mkrules (original)
+++ spamassassin/trunk/build/mkrules Tue Oct 11 19:52:19 2005
@@ -30,21 +30,25 @@
use File::Copy;

use Getopt::Long;
-use vars qw($opt_src $opt_out);
-GetOptions("src=s", "out=s");
+use vars qw(@opt_srcs $opt_out);
+GetOptions("src=s" => \@opt_srcs, "out=s");

-die "no src" unless ($opt_src);
+die "no src" unless (@opt_srcs >= 1);
die "no out" unless ($opt_out);
-die "unreadable src" unless (-d $opt_src);
die "unreadable out" unless (-d $opt_out);

# source files that need compilation, and their targets
my $needs_compile = { };
+my $current_src;

-File::Find::find ({
+foreach my $src (@opt_srcs) {
+ die "unreadable src" unless (-d $src);
+ $current_src = $src;
+ File::Find::find ({
wanted => \&wanted,
no_chdir => 1
- }, $opt_src);
+ }, $src);
+}

# context for the rules compiler
my $seen_rules = { };
@@ -58,14 +62,22 @@
exit;

sub wanted {
- return unless (-f $File::Find::name && /\d.*\.(?:cf|pm)$/i);
+ my $path = $File::Find::name;
+ # only files
+ return if (!-f $path);
+
+ # limit what will be copied from sandboxes
+ return if ($path =~ /sandbox/ && /\d.*\.(?:cf|pm)$/i);
+
+ # a bit of sanity please - no svn metadata ;)
+ return if ($path =~ /\.svn/);

- my $dir = $File::Find::name;
- $dir =~ s/^${opt_src}[\/\\\:]//s;
+ my $dir = $path;
+ $dir =~ s/^${current_src}[\/\\\:]//s;
$dir =~ s/([^\/\\\:]+)$//;
my $filename = $1;

- my $f = "$opt_src/$dir$filename";
+ my $f = "$current_src/$dir$filename";
my $t = "$opt_out/$filename";
$needs_compile->{$f} = {
f => $f,
@@ -95,8 +107,7 @@
my $needs_rebuild = 1;

if ($entry->{dir} =~ /sandbox/) {
- # sandbox rules
- rule_file_compile(1, $f, $t, $entry->{filename});
+ rule_file_compile_sandbox($f, $t, $entry->{filename});
}
elsif ($entry->{dir} =~ /extra/) {
# 'extra' rulesets; not built by default (TODO)
@@ -105,7 +116,7 @@
else {
# rules in "core" and "lang" are always copied
if ($needs_rebuild) {
- rule_file_compile(0, $f, $t, $entry->{filename});
+ rule_file_compile_core($f, $t, $entry->{filename});
}
}
}
@@ -135,8 +146,8 @@
# TODO: linting during compilation, and ignore lint-failures? may have to
# reimplement a small subset of lint behaviour to do this.

-sub rule_file_compile {
- my ($is_sandbox, $f, $t, $filename) = @_;
+sub rule_file_compile_sandbox {
+ my ($f, $t, $filename) = @_;

open (IN, "<$f") or die "cannot read $f";

@@ -162,6 +173,9 @@
my $ALWAYS_PUBLISH = '!always_publish!';
$rules->{$ALWAYS_PUBLISH} = { text => '', publish => 1 };

+ # an "ifplugin" or "if" scope
+ my $current_conditional;
+
while (<IN>) {
my $orig = $_;

@@ -184,7 +198,7 @@

if (/^
(header|rawbody|body|full|uri|meta|mimeheader|describe|
- tflags|reuse|score)
+ tflags|reuse|score)
\s+(\S+)\s+(.*)$
/x)
{
@@ -194,12 +208,11 @@
my $val = $3;

my $origname = $name;
- if ($is_sandbox) {
- $name = rule_name_avoid_collisions($name, $f);
- }
+ $name = rule_name_avoid_collisions($name, $f);

if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
+ $rules->{$name}->{cond} = $current_conditional;
$rules->{$name}->{text} .= $orig;

$lastrule = $name;
@@ -216,9 +229,7 @@
my $val = $3;

my $origname = $name;
- if ($is_sandbox) {
- $name = rule_name_avoid_collisions($name, $f);
- }
+ $name = rule_name_avoid_collisions($name, $f);

if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
@@ -230,11 +241,18 @@
}
$rules->{$name}->{$command} = $val;
}
+ elsif (/^
+ (if|ifplugin)
+ \s+(.*?)$
+ /x)
+ {
+ $current_conditional = $orig;
+ }
+ elsif (/^endif\b/x)
+ {
+ undef $current_conditional;
+ }
else {
- # this is a non-comment, non-rule, non-build-directive line.
- # create a file with the same name as the input file, and
- # publish to that.
-
# warn "unknown line in rules file '$f', saving to default: $orig";
$rules->{$ALWAYS_PUBLISH}->{text} .= $orig;
}
@@ -254,7 +272,7 @@
$pubfile = $opt_out.'/'.$filename;
$output_files->{$pubfile} = 1;

- if ($is_sandbox && !$rules->{$name}->{publish}) {
+ if (!$rules->{$name}->{publish}) {
# don't output non-published rules
next;
}
@@ -272,7 +290,13 @@
$output_file_text->{$pubfile} .= $cmts;
}

- $output_file_text->{$pubfile} .= $text;
+ my $cond = $rules->{$name}->{cond};
+ if ($cond) {
+ $output_file_text->{$pubfile} .= $cond.$text."endif\n";
+ }
+ else {
+ $output_file_text->{$pubfile} .= $text;
+ }
}

print "$f: ".(scalar @$rule_order)." rules copied\n";
@@ -282,6 +306,64 @@
foreach my $name (@$rule_order) {
$seen_rules->{$name} = 1;
}
+}
+
+sub rule_file_compile_core {
+ my ($f, $t, $filename) = @_;
+
+ my $pubfile = $opt_out.'/'.$filename;
+ $output_files->{$pubfile} = 1;
+
+ open (IN, "<$f") or die "cannot read $f";
+ while (<IN>) {
+ my $orig = $_;
+
+ s/^#reuse/reuse/; # TODO - dirty hack. we need to fix this to just be
+ # a keyword which the engine ignores, this is absurd!
+
+ s/#.*$//g; s/^\s+//; s/\s+$//;
+
+ # always publish non-sandbox lines verbatim. just note what
+ # rules we've seen, and carry on
+ $output_file_text->{$pubfile} .= $orig;
+
+ # save "lang" declarations
+ my $lang = '';
+ if (s/^lang\s+(\S+)\s+//) {
+ $lang = $1;
+ }
+
+ if (/^
+ (header|rawbody|body|full|uri|meta|mimeheader|describe|
+ tflags|reuse|score)
+ \s+(\S+)\s+(.*)$
+ /x)
+ {
+ # rule definitions
+ my $type = $1;
+ my $name = $2;
+ my $val = $3;
+
+ # just save the name, and ignore the rest; we're already publishing it
+ $seen_rules->{$name} = 1;
+ }
+ elsif (/^
+ (pubfile|publish)
+ \s+(\S+)\s*(.*?)$
+ /x)
+ {
+ # preprocessor directives
+ my $command = $1;
+ my $name = $2;
+ my $val = $3;
+
+ warn "$f: cannot use 'publish' in non-sandbox files\n";
+ }
+ }
+ close IN;
+
+ # now append all the found text to the output file buffers
+ print "$f: all lines copied\n";
}

sub write_output_files {