Mailing List Archive

svn commit: rev 6303 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin t
Author: felicity
Date: Mon Jan 26 14:09:05 2004
New Revision: 6303

Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
incubator/spamassassin/trunk/t/dns.t
incubator/spamassassin/trunk/t/rule_tests.t
Log:
- bug 2267-related: I found that M::SA::Conf::_parse() was sucking
up a lot of the time of a single message run. I modified the
majority of configuration options to do string equality instead of
REs and got an improvement. It probably still needs some tweaks,
but it's working for me. :)

- started removing backwards compatibility options -- for instance,
bayes_auto_learn is now the required option name; auto_learn alone
won't work.

- the previous Conf code would set the default scores whenever a
rule was defined in the config files. Unfortunately, since we
typically define the rule and then the score, this means we're
setting the scores to the default, then setting them again when the
real scores are read in. I moved the default score setting to the
end of the parsing routine, so all the real scores are set first,
then anything that's left without a score gets the appropriate default.

- more documentation bits around the MsgContainer and MsgParser code

- got rid of some module code not required in PerMsgStatus

- added a quick n' dirty decoding of uselessly entity-encoded
characters (numeric entities for printable chars, dec 33-126) to the
URL list handling

- added license jargon to MsgParser

- fixed dns.t to exit if it actually shouldn't test anything

- killed useless Data::Dumper from rule_tests.t


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Mon Jan 26 14:09:05 2004
@@ -367,6 +367,14 @@
# Simplifies regexps below...
1 while s/^(\S+)\-(\S+)/$1_$2/g;

+ my($key, $value) = split(/\s+/, $_, 2);
+ $key = lc $key;
+ $value = '' unless ( defined $value );
+
+ # Do a better job untainting this info ...
+ $value =~ /^(.*)$/;
+ $value = $1;
+
=head2 VERSION OPTIONS

=over 4
@@ -380,8 +388,8 @@

=cut

- if (/^require_version\s+(.*)$/) {
- my $req_version = $1;
+ if ( $key eq 'require_version' ) {
+ my $req_version = $value;
$req_version =~ s/^\@\@VERSION\@\@$/$Mail::SpamAssassin::VERSION/;
if ($Mail::SpamAssassin::VERSION != $req_version) {
warn "configuration file \"$self->{currentfile}\" requires version ".
@@ -413,8 +421,8 @@

=cut

- if(/^version_tag\s+(.*)$/) {
- my $tag = lc($1);
+ if( $key eq 'version_tag' ) {
+ my $tag = lc($value);
$tag =~ tr/a-z0-9./_/c;
foreach (@Mail::SpamAssassin::EXTRA_VERSION) {
if($_ eq $tag) {
@@ -463,8 +471,8 @@

=cut

- if (/^whitelist_from\s+(.+)$/) {
- $self->add_to_addrlist ('whitelist_from', split (' ', $1)); next;
+ if ( $key eq 'whitelist_from' ) {
+ $self->add_to_addrlist ('whitelist_from', split (/\s+/, $value)); next;
}

=item unwhitelist_from add@ress.com
@@ -482,8 +490,8 @@

=cut

- if (/^unwhitelist_from\s+(.+)$/) {
- $self->remove_from_addrlist ('whitelist_from', split (' ', $1)); next;
+ if ( $key eq 'unwhitelist_from' ) {
+ $self->remove_from_addrlist ('whitelist_from', split (/\s+/, $value)); next;
}

=item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
@@ -516,12 +524,12 @@

=cut

- if (/^whitelist_from_rcvd\s+(\S+)\s+(\S+)$/) {
- $self->add_to_addrlist_rcvd ('whitelist_from_rcvd', $1, $2);
+ if ( $key eq 'whitelist_from_rcvd' ) {
+ $self->add_to_addrlist_rcvd ('whitelist_from_rcvd', split(/\s+/, $value));
next;
}
- if (/^def_whitelist_from_rcvd\s+(\S+)\s+(\S+)$/) {
- $self->add_to_addrlist_rcvd ('def_whitelist_from_rcvd', $1, $2);
+ if ( $key eq 'def_whitelist_from_rcvd' ) {
+ $self->add_to_addrlist_rcvd ('def_whitelist_from_rcvd', split(/\s+/, $value));
next;
}

@@ -542,9 +550,9 @@

=cut

- if (/^unwhitelist_from_rcvd\s+(.+)$/) {
- $self->remove_from_addrlist_rcvd('whitelist_from_rcvd', split (' ', $1));
- $self->remove_from_addrlist_rcvd('def_whitelist_from_rcvd', split (' ', $1));
+ if ( $key eq 'unwhitelist_from_rcvd' ) {
+ $self->remove_from_addrlist_rcvd('whitelist_from_rcvd', split (/\s+/, $value));
+ $self->remove_from_addrlist_rcvd('def_whitelist_from_rcvd', split (/\s+/, $value));
next;
}

@@ -555,8 +563,8 @@

=cut

- if (/^blacklist_from\s+(.+)$/) {
- $self->add_to_addrlist ('blacklist_from', split (' ', $1)); next;
+ if ( $key eq 'blacklist_from' ) {
+ $self->add_to_addrlist ('blacklist_from', split (/\s+/, $value)); next;
}

=item unblacklist_from add@ress.com
@@ -572,8 +580,8 @@

=cut

- if (/^unblacklist_from\s+(.+)$/) {
- $self->remove_from_addrlist ('blacklist_from', split (' ', $1)); next;
+ if ( $key eq 'unblacklist_from' ) {
+ $self->remove_from_addrlist ('blacklist_from', split (/\s+/, $value)); next;
}


@@ -599,14 +607,14 @@

=cut

- if (/^whitelist_to\s+(.+)$/) {
- $self->add_to_addrlist ('whitelist_to', split (' ', $1)); next;
+ if ( $key eq 'whitelist_to' ) {
+ $self->add_to_addrlist ('whitelist_to', split (/\s+/, $value)); next;
}
- if (/^more_spam_to\s+(.+)$/) {
- $self->add_to_addrlist ('more_spam_to', split (' ', $1)); next;
+ if ( $key eq 'more_spam_to' ) {
+ $self->add_to_addrlist ('more_spam_to', split (/\s+/, $value)); next;
}
- if (/^all_spam_to\s+(.+)$/) {
- $self->add_to_addrlist ('all_spam_to', split (' ', $1)); next;
+ if ( $key eq 'all_spam_to' ) {
+ $self->add_to_addrlist ('all_spam_to', split (/\s+/, $value)); next;
}

=item blacklist_to add@ress.com
@@ -617,8 +625,8 @@

=cut

- if (/^blacklist_to\s+(.+)$/) {
- $self->add_to_addrlist ('blacklist_to', split (' ', $1)); next;
+ if ( $key eq 'blacklist_to' ) {
+ $self->add_to_addrlist ('blacklist_to', split (/\s+/, $value)); next;
}

=back
@@ -640,8 +648,8 @@

=cut

- if (/^required_hits\s+(\S+)$/) {
- $self->{required_hits} = $1+0.0; next;
+ if ( $key eq 'required_hits' ) {
+ $self->{required_hits} = $value+0.0; next;
}

=item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]
@@ -675,8 +683,8 @@

=cut

- if (my ($rule, $scores) = /^score\s+(\S+)\s+(.*)$/) {
- my @scores = ($scores =~ /(\-*[\d\.]+)(?:\s+|$)/g);
+ if ( $key eq 'score' ) {
+ my($rule, @scores) = split(/\s+/, $value);
if (scalar @scores == 4) {
for my $index (0..3) {
$self->{scoreset}->[$index]->{$rule} = $scores[$index] + 0.0;
@@ -711,9 +719,11 @@

=cut

- if (/^rewrite_header\s+(subject|from|to)\s+(.+)$/i) {
- my $hdr = ucfirst(lc($1));
- my $string = $2;
+ if ( $key eq 'rewrite_header' ) {
+ # could check $hdr to be /subject|from|to/, but rest of the
+ # code already handles that ...
+ my($hdr, $string) = split(/\s+/, $value, 2);
+ $hdr = ucfirst(lc($hdr));
$string =~ tr/()/[]/;
$self->{rewrite_header}->{$hdr} = $string;
next;
@@ -731,8 +741,8 @@

=cut

- if (/^fold_headers\s+(\d+)$/) {
- $self->{fold_headers} = $1+0; next;
+ if ( $key eq 'fold_headers' ) {
+ $self->{fold_headers} = $value+0; next;
}

=item add_header { spam | ham | all } header_name string
@@ -767,6 +777,7 @@

=cut

+ # easier to do RE here ...
if (/^add_header\s+(ham|spam|all)\s+([A-Za-z0-9_-]+)\s+(.*?)\s*$/) {
my ($type, $name, $line) = ($1, $2, $3);
if ($line =~ /^"(.*)"$/) {
@@ -834,7 +845,7 @@

=cut

- if (/^clear_headers\s*$/) {
+ if ( $key eq 'clear_headers' ) {
for my $name (keys %{ $self->{headers_ham} }) {
delete $self->{headers_ham}->{$name} if $name ne "Checker-Version";
}
@@ -854,8 +865,8 @@

=cut

- if (/^report_safe_copy_headers\s+(.+?)\s*$/) {
- push(@{$self->{report_safe_copy_headers}}, split(/\s+/, $1));
+ if ( $key eq 'report_safe_copy_headers' ) {
+ push(@{$self->{report_safe_copy_headers}}, split(/\s+/, $value));
next;
}

@@ -882,8 +893,8 @@

=cut

- if (/^report_safe\s+(\d+)$/) {
- $self->{report_safe} = $1+0;
+ if ( $key eq 'report_safe' ) {
+ $self->{report_safe} = $value+0;
if (! $self->{report_safe}) {
$self->{headers_spam}->{"Report"} = "_REPORT_";
}
@@ -897,8 +908,8 @@

=cut

- if (/^report_charset\s*(.*)$/) {
- $self->{report_charset} = $1; next;
+ if ( $key eq 'report_charset' ) {
+ $self->{report_charset} = $value; next;
}

=item report ...some text for a report...
@@ -915,8 +926,8 @@

=cut

- if (/^report\b\s*(.*?)\s*$/) {
- my $report = $1;
+ if ( $key eq 'report' ) {
+ my $report = $value;
if ( $report =~ /^"(.*?)"$/ ) {
$report = $1;
}
@@ -930,7 +941,7 @@

=cut

- if (/^clear_report_template$/) {
+ if ( $key eq 'clear_report_template' ) {
$self->{report_template} = ''; next;
}

@@ -942,8 +953,8 @@

=cut

- if (/^report_contact\s+(.*?)\s*$/) {
- $self->{report_contact} = $1; next;
+ if ( $key eq 'report_contact' ) {
+ $self->{report_contact} = $value; next;
}

=item unsafe_report ...some text for a report...
@@ -959,8 +970,8 @@

=cut

- if (/^unsafe_report\b\s*(.*?)$/) {
- my $report = $1;
+ if ( $key eq 'unsafe_report' ) {
+ my $report = $value;
if ( $report =~ /^"(.*?)"$/ ) {
$report = $1;
}
@@ -974,7 +985,7 @@

=cut

- if (/^clear_unsafe_report_template$/) {
+ if ( $key eq 'clear_unsafe_report_template' ) {
$self->{unsafe_report_template} = ''; next;
}

@@ -989,8 +1000,8 @@

=cut

- if (/^spamtrap\s*(.*?)$/) {
- my $report = $1;
+ if ( $key eq 'spamtrap' ) {
+ my $report = $value;
if ( $report =~ /^"(.*?)"$/ ) {
$report = $1;
}
@@ -1003,7 +1014,7 @@

=cut

- if (/^clear_spamtrap_template$/) {
+ if ( $key eq 'clear_spamtrap_template' ) {
$self->{spamtrap_template} = ''; next;
}

@@ -1019,8 +1030,9 @@

=cut

- if (/^describe\s+(\S+)\s+(.*)$/) {
- $self->{descriptions}->{$1} = $2; next;
+ if ( $key eq 'describe' ) {
+ my($k,$v) = split(/\s+/, $value, 2);
+ $self->{descriptions}->{$k} = $v; next;
}

=back
@@ -1193,8 +1205,8 @@

=cut

- if (/^ok_languages\s+(.+)$/) {
- $self->{ok_languages} = $1; next;
+ if ( $key eq 'ok_languages' ) {
+ $self->{ok_languages} = $value; next;
}

=back
@@ -1247,8 +1259,8 @@

=cut

- if (/^ok_locales\s+(.+)$/) {
- $self->{ok_locales} = $1; next;
+ if ( $key eq 'ok_locales' ) {
+ $self->{ok_locales} = $value; next;
}

=back
@@ -1263,8 +1275,8 @@

=cut

- if (/^use_dcc\s+(\d+)$/) {
- $self->{use_dcc} = $1; next;
+ if ( $key eq 'use_dcc' ) {
+ $self->{use_dcc} = $value+0; next;
}

=item dcc_timeout n (default: 10)
@@ -1274,8 +1286,8 @@

=cut

- if (/^dcc_timeout\s+(\d+)$/) {
- $self->{dcc_timeout} = $1+0; next;
+ if ( $key eq 'dcc_timeout' ) {
+ $self->{dcc_timeout} = $value+0; next;
}

=item dcc_body_max NUMBER
@@ -1296,16 +1308,16 @@

=cut

- if (/^dcc_body_max\s+(\d+)/) {
- $self->{dcc_body_max} = $1+0; next;
+ if ( $key eq 'dcc_body_max' ) {
+ $self->{dcc_body_max} = $value+0; next;
}

- if (/^dcc_fuz1_max\s+(\d+)/) {
- $self->{dcc_fuz1_max} = $1+0; next;
+ if ( $key eq 'dcc_fuz1_max' ) {
+ $self->{dcc_fuz1_max} = $value+0; next;
}

- if (/^dcc_fuz2_max\s+(\d+)/) {
- $self->{dcc_fuz2_max} = $1+0; next;
+ if ( $key eq 'dcc_fuz2_max' ) {
+ $self->{dcc_fuz2_max} = $value+0; next;
}


@@ -1315,8 +1327,8 @@

=cut

- if (/^use_pyzor\s+(\d+)$/) {
- $self->{use_pyzor} = $1; next;
+ if ( $key eq 'use_pyzor' ) {
+ $self->{use_pyzor} = $value+0; next;
}

=item pyzor_timeout n (default: 10)
@@ -1326,8 +1338,8 @@

=cut

- if (/^pyzor_timeout\s+(\d+)$/) {
- $self->{pyzor_timeout} = $1+0; next;
+ if ( $key eq 'pyzor_timeout' ) {
+ $self->{pyzor_timeout} = $value+0; next;
}

=item pyzor_max NUMBER
@@ -1340,8 +1352,8 @@

=cut

- if (/^pyzor_max\s+(\d+)/) {
- $self->{pyzor_max} = $1+0; next;
+ if ( $key eq 'pyzor_max' ) {
+ $self->{pyzor_max} = $value+0; next;
}

=item trusted_networks ip.add.re.ss[/mask] ... (default: none)
@@ -1396,8 +1408,8 @@

=cut

- if (/^trusted_networks\s+(.+)$/) {
- foreach my $net (split (' ', $1)) {
+ if ( $key eq 'trusted_networks' ) {
+ foreach my $net (split (/\s+/, $value)) {
$self->{trusted_networks}->add_cidr ($net);
}
next;
@@ -1409,7 +1421,7 @@

=cut

- if (/^clear_trusted_networks$/) {
+ if ( $key eq 'clear_trusted_networks' ) {
$self->{trusted_networks} = Mail::SpamAssassin::NetSet->new(); next;
}

@@ -1431,8 +1443,8 @@

=cut

- if (/^internal_networks\s+(.+)$/) {
- foreach my $net (split (' ', $1)) {
+ if ( $key eq 'internal_networks' ) {
+ foreach my $net (split (/\s+/, $value)) {
$self->{internal_networks}->add_cidr ($net);
}
next;
@@ -1444,7 +1456,7 @@

=cut

- if (/^clear_internal_networks$/) {
+ if ( $key eq 'clear_internal_networks' ) {
$self->{internal_networks} = Mail::SpamAssassin::NetSet->new(); next;
}

@@ -1454,8 +1466,8 @@

=cut

- if (/^use_razor2\s+(\d+)$/) {
- $self->{use_razor2} = $1; next;
+ if ( $key eq 'use_razor2' ) {
+ $self->{use_razor2} = $value+0; next;
}

=item razor_timeout n (default: 10)
@@ -1465,8 +1477,8 @@

=cut

- if (/^razor_timeout\s+(\d+)$/) {
- $self->{razor_timeout} = $1; next;
+ if ( $key eq 'razor_timeout' ) {
+ $self->{razor_timeout} = $value+0; next;
}

=item use_bayes ( 0 | 1 ) (default: 1)
@@ -1475,8 +1487,8 @@

=cut

- if (/^use_bayes\s+(\d+)$/) {
- $self->{use_bayes} = $1; next;
+ if ( $key eq 'use_bayes' ) {
+ $self->{use_bayes} = $value+0; next;
}

=item skip_rbl_checks { 0 | 1 } (default: 0)
@@ -1486,8 +1498,8 @@

=cut

- if (/^skip_rbl_checks\s+(\d+)$/) {
- $self->{skip_rbl_checks} = $1+0; next;
+ if ( $key eq 'skip_rbl_checks' ) {
+ $self->{skip_rbl_checks} = $value+0; next;
}

=item rbl_timeout n (default: 15)
@@ -1512,8 +1524,8 @@

=cut

- if (/^rbl_timeout\s+(\d+)$/) {
- $self->{rbl_timeout} = $1+0; next;
+ if ( $key eq 'rbl_timeout' ) {
+ $self->{rbl_timeout} = $value+0; next;
}

=item check_mx_attempts n (default: 2)
@@ -1523,8 +1535,8 @@

=cut

- if (/^check_mx_attempts\s+(\S+)$/) {
- $self->{check_mx_attempts} = $1+0; next;
+ if ( $key eq 'check_mx_attempts' ) {
+ $self->{check_mx_attempts} = $value+0; next;
}

=item check_mx_delay n (default: 5)
@@ -1533,8 +1545,8 @@

=cut

- if (/^check_mx_delay\s+(\S+)$/) {
- $self->{check_mx_delay} = $1+0; next;
+ if ( $key eq 'check_mx_delay' ) {
+ $self->{check_mx_delay} = $value+0; next;
}


@@ -1557,6 +1569,7 @@

=cut

+ # RE is easier for now ...
if (/^dns_available\s+(yes|no|test|test:\s+.+)$/) {
$self->{dns_available} = ($1 or "test"); next;
}
@@ -1567,8 +1580,8 @@

=cut

- if (/^use_hashcash\s+(\d+)$/) {
- $self->{use_hashcash} = $1; next;
+ if ( $key eq 'use_hashcash' ) {
+ $self->{use_hashcash} = $value+0; next;
}

=item hashcash_accept add@ress.com ...
@@ -1589,8 +1602,8 @@

=cut

- if (/^hashcash_accept\s+(.+)$/) {
- $self->add_to_addrlist ('hashcash_accept', split (' ', $1)); next;
+ if ( $key eq 'hashcash_accept' ) {
+ $self->add_to_addrlist ('hashcash_accept', split (/\s+/, $value)); next;
}

=item hashcash_doublespend_path /path/to/file (default: ~/.spamassassin/hashcash_seen)
@@ -1605,8 +1618,8 @@

=cut

- if (/^hashcash_doublespend_path\s+(.*)$/) {
- $self->{hashcash_doublespend_path} = $1; next;
+ if ( $key eq 'hashcash_doublespend_path' ) {
+ $self->{hashcash_doublespend_path} = $value; next;
}

=item hashcash_doublespend_file_mode (default: 0700)
@@ -1619,8 +1632,8 @@

=cut

- if (/^hashcash_doublespend_file_mode\s+(.*)$/) {
- $self->{hashcash_doublespend_file_mode} = $1; next;
+ if ( $key eq 'hashcash_doublespend_file_mode' ) {
+ $self->{hashcash_doublespend_file_mode} = $value+0; next;
}

=back
@@ -1646,8 +1659,8 @@

=cut

- if (/^auto_whitelist_factor\s+(.*)$/) {
- $self->{auto_whitelist_factor} = $1; next;
+ if ( $key eq 'auto_whitelist_factor' ) {
+ $self->{auto_whitelist_factor} = $value+0; next;
}

=item bayes_auto_learn ( 0 | 1 ) (default: 1)
@@ -1668,8 +1681,8 @@

=cut

- if (/^(?:bayes_)?auto_learn\s+(.*)$/) {
- $self->{bayes_auto_learn} = $1+0; next;
+ if ( $key eq 'bayes_auto_learn' ) {
+ $self->{bayes_auto_learn} = $value+0; next;
}

=item bayes_auto_learn_threshold_nonspam n.nn (default: 0.1)
@@ -1679,8 +1692,8 @@

=cut

- if (/^(?:bayes_)?auto_learn_threshold_nonspam\s+(.*)$/) {
- $self->{bayes_auto_learn_threshold_nonspam} = $1+0; next;
+ if ( $key eq 'bayes_auto_learn_threshold_nonspam' ) {
+ $self->{bayes_auto_learn_threshold_nonspam} = $value+0; next;
}

=item bayes_auto_learn_threshold_spam n.nn (default: 12.0)
@@ -1694,8 +1707,8 @@

=cut

- if (/^(?:bayes_)?auto_learn_threshold_spam\s+(.*)$/) {
- $self->{bayes_auto_learn_threshold_spam} = $1+0; next;
+ if ( $key eq 'bayes_auto_learn_threshold_spam' ) {
+ $self->{bayes_auto_learn_threshold_spam} = $value+0; next;
}

=item bayes_ignore_header header_name
@@ -1712,8 +1725,8 @@

=cut

- if (/^bayes_ignore_header\s+(.*)$/) {
- push (@{$self->{bayes_ignore_headers}}, $1); next;
+ if ( $key eq 'bayes_ignore_header' ) {
+ push (@{$self->{bayes_ignore_headers}}, $value); next;
}

=item bayes_min_ham_num (Default: 200)
@@ -1726,12 +1739,12 @@

=cut

- if (/^bayes_min_ham_num\s+(.*)$/) {
- $self->{bayes_min_ham_num} = $1+0; next;
+ if ( $key eq 'bayes_min_ham_num' ) {
+ $self->{bayes_min_ham_num} = $value+0; next;
}

- if (/^bayes_min_spam_num\s+(.*)$/) {
- $self->{bayes_min_spam_num} = $1+0; next;
+ if ( $key eq 'bayes_min_spam_num' ) {
+ $self->{bayes_min_spam_num} = $value+0; next;
}

=item bayes_learn_during_report (Default: 1)
@@ -1742,8 +1755,8 @@

=cut

- if (/^bayes_learn_during_report\s+(.*)$/) {
- $self->{bayes_learn_during_report} = $1+0; next;
+ if ( $key eq 'bayes_learn_during_report' ) {
+ $self->{bayes_learn_during_report} = $value+0; next;
}

=back
@@ -1757,6 +1770,8 @@
This can be enabled here, but should instead be done with the
C<rewrite_header> option described above.

+The use of this option is deprecated.
+
=cut

if (/^rewrite_subject\s+(\d+)$/) {
@@ -1912,6 +1927,8 @@
next;
}

+##############
+
=item pyzor_options [option ...]

Additional options for the pyzor(1) command line. Note that for security,
@@ -1919,6 +1936,7 @@

=cut

+ # leave as RE for now?
if (/^pyzor_options\s+([-A-Za-z0-9_\/ ]+)$/) {
$self->{pyzor_options} = $1;
next;
@@ -1941,8 +1959,8 @@

=cut

- if (/^num_check_received\s+(\d+)$/) {
- $self->{num_check_received} = $1+0; next;
+ if ( $key eq 'num_check_received' ) {
+ $self->{num_check_received} = $value+0; next;
}

###########################################################################
@@ -1994,8 +2012,8 @@

=cut

- if (/^allow_user_rules\s+(\d+)$/) {
- $self->{allow_user_rules} = $1+0;
+ if ( $key eq 'allow_user_rules' ) {
+ $self->{allow_user_rules} = $value+0;
dbg( ($self->{allow_user_rules} ? "Allowing":"Not allowing") . " user rules!"); next;
}

@@ -2119,6 +2137,7 @@

=cut

+ # easier as RE now
if (/^header\s+(\S+)\s+(?:rbl)?eval:(.*)$/) {
my ($name, $fn) = ($1, $2);

@@ -2135,8 +2154,8 @@
$self->{descriptions}->{$1} = "Found a $2 header";
next;
}
- if (/^header\s+(\S+)\s+(.*)$/) {
- $self->add_test ($1, $2, TYPE_HEAD_TESTS);
+ if ( $key eq 'header' ) {
+ $self->add_test (split(/\s+/,$value,2), TYPE_HEAD_TESTS);
next;
}

@@ -2157,12 +2176,13 @@

=cut

+ # easier as RE right now
if (/^body\s+(\S+)\s+eval:(.*)$/) {
$self->add_test ($1, $2, TYPE_BODY_EVALS);
next;
}
- if (/^body\s+(\S+)\s+(.*)$/) {
- $self->add_test ($1, $2, TYPE_BODY_TESTS);
+ if ( $key eq 'body' ) {
+ $self->add_test (split(/\s+/,$value,2), TYPE_BODY_TESTS);
next;
}

@@ -2183,8 +2203,8 @@
# $self->add_test ($1, $2, TYPE_URI_EVALS);
# next;
# }
- if (/^uri\s+(\S+)\s+(.*)$/) {
- $self->add_test ($1, $2, TYPE_URI_TESTS);
+ if ( $key eq 'uri' ) {
+ $self->add_test (split(/\s+/,$value,2), TYPE_URI_TESTS);
next;
}

@@ -2202,12 +2222,13 @@

=cut

+ # easier as RE now
if (/^rawbody\s+(\S+)\s+eval:(.*)$/) {
$self->add_test ($1, $2, TYPE_RAWBODY_EVALS);
next;
}
- if (/^rawbody\s+(\S+)\s+(.*)$/) {
- $self->add_test ($1, $2, TYPE_RAWBODY_TESTS);
+ if ( $key eq 'rawbody' ) {
+ $self->add_test (split(/\s+/,$value,2), TYPE_RAWBODY_TESTS);
next;
}

@@ -2225,12 +2246,13 @@

=cut

+ # easier as RE now
if (/^full\s+(\S+)\s+eval:(.*)$/) {
$self->add_test ($1, $2, TYPE_FULL_EVALS);
next;
}
- if (/^full\s+(\S+)\s+(.*)$/) {
- $self->add_test ($1, $2, TYPE_FULL_TESTS);
+ if ( $key eq 'full' ) {
+ $self->add_test (split(/\s+/,$value,2), TYPE_FULL_TESTS);
next;
}

@@ -2262,8 +2284,8 @@

=cut

- if (/^meta\s+(\S+)\s+(.*)$/) {
- $self->add_test ($1, $2, TYPE_META_TESTS);
+ if ( $key eq 'meta' ) {
+ $self->add_test (split(/\s+/,$value,2), TYPE_META_TESTS);
next;
}

@@ -2298,8 +2320,9 @@

=cut

- if (/^tflags\s+(\S+)\s+(.+)$/) {
- $self->{tflags}->{$1} = $2; next;
+ if ( $key eq 'tflags' ) {
+ my($k,$v) = split(/\s+/, $value, 2);
+ $self->{tflags}->{$k} = $v; next;
next; # ignored in SpamAssassin modules
}

@@ -2330,6 +2353,7 @@

=cut

+ # RE ...
if (/^test\s+(\S+)\s+(ok|fail)\s+(.*)$/) {
$self->add_regression_test($1, $2, $3); next;
}
@@ -2341,8 +2365,8 @@

=cut

- if (/^razor_config\s+(.*)$/) {
- $self->{razor_config} = $1; next;
+ if ( $key eq 'razor_config' ) {
+ $self->{razor_config} = $value; next;
}

=item pyzor_path STRING
@@ -2354,8 +2378,8 @@

=cut

- if (/^pyzor_path\s+(.+)$/) {
- $self->{pyzor_path} = $1; next;
+ if ( $key eq 'pyzor_path' ) {
+ $self->{pyzor_path} = $value; next;
}

=item dcc_home STRING
@@ -2368,8 +2392,8 @@

=cut

- if (/^dcc[-_]home\s+(.+)$/) {
- $self->{dcc_home} = $1; next;
+ if ( $key eq 'dcc_home' ) {
+ $self->{dcc_home} = $value; next;
}

=item dcc_dccifd_path STRING
@@ -2380,8 +2404,8 @@

=cut

- if (/^dcc[-_]dccifd[-_]path\s+(.+)$/) {
- $self->{dcc_dccifd_path} = $1; next;
+ if ( $key eq 'dcc_dccifd_path' ) {
+ $self->{dcc_dccifd_path} = $value; next;
}

=item dcc_path STRING
@@ -2393,8 +2417,8 @@

=cut

- if (/^dcc_path\s+(.+)$/) {
- $self->{dcc_path} = $1; next;
+ if ( $key eq 'dcc_path' ) {
+ $self->{dcc_path} = $value; next;
}

=item dcc_options options
@@ -2406,6 +2430,7 @@

=cut

+ # RE ...
if (/^dcc_options\s+([A-Z -]+)/) {
$self->{dcc_options} = $1; next;
}
@@ -2425,8 +2450,8 @@

=cut

- if (/^use_auto_whitelist\s+(\d+)$/) {
- $self->{use_auto_whitelist} = $1; next;
+ if ( $key eq 'use_auto_whitelist' ) {
+ $self->{use_auto_whitelist} = $value+0; next;
}

=item auto_whitelist_factory module (default: Mail::SpamAssassin::DBBasedAddrList)
@@ -2435,8 +2460,8 @@

=cut

- if (/^auto_whitelist_factory\s+(.*)$/) {
- $self->{auto_whitelist_factory} = $1; next;
+ if ( $key eq 'auto_whitelist_factory' ) {
+ $self->{auto_whitelist_factory} = $value; next;
}

=item auto_whitelist_path /path/to/file (default: ~/.spamassassin/auto-whitelist)
@@ -2447,8 +2472,8 @@

=cut

- if (/^auto_whitelist_path\s+(.*)$/) {
- $self->{auto_whitelist_path} = $1; next;
+ if ( $key eq 'auto_whitelist_path' ) {
+ $self->{auto_whitelist_path} = $value; next;
}

=item bayes_path /path/to/file (default: ~/.spamassassin/bayes)
@@ -2466,8 +2491,8 @@

=cut

- if (/^bayes_path\s+(.*)$/) {
- $self->{bayes_path} = $1; next;
+ if ( $key eq 'bayes_path' ) {
+ $self->{bayes_path} = $value; next;
}

=item auto_whitelist_file_mode (default: 0700)
@@ -2480,8 +2505,8 @@

=cut

- if (/^auto_whitelist_file_mode\s+(.*)$/) {
- $self->{auto_whitelist_file_mode} = $1; next;
+ if ( $key eq 'auto_whitelist_file_mode' ) {
+ $self->{auto_whitelist_file_mode} = $value+0; next;
}

=item bayes_file_mode (default: 0700)
@@ -2494,8 +2519,8 @@

=cut

- if (/^bayes_file_mode\s+(.*)$/) {
- $self->{bayes_file_mode} = $1; next;
+ if ( $key eq 'bayes_file_mode' ) {
+ $self->{bayes_file_mode} = $value+0; next;
}

=item bayes_use_hapaxes (default: 1)
@@ -2506,8 +2531,8 @@

=cut

- if (/^bayes_use_hapaxes\s+(.*)$/) {
- $self->{bayes_use_hapaxes} = $1; next;
+ if ( $key eq 'bayes_use_hapaxes' ) {
+ $self->{bayes_use_hapaxes} = $value+0; next;
}

=item bayes_use_chi2_combining (default: 1)
@@ -2519,8 +2544,8 @@

=cut

- if (/^bayes_use_chi2_combining\s+(.*)$/) {
- $self->{bayes_use_chi2_combining} = $1; next;
+ if ( $key eq 'bayes_use_chi2_combining' ) {
+ $self->{bayes_use_chi2_combining} = $value+0; next;
}

=item bayes_journal_max_size (default: 102400)
@@ -2532,8 +2557,8 @@

=cut

- if (/^bayes_journal_max_size\s+(\d+)$/) {
- $self->{bayes_journal_max_size} = $1; next;
+ if ( $key eq 'bayes_journal_max_size' ) {
+ $self->{bayes_journal_max_size} = $value+0; next;
}

=item bayes_expiry_max_db_size (default: 150000)
@@ -2545,8 +2570,8 @@

=cut

- if (/^bayes_expiry_max_db_size\s+(\d+)$/) {
- $self->{bayes_expiry_max_db_size} = $1; next;
+ if ( $key eq 'bayes_expiry_max_db_size' ) {
+ $self->{bayes_expiry_max_db_size} = $value+0; next;
}

=item bayes_auto_expire (default: 1)
@@ -2557,8 +2582,8 @@

=cut

- if (/^bayes_auto_expire\s+(\d+)$/) {
- $self->{bayes_auto_expire} = $1; next;
+ if ( $key eq 'bayes_auto_expire' ) {
+ $self->{bayes_auto_expire} = $value+0; next;
}

=item bayes_learn_to_journal (default: 0)
@@ -2571,8 +2596,8 @@

=cut

- if (/^bayes_learn_to_journal\s+(.*)$/) {
- $self->{bayes_learn_to_journal} = $1+0; next;
+ if ( $key eq 'bayes_learn_to_journal' ) {
+ $self->{bayes_learn_to_journal} = $value+0; next;
}

=item user_scores_dsn DBI:databasetype:databasename:hostname:port
@@ -2582,8 +2607,8 @@

=cut

- if (/^user_scores_dsn\s+(\S+)$/) {
- $self->{user_scores_dsn} = $1; next;
+ if ( $key eq 'user_scores_dsn' ) {
+ $self->{user_scores_dsn} = $value; next;
}

=item user_scores_sql_username username
@@ -2592,8 +2617,8 @@

=cut

- if(/^user_scores_sql_username\s+(\S+)$/) {
- $self->{user_scores_sql_username} = $1; next;
+ if( $key eq 'user_scores_sql_username' ) {
+ $self->{user_scores_sql_username} = $value; next;
}

=item user_scores_sql_password password
@@ -2602,8 +2627,8 @@

=cut

- if(/^user_scores_sql_password\s+(\S+)$/) {
- $self->{user_scores_sql_password} = $1; next;
+ if( $key eq 'user_scores_sql_password' ) {
+ $self->{user_scores_sql_password} = $value; next;
}

# REIMPLEMENT: to allow integration with Horde's pref stuff. allow setting
@@ -2621,6 +2646,7 @@

=cut

+ # leave as RE right now
if (/^loadplugin\s+(\S+)\s+(\S+)$/) {
$self->load_plugin ($1, $2); next;
}
@@ -2639,6 +2665,23 @@
$self->{errors}++;
}

+ # All scoresets should have a score defined, so if this one doesn't,
+ # we should set a default... Do this here instead of add_test
+ # because mostly 'score' occurs after the rule is specified, so why
+ # set the scores to default, then set them again at 'score'?
+ #
+ while ( my($k,$v) = each %{$self->{tests}} ) {
+ if ( ! exists $self->{scores}->{$k} ) {
+ # T_ rules (in a testing probationary period) get low, low scores
+ my $set_score = ($k =~/^T_/) ? 0.01 : 1.0;
+
+ $set_score = -$set_score if ( $self->{tflags}->{$k} =~ /\bnice\b/ );
+ for my $index (0..3) {
+ $self->{scoreset}->[$index]->{$k} = $set_score;
+ }
+ }
+ }
+
delete $self->{scoresonly};
}

@@ -2651,17 +2694,6 @@

if ($self->{scoresonly}) {
$self->{user_rules_to_compile}->{$type} = 1;
- }
-
- # All scoresets should have a score defined, so if the one we're in
- # doesn't, we need to set them all.
- # TODO? - nice tests should get negative scores
- if ( ! exists $self->{scores}->{$name} ) {
- # T_ rules (in a testing probationary period) get low, low scores
- my $set_score = ($name =~/^T_/) ? 0.01 : 1.0;
- for my $index (0..3) {
- $self->{scoreset}->[$index]->{$name} = $set_score;
- }
}
}


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm Mon Jan 26 14:09:05 2004
@@ -16,6 +16,23 @@
# limitations under the License.
# </@LICENSE>

+=head1 NAME
+
+Mail::SpamAssassin::MsgContainer - decode, render, and make available MIME message parts
+
+=head1 SYNOPSIS
+
+=head1 DESCRIPTION
+
+This module will encapsulate an email message and allow access to
+the various MIME message parts.
+
+=head1 PUBLIC METHODS
+
+=over 4
+
+=cut
+
package Mail::SpamAssassin::MsgContainer;
use strict;
use MIME::Base64;
@@ -24,6 +41,10 @@
use MIME::Base64;
use MIME::QuotedPrint;

+=item new()
+
+=cut
+
# M::SA::MIME is an object method used to encapsulate a message's MIME part
#
sub new {
@@ -47,6 +68,10 @@
$self;
}

+=item find_parts()
+
+=cut
+
# Used to find any MIME parts whose simple content-type matches a given regexp
# Searches it's own and any children parts. Returns an array of MIME
# objects which match.
@@ -79,6 +104,10 @@
return @ret;
}

+=item header()
+
+=cut
+
# Store or retrieve headers from a given MIME object
#
sub header {
@@ -114,6 +143,10 @@
}
}

+=item raw_header()
+
+=cut
+
# Retrieve raw headers from a given MIME object
#
sub raw_header {
@@ -134,6 +167,10 @@
}
}

+=item add_body_part()
+
+=cut
+
# Add a MIME child part to ourselves
sub add_body_part {
my($self, $part) = @_;
@@ -142,7 +179,7 @@
push @{ $self->{'body_parts'} }, $part;
}

-=item _decode()
+=item decode()

Decode base64 and quoted-printable parts.

@@ -195,14 +232,10 @@
}
}

-=item _html_near_start()
-
-Look at a text scalar and determine whether it should be rendered
-as text/html. Based on a heuristic which simulates a certain common
-mail client.
-
-=cut
-
+# Look at a text scalar and determine whether it should be rendered
+# as text/html. Based on a heuristic which simulates a certain
+# well-used/common mail client.
+#
sub _html_near_start {
my ($pad) = @_;

@@ -269,6 +302,10 @@
return ($self->{'rendered_type'}, $self->{'rendered'});
}

+=item content_summary()
+
+=cut
+
# return an array with scalars describing mime parts
sub content_summary {
my($self, $recurse) = @_;
@@ -299,6 +336,10 @@
}
}

+=item delete_header()
+
+=cut
+
sub delete_header {
my($self, $hdr) = @_;

@@ -327,12 +368,8 @@
}
}

-=item _decode_header()
-
-Decode base64 and quoted-printable in headers according to RFC2047.
-
-=cut
-
+# Decode base64 and quoted-printable in headers according to RFC2047.
+#
sub _decode_header {
my($header) = @_;

@@ -350,6 +387,10 @@
return $header;
}

+=item get_pristine_header()
+
+=cut
+

sub get_pristine_header {
my ($self, $hdr) = @_;
@@ -364,7 +405,10 @@
}
}

-#sub get { shift->get_header(@_); }
+=item get_header()
+
+=cut
+
sub get_header {
my ($self, $hdr, $raw) = @_;
$raw ||= 0;
@@ -388,7 +432,10 @@
}
}

-#sub header { shift->get_all_headers(@_); }
+=item get_all_headers()
+
+=cut
+
sub get_all_headers {
my ($self, $raw) = @_;
$raw ||= 0;
@@ -407,7 +454,10 @@
}
}

-#sub body { return shift->get_body(@_); }
+=item get_body()
+
+=cut
+
sub get_body {
my ($self) = @_;
my @ret = split(/^/m, $self->{pristine_body});
@@ -416,20 +466,36 @@

# ---------------------------------------------------------------------------

+=item get_pristine()
+
+=cut
+
sub get_pristine {
my ($self) = @_;
return $self->{pristine_headers} . $self->{pristine_body};
}

+=item get_pristine_body()
+
+=cut
+
sub get_pristine_body {
my ($self) = @_;
return $self->{pristine_body};
}

+=item as_string()
+
+=cut
+
sub as_string {
my ($self) = @_;
return $self->get_all_headers(1) . "\n" . $self->{pristine_body};
}
+
+=item ignore()
+
+=cut

sub ignore {
my ($self) = @_;

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm Mon Jan 26 14:09:05 2004
@@ -1,6 +1,24 @@
+# $Id: MIME.pm,v 1.8 2003/10/02 22:59:00 quinlan Exp $
+
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </@LICENSE>
+
=head1 NAME

-Mail::SpamAssassin::MsgParser - parse, decode, and render MIME body parts
+Mail::SpamAssassin::MsgParser - parse and store MIME formatted messages

=head1 SYNOPSIS

@@ -11,7 +29,7 @@
be decoded as necessary, and text/html parts will be rendered into a
standard text format, suitable for use in SpamAssassin.

-=head1 METHODS
+=head1 PUBLIC METHODS

=over 4

@@ -27,18 +45,18 @@

=item parse()

-Unlike most modules, Mail::SpamAssassin::MsgParser will not return an
-object of the same type, but rather a Mail::SpamAssassin::MsgContainer object.
-To use it, simply call C<Mail::SpamAssassin::MsgParser->parse($msg)>,
-where $msg is a scalar with the entire contents of the mesage.
+Unlike most modules, Mail::SpamAssassin::MsgParser will not return
+an object of the same type, but rather a Mail::SpamAssassin::MsgContainer
+object. To use it, simply call
+C<Mail::SpamAssassin::MsgParser->parse($msg)>, where $msg is either
+a scalar, an array reference, or a glob, with the entire contents
+of the mesage.

The procedure used to parse a message is recursive and ends up generating
a tree of M::SA::MsgContainer objects. parse() will generate the parent node
of the tree, then pass the body of the message to _parse_body() which begins
the recursive process.

-This is the only public method available!
-
=cut

sub parse {
@@ -110,6 +128,8 @@
return $msg;
}

+=head1 NON-PUBLIC METHODS
+
=item _parse_body()

_parse_body() passes the body part that was passed in onto the
@@ -157,7 +177,8 @@

=item _parse_multipart()

-Generate a root node, and for each child part call _parse_body().
+Generate a root node, and for each child part call _parse_body()
+to generate the tree.

=cut

@@ -304,5 +325,6 @@
=head1 SEE ALSO

C<Mail::SpamAssassin>
+C<Mail::SpamAssassin::MsgContainer>
C<spamassassin>


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Mon Jan 26 14:09:05 2004
@@ -54,7 +54,6 @@

use Mail::SpamAssassin::EvalTests;
use Mail::SpamAssassin::AutoWhitelist;
-use Mail::SpamAssassin::HTML;
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Received;
use Mail::SpamAssassin::Util;
@@ -101,7 +100,6 @@
}

# HTML parser stuff
- $self->{html_mod} = Mail::SpamAssassin::HTML->new();
$self->{html} = {};

bless ($self, $class);
@@ -1003,6 +1001,7 @@
$text .= $text ? "\n$rnd" : $rnd;

# TVD - if there are multiple parts, what should we do?
+ # right now, just use the last one ...
$self->{html} = $p->{html_results} if ( $type eq 'text/html' );
}
else {
@@ -1477,6 +1476,11 @@
# bug 2844
# http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
$uri =~ s/^(https?:\/\/[^\/\?]+)\?/$1\/?/;
+
+ # deal with encoding of chars ...
+ # this is just the set of printable chars, minus ' ' (aka: dec 33-126)
+ #
+ $uri =~ s/\&\#0*(3[3-9]|[4-9]\d|1[01]\d|12[0-6]);/sprintf "%c",$1/e;

my($nuri, $unencoded, $encoded) = Mail::SpamAssassin::Util::URLEncode($uri);
if ( $nuri ne $uri ) {

Modified: incubator/spamassassin/trunk/t/dns.t
==============================================================================
--- incubator/spamassassin/trunk/t/dns.t (original)
+++ incubator/spamassassin/trunk/t/dns.t Mon Jan 26 14:09:05 2004
@@ -12,6 +12,8 @@
plan tests => ((TEST_ENABLED && HAS_NET_DNS) ? 6 : 0),
};

+exit unless (TEST_ENABLED && HAS_NET_DNS);
+
# ---------------------------------------------------------------------------

%patterns = (

Modified: incubator/spamassassin/trunk/t/rule_tests.t
==============================================================================
--- incubator/spamassassin/trunk/t/rule_tests.t (original)
+++ incubator/spamassassin/trunk/t/rule_tests.t Mon Jan 26 14:09:05 2004
@@ -19,7 +19,6 @@
use Test;
use Mail::SpamAssassin;
use Mail::SpamAssassin::MsgParser;
-#use Data::Dumper; $Data::Dumper::Indent=1;
use vars qw($num_tests);

$num_tests = 1;