Mailing List Archive

svn commit: r329629 - in /spamassassin/trunk/masses/rule-qa: automc/ruleqa.cgi rule-hits-over-time
Author: jm
Date: Sun Oct 30 12:33:15 2005
New Revision: 329629

URL: http://svn.apache.org/viewcvs?rev=329629&view=rev
Log:
redo rule-hits-over-time using the more useful gnuplot

Modified:
spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi
spamassassin/trunk/masses/rule-qa/rule-hits-over-time

Modified: spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi?rev=329629&r1=329628&r2=329629&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/trunk/masses/rule-qa/automc/ruleqa.cgi Sun Oct 30 12:33:15 2005
@@ -19,13 +19,13 @@
close CF;

our %freqs_filenames = (
- 'DETAILS.age' => 'set 0, broken down by message age',
+ 'DETAILS.age' => 'set 0, broken down by message age in weeks',
'DETAILS.all' => 'set 0, broken down by contributor',
'DETAILS.new' => 'set 0, in aggregate',
'HTML.age' => 'set 0, by message age, HTML messages only',
'HTML.all' => 'set 0, by contributor, HTML messages only',
'HTML.new' => 'set 0, in aggregate, HTML messages only',
- 'NET.age' => 'set 1 (network), by message age',
+ 'NET.age' => 'set 1 (network), by message age in weeks',
'NET.all' => 'set 1 (network), by contributor',
'NET.new' => 'set 1 (network), in aggregate',
'OVERLAP.new' => 'set 0, overlaps between rules',

Modified: spamassassin/trunk/masses/rule-qa/rule-hits-over-time
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/rule-hits-over-time?rev=329629&r1=329628&r2=329629&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/rule-hits-over-time (original)
+++ spamassassin/trunk/masses/rule-qa/rule-hits-over-time Sun Oct 30 12:33:15 2005
@@ -1,6 +1,6 @@
#!/usr/bin/perl -w
#
-# rule-hits-over-time - produce graphs of rule hits over time, using GD::Graph
+# rule-hits-over-time - produce graphs of rule hits over time, using gnuplot
#
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
@@ -21,8 +21,6 @@
use Getopt::Long;
use SDBM_File;
use GD;
-use GD::Graph;
-use GD::Graph::colour qw(:colours :lists :files :convert);

use strict;
use warnings;
@@ -68,8 +66,6 @@
my $graph_x = $opt_size_x || 800;
my $graph_y = $opt_size_y || 400;
my $scale_to_total_volume = ($opt_as_counts ? 0 : 1);
-my $graph_files_individually = 0; # or as ham & spam sets
-# my $y_ceiling = 3000; # mails per $period

my $fname_counter = 1;
my %graph_png_data = ();
@@ -78,8 +74,8 @@
my %allresults = ();
my @allfiles = ();

-my $gd;
-my $graph_data;
+my $graph_data = [];
+
my $this_file_results;

my $lastbucket;
@@ -95,25 +91,13 @@
}

my $file_sets = [ ]; # split into ham and spam
+$file_sets = [ [ 'TITLE:hits in ham' ], [ 'TITLE:hits in spam' ] ];

-if (!$graph_files_individually) {
- $file_sets = [ [ 'TITLE:hits in ham' ], [ 'TITLE:hits in spam' ] ];
-}
-
-# create all images as truecolor, with opaque background
-# OFF: causes white parts to be output in black. GD::Graph bug
-# GD::Image->trueColor(1);
-
foreach my $file (@ARGV) {
- if ($graph_files_individually) {
- push @{$file_sets}, [ $file ];
- }
- else {
- if ($file =~ /\bham\b/) {
- push @{$file_sets->[0]}, $file;
- } else {
- push @{$file_sets->[1]}, $file;
- }
+ if ($file =~ /\bham\b/) {
+ push @{$file_sets->[0]}, $file;
+ } else {
+ push @{$file_sets->[1]}, $file;
}
}

@@ -121,13 +105,13 @@
@allfiles = ();
%allbuckets = ();
%allresults = ();
- @allfiles = ();

my $settitle = '';
if ($set->[0] =~ /^TITLE:(.*)$/) {
$settitle = $1; shift(@{$set});
}
- create_gd("$opt_rule $settitle");
+
+ create_gp("$opt_rule $settitle");

foreach my $file (@{$set}) {
if (!$opt_text) {
@@ -151,17 +135,16 @@
$this_file_results = $allresults{$file};
read_logs($file);

- $graph_data = GD::Graph::Data->new();
+ $graph_data = [];
summarise();
}

- plot_gd();
+ plot_gp();
}

my $format = "gif";
-# my $format = $gd->export_format;

-if (!$graph_files_individually) {
+{
my $both = GD::Image->new($graph_x, 15 + ($graph_y * 2));
my $file01 = GD::Image->newFromPngData($graph_png_data{"file01"}, 1);
my $file02 = GD::Image->newFromPngData($graph_png_data{"file02"}, 1);
@@ -192,9 +175,6 @@

$both->gif();
}
-else {
- warn "TODO: cannot produce combined images in multi-file mode";
-}

if ($opt_cgi) {
system ("cd /; rm -rf $tmpdir"); # clean up tmp files
@@ -206,16 +186,31 @@
my $total_n = 0;
my @cols = ();
foreach my $file (@allfiles) {
- my $seen_y = $allresults{$file}->{"y".$bucket} || 0;
- my $seen_n = $allresults{$file}->{"n".$bucket} || 0;
+ my $seen_y = $allresults{$file}->{"y".$bucket};
+ my $seen_n = $allresults{$file}->{"n".$bucket};
+ if (!defined $seen_y && !defined $seen_n) {
+ $seen_n = $seen_y = -1;
+ } elsif (!defined $seen_y || !defined $seen_n) {
+ # assert: enforce both < 0, if either is
+ warn "oops? seen_y=$seen_y seen_n=$seen_n, should be both < 0";
+ $seen_n = $seen_y = -1;
+ }
+
+ if ($seen_y < 0 && $seen_n > 0 || $seen_n < 0 && $seen_y > 0) {
+ }
+
if ($scale_to_total_volume) {
- my $frac = $seen_y / (($seen_y + $seen_n) || 0.0001);
- push @cols, ($frac * 100.0);
+ if ($seen_y > 0) {
+ my $frac = $seen_y / (($seen_y + $seen_n) || 0.0001);
+ push @cols, ($frac * 100.0);
+ }
+ else {
+ push @cols, -1;
+ }
$total_n = 100;
}
else {
$total_n += $seen_n;
- # if ($y_ceiling && $seen_y > $y_ceiling) { $seen_y = $y_ceiling; }
push (@cols, $seen_y);
}
}
@@ -223,7 +218,6 @@
if ($scale_to_total_volume) {
@cols = ($bucket, @cols); # total_n is always "100"
} else {
- # if ($y_ceiling && $total_n > $y_ceiling) { $total_n = $y_ceiling; }
@cols = ($bucket, $total_n, @cols);
}

@@ -231,7 +225,7 @@
print join(' ',@cols)."\n";
}
else {
- $graph_data->add_point(@cols);
+ push (@{$graph_data}, \@cols);
}
}
}
@@ -291,87 +285,96 @@
$this_file_results->{"n".$lastbucket} = $seen_n; $seen_n = 0;
}

-sub create_gd {
+sub create_gp {
my $title = shift;

- use GD::Graph::lines;
- $gd = GD::Graph::lines->new($graph_x, $graph_y);
- $gd->set (
- title => $title,
- box_axis => 1,
- transparent => 1,
- ##interlaced => 0,
- # show_values => 1,
-
- bgclr => "#ffffff", # doesn't seem to work?!
- boxclr => "#ffffff",
- fgclr => "#444444",
- labelclr => "#333333",
-
- dclrs => [
- "#33cc00", # green
- "#ff3300", # red
- "#0000cc", # blue
- "#99cc00", # mauve
- "#ff9900", # orange
- "#cccc00", # yellowish
- "#333333", # dark grey
- "#999999" # light grey
- ],
-
- t_margin => 5,
- b_margin => 5,
- l_margin => 5,
- r_margin => 20,
-
- y_label => ($scale_to_total_volume ?
- "\%age of mail in period" : "Hits in period"),
-
- zero_axis => 1,
-
- # x_label => "Time (in blocks of $period secs)",
- x_labels_vertical => 0,
- x_tick_number => 'auto',
- x_number_format => \&fmt_time_t,
- );
-
- # turned off, so that the Y axis scales nicely. can be reenabled
- #if ($scale_to_total_volume) {
- # $gd->set (
- # y_min_value => 0,
- # y_max_value => 100,
- # );
- #}
+ my $y_label = ($scale_to_total_volume ?
+ "\%age of mail in period" : "Hits in period");
+
+ open (GP, "| gnuplot -") or die "cannot run gnuplot";
+
+ print GP qq{
+
+ set xlabel 'Time, in blocks of $period secs. (NOTE: -1% hitrate means no data for that time period)'
+ set ylabel '$y_label'
+
+ set terminal png medium size $graph_x,$graph_y \\
+ xffffff x444444 x33cc00 \\
+ xff3300 x0000cc x99cc00 xff9900 \\
+ xcccc00 x333333 x999999 x9500d3
+
+ set out 'out.png'
+
+ set grid back xtics ytics
+
+ set xdata time
+ set timefmt "%Y-%m-%d"
+ set title "$title"
+
+ };
}

sub fmt_time_t {
my $tt = shift;
-
use POSIX qw(strftime);
- return strftime "%b %e %Y", gmtime($tt);
+ return strftime "%Y-%m-%d", gmtime($tt);
}

-sub plot_gd {
- if ($opt_text) {
- print STDERR '
+sub plot_gp {
+ if (!$opt_text)
+ {
+ open (DATA, ">plot.data") or die;
+ foreach my $line (@{$graph_data}) {
+ my $tt = shift @$line;
+ print DATA fmt_time_t($tt)," ",join(' ', @$line),"\n";
+ }
+ close DATA or die;
+
+ my @plot = ();
+ foreach my $i (0 .. (scalar @allfiles - 1)) {
+ my $legend = filename_to_legend ($allfiles[$i]);
+ my $style = $i+1;
+ my $col = $i+2;

- plot "times" using 0:1, "times" using 0:2
+ push @plot,
+ qq{ 'plot.data' using }.
+
+ # to plot "undefined" values as 0
+ # qq{ 1:(\$$col >= 0 ? \$$col : 0) }.
+ qq{ 1:(\$$col >= 0 ? \$$col : -1) }.
+
+ ## to not plot "undefined" values at all (ugly!)
+ # qq{ 1:(\$$col >= 0 ? \$$col : 1/0) }.
+
+ ## smoothing: not so useful
+ # qq{ smooth bezier }.
+
+ qq{ with linespoints lt $style pt $style }.
+ qq{ t '$legend' };
+
+ }
+
+ print GP "plot ",join(", ", @plot), "\n";
+ close GP;

- ';
- }
- elsif (!$graph_files_individually) {
- $gd->plot($graph_data);
my $graphname = sprintf("file%02d", $fname_counter++);
- $gd->gd()->transparent(-1);
- $graph_png_data{$graphname} = $gd->gd()->png;
- }
- else {
- $gd->plot($graph_data);
- my $format = "png";
- my $fname = sprintf("$outdir/file%02d.%s", $fname_counter++, $format);
- open(IMG, ">$fname") or die $!;
- binmode IMG;
- print IMG $gd->gd()->$format();
- close IMG;
+ $graph_png_data{$graphname} = readfile("out.png");
}
+}
+
+sub readfile {
+ open (IN, "<$_[0]") or die "cannot read $_[0]";
+ binmode IN;
+ my $str = join('',<IN>);
+ close IN;
+ return $str;
+}
+
+sub filename_to_legend {
+ my $f = shift;
+
+ $f =~ s/^.*\///;
+ $f =~ s/LOGS\.all-//;
+ $f =~ s/\.log\.\S+$//;
+ return $f;
}