Mailing List Archive: svn commit: r437423 - /spamassassin/branches/tvd-multi-mass-check/masses/mass-check

Author: felicity
Date: Sun Aug 27 10:41:25 2006
New Revision: 437423

URL: http://svn.apache.org/viewvc?rev=437423&view=rev
Log:
handle the results before handing out work units. also, change the unit format again.

Modified:
spamassassin/branches/tvd-multi-mass-check/masses/mass-check

Modified: spamassassin/branches/tvd-multi-mass-check/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/branches/tvd-multi-mass-check/masses/mass-check?rev=437423&r1=437422&r2=437423&view=diff
==============================================================================
--- spamassassin/branches/tvd-multi-mass-check/masses/mass-check (original)
+++ spamassassin/branches/tvd-multi-mass-check/masses/mass-check Sun Aug 27 10:41:25 2006
@@ -365,7 +365,6 @@
}

my $timestamps = {};
- my %reshash = ();
my $msgsout = { 'curnum' => 0 };

my $select = IO::Select->new( $serv_socket );
@@ -390,7 +389,7 @@
"");
}
elsif ($type eq 'POST') {
- handle_post_results($postdata, \%reshash);
+ handle_post_results($postdata, $timestamps, $msgsout);

my $messages = '';
if ($postdata->{'max_messages'}) {
@@ -439,8 +438,6 @@
}
}

- deal_with_results($timestamps, $msgsout, \%reshash);
-
# drop the listener when ready
# we're not awaiting responses and we've exhausted the input file
$select->remove($serv_socket) if (!keys %{$msgsout} && !defined $tmpfd);
@@ -1228,13 +1225,14 @@
# Generate an archive in the temp file
foreach my $num (@tosend) {
my $data = $msgsout->{$num}->{'data'};
-
my $msg = ($iter->run_message($data))[4];

- my @d = Mail::SpamAssassin::ArchiveIterator::index_unpack($data);
- $d[$#d] = $num;
-
- $iter->send_line($gzfd, Mail::SpamAssassin::ArchiveIterator::index_pack(@d));
+ # Archive format, gzip compressed file w/ 3 parts per message:
+ # 1- server message number in text format
+ # 2- server index string, binary packed format
+ # 3- message content
+ $iter->send_line($gzfd, $num);
+ $iter->send_line($gzfd, $data);
$iter->send_line($gzfd, join('', @{$msg}));
}

@@ -1250,45 +1248,43 @@
return $gzpath;
}

-# we've gotten results posted, setup a hash cache to batch deal with all the
-# clients later on
+# we've gotten results posted, so clean up msgsout and timestamp hashes and
+# process result...
sub handle_post_results {
- my($postdata, $reshash) = @_;
-
- while( my($k,$v) = each %{$postdata} ) {
- next if ($k !~ /^\d$/);
- $reshash->{$k} = $v;
- }
-}
-
-# we've read in from all of the clients which connected, so clean up msgsout
-# and timestamp hashes
-sub deal_with_results {
- my($timestamps, $msgsout, $reshash) = @_;
+ my($postdata, $timestamps, $msgsout) = @_;

+ # local version to batch the removals
my %timestamps = ();

# $msgsout->{num}->{data|timestamp}
# $timestamp{num} = [ msgout_nums ... ]
- # $reshash{num} = result_string
+ # $postdata{num} = result_string

- while( my($k,$v) = each %{$reshash} ) {
- my $data = $msgsout->{$k}->{'data'};
- my @d = Mail::SpamAssassin::ArchiveIterator::index_unpack($data);
+ while( my($k,$v) = each %{$postdata} ) {
+ next if ($k !~ /^\d$/);

- # the client had a fake filename, so replace the name
- $v =~ s/ \S+\d+ / $d[3] /;
+ # if we've been waiting for this result, process it, otherwise throw it on
+ # the ground. multiple clients could have been given the same messages to
+ # process, and we take whatever the first responder sends us.
+ if (exists $msgsout->{$k}) {
+ # the client had a fake filename, so replace the name
+ my @d = Mail::SpamAssassin::ArchiveIterator::index_unpack($msgsout->{$k}->{'data'});

- # go ahead and do the result
- &{$iter->{result_sub}}($d[1], $v, $d[0]);
+ # go ahead and do the result
+ &{$iter->{result_sub}}($d[1], $v, $d[0]);

- # prep to get rid of the cached entries
- $timestamps{$msgsout->{$k}->{'timestamp'}}->{$k} = 1;
- delete $msgsout->{$k};
+ # prep to get rid of the cached entries
+ $timestamps{$msgsout->{$k}->{'timestamp'}}->{$k} = 1;
+ delete $msgsout->{$k};
+ }
}

+ # if we got any results, clean out the results from the timestamp arrays
while ( my($k,$v) = each %timestamps ) {
my @temp = grep(!exists $v->{$_}, @{$timestamps->{$k}});
+
+ # if there are results left for a specific timestamp, update the array
+ # pointer. otherwise, delete the timestamp entry.
if (@temp) {
$timestamps->{$k} = \@temp;
}