Mailing List Archive: r3873 - in trunk: devel/benchmarks devel/benchmarks/indexers devel/bin perl/buildlib/Lucy

Author: creamyg
Date: 2008-09-11 07:53:51 -0700 (Thu, 11 Sep 2008)
New Revision: 3873

Modified:
trunk/devel/benchmarks/extract_reuters.plx
trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm
trunk/devel/benchmarks/indexers/kinosearch_indexer.plx
trunk/devel/benchmarks/indexers/plucene_indexer.plx
trunk/devel/bin/dump_index
trunk/devel/bin/predit
trunk/devel/bin/syncl
trunk/devel/bin/tidyall
trunk/perl/buildlib/Lucy/Build.pm
Log:
Update comment style.

Modified: trunk/devel/benchmarks/extract_reuters.plx
===================================================================
--- trunk/devel/benchmarks/extract_reuters.plx 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/benchmarks/extract_reuters.plx 2008-09-11 14:53:51 UTC (rev 3873)
@@ -5,7 +5,7 @@
use File::Spec::Functions qw( catfile catdir );
use Cwd qw( getcwd );

-# ensure call from correct location and with required arg
+# Ensure call from correct location and with required arg.
my $source_dir = $ARGV[0];
die "Usage: ./extract_reuters.plx /path/to/expanded/archive"
unless -d $source_dir;
@@ -13,30 +13,30 @@
die "Must be run from the benchmarks/ directory"
unless ( $working_dir =~ /benchmarks\W*$/ );

-# create the main output directory
+# Create the main output directory.
my $main_out_dir = 'extracted_corpus';
if ( !-d $main_out_dir ) {
mkdir $main_out_dir or die "Couldn't mkdir '$main_out_dir': $!";
}

-# get a list of the sgm files
+# Get a list of the sgm files.
opendir SOURCE_DIR, $source_dir or die "Couldn't open directory: $!";
my @sgm_files = grep {/\.sgm$/} readdir SOURCE_DIR;
closedir SOURCE_DIR or die "Couldn't close directory: $!";
die "Couldn't find all the sgm files"
unless @sgm_files == 22;

-# track number of story docs
+# Track number of story docs.
my $num_files = 0;

for my $sgm_file (@sgm_files) {
- # get the sgm file
+ # Get the sgm file.
my $sgm_filepath = catfile( $source_dir, $sgm_file );
print "Processing $sgm_filepath\n";
open( my $sgm_fh, '<', $sgm_filepath )
or die "Couldn't open file '$sgm_filepath': $!";

- # prepare output directory
+ # Prepare output directory.
$sgm_file =~ /(\d+)\.sgm$/ or die "no match";
my $out_dir = catdir( $main_out_dir, "articles$1" );
if ( !-d $out_dir ) {
@@ -47,13 +47,13 @@
my $in_title = 0;
my ( $title, $body );
while (<$sgm_fh>) {
- # start a new story doc
+ # Start a new story doc.
if (/<REUTERS/) {
$title = '';
$body = '';
}

- # extract title and body
+ # Extract title and body.
if (s/.*?<TITLE>//) {
$in_title = 1;
$title = '';
@@ -73,7 +73,7 @@
$body =~ s#</BODY>.*##s;
}

- # write out a finished article doc
+ # Write out a finished article doc.
if (m#</REUTERS>#) {
die "Malformed data" if ( $in_title or $in_body );
if ( length $title and length $body ) {

Modified: trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm
===================================================================
--- trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/benchmarks/indexers/BenchmarkingIndexer.pm 2008-09-11 14:53:51 UTC (rev 3873)
@@ -61,15 +61,15 @@
}

sub start_report {
- # start the output
+ # Start the output.
print '-' x 60 . "\n";
}

-# Print out aggregate stats
+# Print out aggregate stats.
sub print_final_report {
my ( $self, $times ) = @_;

- # produce mean and truncated mean
+ # Produce mean and truncated mean.
my @sorted_times = sort @$times;
my $num_to_chop = int( @sorted_times >> 2 );
my $mean = 0;
@@ -77,7 +77,7 @@
my $num_kept = 0;
for ( my $i = 0; $i < @sorted_times; $i++ ) {
$mean += $sorted_times[$i];
- # discard fastest 25% and slowest 25% of runs
+ # Discard fastest 25% and slowest 25% of runs.
next if $i < $num_to_chop;
next if $i > ( $#sorted_times - $num_to_chop );
$trunc_mean += $sorted_times[$i];
@@ -90,7 +90,7 @@
$mean = sprintf( "%.2f", $mean );
$trunc_mean = sprintf( "%.2f", $trunc_mean );

- # get some info about the system
+ # Get some info about the system.
my $thread_support = $Config{usethreads} ? "yes" : "no";
my @uname_info = (uname)[ 0, 2, 4 ];

@@ -137,7 +137,7 @@
require KinoSearch;
require KinoSearch::InvIndexer;

- # provide runtime flexibility
+ # Provide runtime flexibility.
my $schema = $self->{schema} = BenchSchema->new;
if ( $self->{store} ) {
$schema->add_field( field => 'body', spec => 'text' );
@@ -174,7 +174,7 @@
my ( $max, $increment, $article_filepaths )
= @{$self}{qw( docs increment article_filepaths )};

- # start timer
+ # Start timer.
my $start = gettimeofday();

my $invindexer = $self->init_indexer(0);
@@ -182,7 +182,7 @@
my $count = 0;
while ( $count < $max ) {
for my $article_filepath (@$article_filepaths) {
- # the title is the first line, the body is the rest
+ # The title is the first line, the body is the rest.
open( my $article_fh, '<', $article_filepath )
or die "Can't open file '$article_filepath'";

@@ -192,7 +192,7 @@

$invindexer->add_doc( \%doc );

- # bail if we've reached spec'd number of docs
+ # Bail if we've reached spec'd number of docs.
$count++;
last if $count >= $max;
if ( $count % $increment == 0 and $count ) {
@@ -203,10 +203,10 @@
}
}

- # finish index
+ # Finish index.
$invindexer->finish( optimize => 1 );

- # return elapsed seconds
+ # Return elapsed seconds.
my $end = gettimeofday();
my $secs = $end - $start;
return ( $count, $secs );
@@ -250,10 +250,10 @@
my ( $max, $increment, $article_filepaths )
= @{$self}{qw( docs increment article_filepaths )};

- # cause text to be stored if spec'd
+ # Cause text to be stored if spec'd.
my $field_constructor = $self->{store} ? 'Text' : 'UnStored';

- # start timer
+ # Start timer.
my $start = gettimeofday();

my $writer = $self->init_indexer(0);
@@ -261,13 +261,13 @@
my $count = 0;
while ( $count < $max ) {
for my $article_filepath (@$article_filepaths) {
- # the title is the first line, the body is the rest
+ # The title is the first line, the body is the rest.
open( my $article_fh, '<', $article_filepath )
or die "Can't open file '$article_filepath'";
my $title = <$article_fh>;
my $body = do { local $/; <$article_fh> };

- # add content to index
+ # Add content to index.
my $doc = Plucene::Document->new;
$doc->add( Plucene::Document::Field->Text( title => $title ) );
$doc->add(
@@ -275,7 +275,7 @@
);
$writer->add_document($doc);

- # bail if we've reached spec'd number of docs
+ # Bail if we've reached spec'd number of docs.
$count++;
last if ( $count >= $max );
if ( $count % $increment == 0 and $count ) {
@@ -285,10 +285,10 @@
}
}

- # finish index
+ # Finish index.
$writer->optimize;

- # return elapsed seconds
+ # Return elapsed seconds.
my $end = gettimeofday();
my $secs = $end - $start;
return ( $count, $secs );

Modified: trunk/devel/benchmarks/indexers/kinosearch_indexer.plx
===================================================================
--- trunk/devel/benchmarks/indexers/kinosearch_indexer.plx 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/benchmarks/indexers/kinosearch_indexer.plx 2008-09-11 14:53:51 UTC (rev 3873)
@@ -9,7 +9,7 @@
use Cwd qw( getcwd );
use BenchmarkingIndexer;

-# index all docs and run one iter unless otherwise spec'd
+# Index all docs and run one iter unless otherwise spec'd.
my ( $num_reps, $max_to_index, $increment, $store, $build_index );
GetOptions(
'reps=s' => \$num_reps,
@@ -37,12 +37,12 @@
my @times;
for my $rep ( 1 .. $num_reps ) {

- # spawn an index-building child process
+ # Spawn an index-building child process.
my $command = "$^X ";
- # try to figure out if this program was called with -Mblib
+ # Try to figure out if this program was called with -Mblib.
for (@INC) {
next unless /\bblib\b/;
- # propagate -Mblib to the child
+ # Propagate -Mblib to the child.
$command .= "-Mblib ";
last;
}
@@ -52,7 +52,7 @@
$command .= "--increment=$increment " if $increment;
my $output = `$command`;

- # extract elapsed time from the output of the child
+ # Extract elapsed time from the output of the child.
$output =~ /^docs: (\d+) elapsed: ([\d.]+)/
or die "no match: '$output'";
my $docs = $1;

Modified: trunk/devel/benchmarks/indexers/plucene_indexer.plx
===================================================================
--- trunk/devel/benchmarks/indexers/plucene_indexer.plx 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/benchmarks/indexers/plucene_indexer.plx 2008-09-11 14:53:51 UTC (rev 3873)
@@ -8,12 +8,12 @@
use Cwd qw( getcwd );
use BenchmarkingIndexer;

-# verify that we're running from the right directory;
+# Verify that we're running from the right directory.
my $working_dir = getcwd;
die "Must be run from benchmarks/"
unless $working_dir =~ /benchmarks\W*$/;

-# index all docs and run one iter unless otherwise spec'd
+# Index all docs and run one iter unless otherwise spec'd.
my ( $num_reps, $max_to_index, $increment, $store, $build_index );
GetOptions(
'reps=s' => \$num_reps,
@@ -40,12 +40,12 @@

my @times;
for my $rep ( 1 .. $num_reps ) {
- # spawn an index-building child process
+ # Spawn an index-building child process.
my $command = "$^X ";
- # try to figure out if this program was called with -Mblib
+ # Try to figure out if this program was called with -Mblib.
for (@INC) {
next unless /\bblib\b/;
- # propagate -Mblib to the child
+ # Propagate -Mblib to the child.
$command .= "-Mblib ";
last;
}
@@ -55,7 +55,7 @@
$command .= "--increment=$increment " if $increment;
my $output = `$command`;

- # extract elapsed time from the output of the child
+ # Extract elapsed time from the output of the child.
$output =~ /^docs: (\d+) elapsed: ([\d.]+)/
or die "no match: '$output'";
my $docs = $1;

Modified: trunk/devel/bin/dump_index
===================================================================
--- trunk/devel/bin/dump_index 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/bin/dump_index 2008-09-11 14:53:51 UTC (rev 3873)
@@ -4,7 +4,7 @@

use KinoSearch::Index::IndexReader;

-# parse and validate command line args
+# Parse and validate command line args.
die "Usage: dump_index SCHEMA_CLASS INDEX_LOCATION" unless @ARGV == 2;
my ( $schema_class, $where ) = @ARGV;
die "Invalid schema class name: $schema_class"

Modified: trunk/devel/bin/predit
===================================================================
--- trunk/devel/bin/predit 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/bin/predit 2008-09-11 14:53:51 UTC (rev 3873)
@@ -51,7 +51,7 @@
return;
}

- # confirm with user that the change worked as intended.
+ # Confirm with user that the change worked as intended.
my $diff = diff( \$orig, \$edited );
print "\nFILE: $filepath\n$diff\nApply? ";
my $response = <STDIN>;

Modified: trunk/devel/bin/syncl
===================================================================
--- trunk/devel/bin/syncl 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/bin/syncl 2008-09-11 14:53:51 UTC (rev 3873)
@@ -67,7 +67,7 @@

next if $component =~ $exclude;

- # warn and skip files that aren't common
+ # Warn and skip files that aren't common.
for ( $source_path, $dest_path ) {
if ( !-e $_ ) {
print("Don't have $_ ... skipping...\n");
@@ -76,12 +76,12 @@
next FILE unless -f $_;
}

- # generate a diff if there are changes, otherwise skip
+ # Generate a diff if there are changes, otherwise skip.
my $source_content = slurp($source_path);
my $dest_content = slurp($dest_path);
my $edited = modify( $source_content, $dest_content );

- # search and replace prefixes, project name
+ # Search and replace prefixes, project name.
my $source_swap = $swaps{$source_proj};
my $dest_swap = $swaps{$dest_proj};
for ($edited) {
@@ -96,7 +96,7 @@
next;
}

- # confirm with user that the change worked as intended.
+ # Confirm with user that the change worked as intended.
my $diff = diff( \$dest_content, \$edited );
print "\nFILE: $dest_path\n$diff\nApply? ";
my $response = <STDIN>;

Modified: trunk/devel/bin/tidyall
===================================================================
--- trunk/devel/bin/tidyall 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/devel/bin/tidyall 2008-09-11 14:53:51 UTC (rev 3873)
@@ -10,7 +10,7 @@
my $source_dir = shift @ARGV;
die "usage: tidyall DIR" unless defined $source_dir;

-# grab all perl filepaths
+# Grab all perl filepaths
my @paths;
find(
{ wanted => sub {
@@ -26,7 +26,7 @@
die "can't find kinotidyrc" unless -f $rc_filepath;

for my $path (@paths) {
- # grab orig text
+ # Grab orig text
print "$path\n";
open( my $fh, '<', $path )
or die "couldn't open file '$path' for reading: $!";

Modified: trunk/perl/buildlib/Lucy/Build.pm
===================================================================
--- trunk/perl/buildlib/Lucy/Build.pm 2008-09-11 05:33:15 UTC (rev 3872)
+++ trunk/perl/buildlib/Lucy/Build.pm 2008-09-11 14:53:51 UTC (rev 3873)
@@ -134,7 +134,7 @@
my $charmonize_c = catfile( $CHARMONIZER_ORIG_DIR, 'charmonize.c' );
my @all_source = ( $charmonize_c, @$charm_source_files );

- # don't compile if we're up to date
+ # Don't compile if we're up to date.
return if $self->up_to_date( \@all_source, $CHARMONIZE_EXE_PATH );

print "Building $CHARMONIZE_EXE_PATH...\n\n";
@@ -179,7 +179,7 @@
# Clean up after Charmonizer if it doesn't succeed on its own.
$self->add_to_cleanup("_charm*");

- # write the infile with which to communicate args to charmonize
+ # Write the infile with which to communicate args to charmonize.
my $os_name = lc( $Config{osname} );
my $flags = "$Config{ccflags} $EXTRA_CCFLAGS";
my $verbosity = $ENV{DEBUG_CHARM} ? 2 : 1;
@@ -485,7 +485,7 @@
$self->SUPER::ACTION_code;
}

-# copied from Module::Build::Base.pm, added exclude '#' and follow symlinks
+# Copied from Module::Build::Base.pm, added exclude '#' and follow symlinks.
sub rscan_dir {
my ( $self, $dir, $pattern ) = @_;
my @result;
@@ -501,7 +501,7 @@

File::Find::find( { wanted => $subr, no_chdir => 1, follow => 1 }, $dir );

- # skip emacs lock files
+ # Skip emacs lock files.
my @filtered = grep !/#/, @result;
return \@filtered;
}
@@ -560,7 +560,7 @@

=cut

-# write the typemap file.
+# Write the typemap file.
sub ACTION_write_typemap {
my $self = shift;

@@ -568,7 +568,7 @@
return
if ( -e 'typemap' and $self->up_to_date( $pm_filepaths, 'typemap' ) );

- # build up a list of C-struct classes
+ # Build up a list of C-struct classes.
my @struct_classes;
my $bp_filepaths = $self->rscan_dir( $C_SOURCE_DIR, qr/\.bp$/ );
for my $bp_path (@$bp_filepaths) {
@@ -604,7 +604,7 @@
END_OUTPUT
}

- # blast it out
+ # Blast it out.
print "Writing typemap\n";
unlink 'typemap';
sysopen( my $typemap_fh, 'typemap', O_CREAT | O_WRONLY | O_EXCL )
@@ -721,7 +721,7 @@

$self->SUPER::ACTION_dist;

- # clean up and restore MANIFEST
+ # Clean up and restore MANIFEST.
print "Removing copied files...\n";
rmtree('c_src');
rmtree('devel');
@@ -734,24 +734,24 @@
sub _gen_pause_exclusion_list {
my $self = shift;

- # only exclude files that are actually on-board
+ # Only exclude files that are actually on-board.
open( my $man_fh, '<', 'MANIFEST' ) or die "Can't open MANIFEST: $!";
my @manifest_entries = <$man_fh>;
chomp @manifest_entries;

my @excluded_files;
for my $entry ( sort @manifest_entries ) {
- # allow README
+ # Allow README.
next if $entry =~ m#^README#;

- # allow public modules
+ # Allow public modules.
if ( $entry =~ m#^lib.+\.(pm|pod)$# ) {
open( my $fh, '<', $entry ) or die "Can't open '$entry': $!";
my $content = do { local $/; <$fh> };
next if $content =~ /=head1\s*NAME/;
}

- # disallow everything else
+ # Disallow everything else.
push @excluded_files, $entry;
}

_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits