Mailing List Archive

r3795 - in trunk/perl: buildlib sample
Author: creamyg
Date: 2008-08-29 14:31:27 -0700 (Fri, 29 Aug 2008)
New Revision: 3795

Modified:
trunk/perl/buildlib/KinoTestUtils.pm
trunk/perl/sample/USConSchema.pm
trunk/perl/sample/invindexer.pl
Log:
Add a 'category' field to USConSchema.


Modified: trunk/perl/buildlib/KinoTestUtils.pm
===================================================================
--- trunk/perl/buildlib/KinoTestUtils.pm 2008-08-29 21:13:37 UTC (rev 3794)
+++ trunk/perl/buildlib/KinoTestUtils.pm 2008-08-29 21:31:27 UTC (rev 3795)
@@ -131,11 +131,17 @@
my $bodytext = $1;
$bodytext =~ s/<.*?>//sg;
$bodytext =~ s/\s+/ /sg;
+ my $category
+ = $filename =~ /art/ ? 'article'
+ : $filename =~ /amend/ ? 'amendment'
+ : $filename =~ /preamble/ ? 'preamble'
+ : confess "Can't derive category for $filename";

$docs{$filename} = {
title => $title,
bodytext => $bodytext,
url => "/us_constitution/$filename",
+ category => $category,
};
}


Modified: trunk/perl/sample/USConSchema.pm
===================================================================
--- trunk/perl/sample/USConSchema.pm 2008-08-29 21:13:37 UTC (rev 3794)
+++ trunk/perl/sample/USConSchema.pm 2008-08-29 21:31:27 UTC (rev 3795)
@@ -1,18 +1,23 @@
use strict;
use warnings;

-package USConSchema::UnIndexedField;
+package USConSchema::NotIndexed;
use base 'KinoSearch::FieldSpec::TextField';
sub indexed {0}

+package USConSchema::NotAnalyzed;
+use base 'KinoSearch::FieldSpec::TextField';
+sub analyzed {0}
+
package USConSchema;
use base 'KinoSearch::Schema';
use KinoSearch::Analysis::PolyAnalyzer;

our %fields = (
- title => 'text',
- content => 'text',
- url => 'USConSchema::UnIndexedField',
+ title => 'text',
+ content => 'text',
+ url => 'USConSchema::NotIndexed',
+ category => 'USConSchema::NotAnalyzed',
);

sub analyzer {

Modified: trunk/perl/sample/invindexer.pl
===================================================================
--- trunk/perl/sample/invindexer.pl 2008-08-29 21:13:37 UTC (rev 3794)
+++ trunk/perl/sample/invindexer.pl 2008-08-29 21:31:27 UTC (rev 3795)
@@ -33,7 +33,7 @@
print "Finished.\n";

# Parse an HTML file from our US Constitution collection and return a
-# hashref with three keys: title, body, and url.
+# hashref with the fields title, body, url, and category.
sub parse_file {
my $filename = shift;
my $filepath = catfile( $conf->{uscon_source}, $filename );
@@ -43,10 +43,16 @@
or die "No title element in $filepath";
my $bodytext_node = $tree->look_down( id => 'bodytext' )
or die "No div with id 'bodytext' in $filepath";
+ my $category
+ = $filename =~ /art/ ? 'article'
+ : $filename =~ /amend/ ? 'amendment'
+ : $filename =~ /preamble/ ? 'preamble'
+ : die "Can't derive category for $filename";
return {
title => $title_node->as_trimmed_text,
content => $bodytext_node->as_trimmed_text,
- url => "/us_constitution/$filename"
+ url => "/us_constitution/$filename",
+ category => $category,
};
}



_______________________________________________
kinosearch-commits mailing list
kinosearch-commits@rectangular.com
http://www.rectangular.com/mailman/listinfo/kinosearch-commits