Mailing List Archive

counting words
Hi
I'm new to Lucene and was wondering whether there is a way of finding out
how many times a word occurs in a document (or a set of documents) using Lucene.
Thanks for the help
Anthony Stern

I have tried modifying some of the code given in the examples. My
attempt is shown below.

package lucene;

import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;


/**
 * Interactive console search over a Lucene index that also reports how many
 * times the entered word occurs across the indexed documents.
 *
 * <p>Each line read from standard input is parsed as a query against the
 * {@code contents} field, the matching documents are listed one page at a
 * time, and the total number of occurrences of the entered word is printed.
 * An empty line or end of input terminates the program.
 */
class SearchFiles {

  /** Directory holding the Lucene index. */
  private static final String INDEX_PATH = "index";

  /**
   * Field used both for the query and for the term-frequency lookup. The
   * original looked the term up in "body" while searching "contents"; the
   * two must name the same field for the count to relate to the search.
   * NOTE(review): confirm this is the field the indexer actually writes.
   */
  private static final String FIELD = "contents";

  /** Number of hits printed before asking whether to continue. */
  private static final int HITS_PER_PAGE = 10;

  /**
   * Runs the read-query/print-results loop until an empty line or end of
   * input is read. Any exception is reported on standard output.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    try {
      IndexReader reader = IndexReader.open(INDEX_PATH);
      try {
        Searcher searcher = new IndexSearcher(INDEX_PATH);
        try {
          Analyzer analyzer = new StopAnalyzer();
          BufferedReader in = new BufferedReader(new InputStreamReader(System.in));

          while (true) {
            System.out.print("Query: ");
            String line = in.readLine();

            // readLine() returns null at end of input; length() is never -1,
            // so the original exit test could not succeed.
            if (line == null || line.length() == 0) {
              break;
            }

            Query query = QueryParser.parse(line, FIELD, analyzer);
            System.out.println("Searching for: " + query.toString(FIELD));

            Hits hits = searcher.search(query);
            System.out.println(hits.length() + " total matching documents");

            // lastModified(String) is a static method of IndexReader.
            System.out.println("last modified " + IndexReader.lastModified(INDEX_PATH));

            // Index terms are the analyzer's output tokens, which StopAnalyzer
            // lower-cases, so the raw input is normalised the same way. This is
            // only meaningful when the input is a single word.
            // NOTE(review): assumes the index was built with the same analyzer.
            Term term = new Term(FIELD, line.trim().toLowerCase());
            System.out.println("frequency " + countOccurrences(reader, term));

            if (!printHits(hits, in)) {
              break;
            }
          }
        } finally {
          searcher.close();
        }
      } finally {
        reader.close();
      }
    } catch (Exception e) {
      System.out.println(" caught a " + e.getClass() +
          "\n with message: " + e.getMessage());
    }
  }

  /**
   * Sums the within-document frequency of {@code term} over every document
   * that contains it.
   *
   * @param reader open reader on the index
   * @param term field/text pair to count
   * @return total number of occurrences, or 0 if the term is not in the index
   * @throws IOException if the index cannot be read
   */
  private static int countOccurrences(IndexReader reader, Term term) throws IOException {
    TermDocs termDocs = reader.termDocs(term);
    if (termDocs == null) {
      // Early Lucene releases return null for a term that is not indexed.
      return 0;
    }
    int total = 0;
    try {
      // freq() is only valid after next() has positioned the enumeration.
      while (termDocs.next()) {
        total += termDocs.freq();
      }
    } finally {
      termDocs.close();
    }
    return total;
  }

  /**
   * Prints the hits one page at a time, asking after each full page whether
   * to show more.
   *
   * @param hits search results to print
   * @param in console reader used for the "more" prompt
   * @return {@code false} if end of input was reached while prompting,
   *         {@code true} otherwise
   * @throws IOException if reading the console or the index fails
   */
  private static boolean printHits(Hits hits, BufferedReader in) throws IOException {
    for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
      int end = Math.min(hits.length(), start + HITS_PER_PAGE);
      for (int i = start; i < end; i++) {
        System.out.println(i + ". " +
            hits.doc(i).get("path")+hits.score(i));
      }
      if (hits.length() > end) {
        System.out.print("more (y/n) ? ");
        String answer = in.readLine();
        if (answer == null) {
          return false;
        }
        if (answer.length() == 0 || answer.charAt(0) == 'n') {
          break;
        }
      }
    }
    return true;
  }
}

--
To unsubscribe, e-mail: <mailto:lucene-user-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-user-help@jakarta.apache.org>