Mailing List Archive

cvs commit: jakarta-lucene/src/java/org/apache/lucene/search HitCollector.java IndexSearcher.java MultiSearcher.java Searcher.java
cutting 01/09/25 12:03:35

Modified: src/java/org/apache/lucene/search HitCollector.java
IndexSearcher.java MultiSearcher.java Searcher.java
Log:
Incorporated Joanne Sproston's changes to extend the lower-level HitCollector-based search API to MultiSearcher. I have not yet tested this.

Revision Changes Path
1.2 +8 -2 jakarta-lucene/src/java/org/apache/lucene/search/HitCollector.java

Index: HitCollector.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/HitCollector.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- HitCollector.java 2001/09/18 16:29:56 1.1
+++ HitCollector.java 2001/09/25 19:03:35 1.2
@@ -55,7 +55,7 @@
*/

/** Lower-level search API.
- * @see IndexSearcher#search(Query,HitCollector)
+ * @see Searcher#search(Query,HitCollector)
*/
public abstract class HitCollector {
/** Called once for every non-zero scoring document, with the document number
@@ -71,6 +71,12 @@
* }
* });
* </pre>
- */
+ *
+ * <p>Note: This is called in an inner search loop. For good search
+ * performance, implementations of this method should not call {@link
+ * Searcher#doc(int)} or {@link
+ * org.apache.lucene.index.IndexReader#document(int)} on every document
+ * number encountered. Doing so can slow searches by an order of magnitude
+ * or more. */
public abstract void collect(int doc, float score);
}



1.2 +7 -15 jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java

Index: IndexSearcher.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- IndexSearcher.java 2001/09/18 16:29:57 1.1
+++ IndexSearcher.java 2001/09/25 19:03:35 1.2
@@ -91,7 +91,8 @@
return reader.docFreq(term);
}

- final Document doc(int i) throws IOException {
+ /** For use by {@link HitCollector} implementations. */
+ public final Document doc(int i) throws IOException {
return reader.document(i);
}

@@ -140,21 +141,12 @@
* <p>Applications should only use this if they need <it>all</it> of the
* matching documents. The high-level search API ({@link
* Searcher#search(Query)}) is usually more efficient, as it skips
- * non-high-scoring hits. */
- public final void search(Query query, HitCollector results)
- throws IOException {
- search(query, null, results);
- }
-
- /** Lower-level search API.
+ * non-high-scoring hits.
*
- * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
- * scoring document.
- *
- * <p>Applications should only use this if they need <it>all</it> of the
- * matching documents. The high-level search API ({@link
- * Searcher#search(Query)}) is usually more efficient, as it skips
- * non-high-scoring hits. */
+ * @param query to match documents
+ * @param filter if non-null, a bitset used to eliminate some documents
+ * @param results to receive hits
+ */
public final void search(Query query, Filter filter,
final HitCollector results) throws IOException {
HitCollector collector = results;



1.2 +37 -3 jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java

Index: MultiSearcher.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/MultiSearcher.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- MultiSearcher.java 2001/09/18 16:29:57 1.1
+++ MultiSearcher.java 2001/09/25 19:03:35 1.2
@@ -92,13 +92,16 @@
return docFreq;
}

- final Document doc(int n) throws IOException {
+ /** For use by {@link HitCollector} implementations. */
+ public final Document doc(int n) throws IOException {
int i = searcherIndex(n); // find searcher index
return searchers[i].doc(n - starts[i]); // dispatch to searcher
}

- // replace w/ call to Arrays.binarySearch in Java 1.2
- private final int searcherIndex(int n) { // find searcher for doc n:
+ /** For use by {@link HitCollector} implementations to identify the
+ * index of the sub-searcher that a particular hit came from. */
+ public final int searcherIndex(int n) { // find searcher for doc n:
+ // replace w/ call to Arrays.binarySearch in Java 1.2
int lo = 0; // search starts array
int hi = searchers.length - 1; // for first element less
// than n, return its index
@@ -148,5 +151,36 @@
scoreDocs[i] = (ScoreDoc)hq.pop();

return new TopDocs(totalHits, scoreDocs);
+ }
+
+
+ /** Lower-level search API.
+ *
+ * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+ * scoring document.
+ *
+ * <p>Applications should only use this if they need <i>all</i> of the
+ * matching documents. The high-level search API ({@link
+ * Searcher#search(Query)}) is usually more efficient, as it skips
+ * non-high-scoring hits.
+ *
+ * @param query to match documents
+ * @param filter if non-null, a bitset used to eliminate some documents
+ * @param results to receive hits
+ */
+ public final void search(Query query, Filter filter,
+ final HitCollector results)
+ throws IOException {
+ for (int i = 0; i < searchers.length; i++) {
+
+ final int start = starts[i];
+
+ searchers[i].search(query, filter, new HitCollector() {
+ public void collect(int doc, float score) {
+ results.collect(doc + start, score);
+ }
+ });
+
+ }
}
}



1.2 +35 -3 jakarta-lucene/src/java/org/apache/lucene/search/Searcher.java

Index: Searcher.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/Searcher.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Searcher.java 2001/09/18 16:29:58 1.1
+++ Searcher.java 2001/09/25 19:03:35 1.2
@@ -66,15 +66,46 @@

/** Returns the documents matching <code>query</code>. */
public final Hits search(Query query) throws IOException {
- return search(query, null);
+ return search(query, (Filter)null);
}

/** Returns the documents matching <code>query</code> and
<code>filter</code>. */
- public final Hits search(Query query, Filter filter) throws IOException {
+ public Hits search(Query query, Filter filter) throws IOException {
return new Hits(this, query, filter);
}

+ /** Lower-level search API.
+ *
+ * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+ * scoring document.
+ *
+ * <p>Applications should only use this if they need <i>all</i> of the
+ * matching documents. The high-level search API ({@link
+ * Searcher#search(Query)}) is usually more efficient, as it skips
+ * non-high-scoring hits. */
+ public void search(Query query, HitCollector results)
+ throws IOException {
+ search(query, (Filter)null, results);
+ }
+
+ /** Lower-level search API.
+ *
+ * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+ * scoring document.
+ *
+ * <p>Applications should only use this if they need <i>all</i> of the
+ * matching documents. The high-level search API ({@link
+ * Searcher#search(Query)}) is usually more efficient, as it skips
+ * non-high-scoring hits.
+ *
+ * @param query to match documents
+ * @param filter if non-null, a bitset used to eliminate some documents
+ * @param results to receive hits
+ */
+ public abstract void search(Query query, Filter filter, HitCollector results)
+ throws IOException;
+
/** Frees resources associated with this Searcher. */
abstract public void close() throws IOException;

@@ -82,6 +113,7 @@
abstract int maxDoc() throws IOException;
abstract TopDocs search(Query query, Filter filter, int n)
throws IOException;
- abstract Document doc(int i) throws IOException;

+ /** For use by {@link HitCollector} implementations. */
+ public abstract Document doc(int i) throws IOException;
}