Mailing List Archive

cvs commit: jakarta-lucene/src/test/org/apache/lucene/queryParser TestQueryParser.java
briangoetz 2002/06/24 17:05:31

Modified: src/java/org/apache/lucene/document DateField.java
Field.java
src/java/org/apache/lucene/queryParser QueryParser.jj
src/test/org/apache/lucene/queryParser TestQueryParser.java
Log:
Support for new range query syntax. The delimiter is " TO ", but is optional
for backward compatibility with previous syntax. If the range arguments
match the format supported by DateFormat.getDateInstance(DateFormat.SHORT),
then they will be converted into the appropriate date strings a la DateField.

Added Field.Keyword "constructor" for Date-valued arguments.

Optimized DateField.timeToString function.

Submitted by: Brian Goetz

Revision Changes Path
1.4 +7 -2 jakarta-lucene/src/java/org/apache/lucene/document/DateField.java

Index: DateField.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/DateField.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- DateField.java 9 Jun 2002 20:47:22 -0000 1.3
+++ DateField.java 25 Jun 2002 00:05:31 -0000 1.4
@@ -105,8 +105,13 @@
if (s.length() > DATE_LEN)
throw new RuntimeException("time too late");

- while (s.length() < DATE_LEN)
- s = "0" + s; // pad with leading zeros
+ // Pad with leading zeros
+ if (s.length() < DATE_LEN) {
+ StringBuffer sb = new StringBuffer(s);
+ while (sb.length() < DATE_LEN)
+ sb.insert(0, ' ');
+ s = sb.toString();
+ }

return s;
}



1.3 +8 -0 jakarta-lucene/src/java/org/apache/lucene/document/Field.java

Index: Field.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/Field.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- Field.java 20 Nov 2001 05:22:31 -0000 1.2
+++ Field.java 25 Jun 2002 00:05:31 -0000 1.3
@@ -55,6 +55,7 @@
*/

import java.io.Reader;
+import java.util.Date;

/**
A field is a section of a Document. Each field has two parts, a name and a
@@ -89,6 +90,13 @@
fields, like "title" or "subject". */
public static final Field Text(String name, String value) {
return new Field(name, value, true, true, true);
+ }
+
+ /** Constructs a Date-valued Field that is tokenized and indexed,
+ and is stored in the index, for return with hits. Useful for short text
+ fields, like "title" or "subject". */
+ public static final Field Keyword(String name, Date value) {
+ return new Field(name, DateField.dateToString(value), true, true, true);
}

/** Constructs a String-valued Field that is tokenized and indexed,



1.18 +62 -33 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj

Index: QueryParser.jj
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- QueryParser.jj 20 May 2002 15:45:43 -0000 1.17
+++ QueryParser.jj 25 Jun 2002 00:05:31 -0000 1.18
@@ -65,8 +65,11 @@

import java.util.Vector;
import java.io.*;
+import java.text.*;
+import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.*;
import org.apache.lucene.search.*;

/**
@@ -218,35 +221,30 @@

private Query getRangeQuery(String field,
Analyzer analyzer,
- String queryText,
+ String part1,
+ String part2,
boolean inclusive)
{
- // Use the analyzer to get all the tokens. There should be 1 or 2.
- TokenStream source = analyzer.tokenStream(field,
- new StringReader(queryText));
- Term[] terms = new Term[2];
- org.apache.lucene.analysis.Token t;
+ boolean isDate = false, isNumber = false;

- for (int i = 0; i < 2; i++)
- {
- try
- {
- t = source.next();
- }
- catch (IOException e)
- {
- t = null;
- }
- if (t != null)
- {
- String text = t.termText();
- if (!text.equalsIgnoreCase("NULL"))
- {
- terms[i] = new Term(field, text);
- }
- }
+ try {
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+ df.setLenient(true);
+ Date d1 = df.parse(part1);
+ Date d2 = df.parse(part2);
+ part1 = DateField.dateToString(d1);
+ part2 = DateField.dateToString(d2);
+ isDate = true;
}
- return new RangeQuery(terms[0], terms[1], inclusive);
+ catch (Exception e) { }
+
+ if (!isDate) {
+ // @@@ Add number support
+ }
+
+ return new RangeQuery(new Term(field, part1),
+ new Term(field, part2),
+ inclusive);
}

public static void main(String[] args) throws Exception {
@@ -282,7 +280,7 @@
| <#_WHITESPACE: ( " " | "\t" ) >
}

-<DEFAULT> SKIP : {
+<DEFAULT, RangeIn, RangeEx> SKIP : {
<<_WHITESPACE>>
}

@@ -303,14 +301,28 @@
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: <_TERM_START_CHAR>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-| <RANGEIN: "[" ( ~[ "]" ] )+ "]">
-| <RANGEEX: "{" ( ~[ "}" ] )+ "}">
+| <RANGEIN_START: "[" > : RangeIn
+| <RANGEEX_START: "{" > : RangeEx
}

<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

+<RangeIn> TOKEN : {
+<RANGEIN_TO: "TO">
+| <RANGEIN_END: "]"> : DEFAULT
+| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
+}
+
+<RangeEx> TOKEN : {
+<RANGEEX_TO: "TO">
+| <RANGEEX_END: "}"> : DEFAULT
+| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
+}
+
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

@@ -387,7 +399,7 @@


Query Term(String field) : {
- Token term, boost=null, slop=null;
+ Token term, boost=null, slop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
@@ -415,12 +427,29 @@
else
q = getFieldQuery(field, analyzer, term.image);
}
- | ( term=<RANGEIN> { rangein=true; } | term=<RANGEEX> )
+ | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
+ [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
+ <RANGEIN_END> )
+ [ <CARAT> boost=<NUMBER> ]
+ {
+ if (goop1.kind == RANGEIN_QUOTED)
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ if (goop2.kind == RANGEIN_QUOTED)
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+
+ q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
+ }
+ | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
+ [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
+ <RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ]
{
- q = getRangeQuery(field, analyzer,
- term.image.substring(1, term.image.length()-1),
- rangein);
+ if (goop1.kind == RANGEEX_QUOTED)
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ if (goop2.kind == RANGEEX_QUOTED)
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+
+ q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
}
| term=<QUOTED>
[ slop=<SLOP> ]



1.12 +25 -10 jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java

Index: TestQueryParser.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- TestQueryParser.java 6 May 2002 21:59:44 -0000 1.11
+++ TestQueryParser.java 25 Jun 2002 00:05:31 -0000 1.12
@@ -55,11 +55,14 @@
*/

import java.io.*;
+import java.text.*;
+import java.util.*;
import junit.framework.*;

import org.apache.lucene.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
+import org.apache.lucene.document.DateField;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.Token;
@@ -235,16 +238,28 @@
}

public void testRange() throws Exception {
- assertQueryEquals("[ a z]", null, "[a-z]");
- assertTrue(getQuery("[ a z]", null) instanceof RangeQuery);
- assertQueryEquals("[ a z ]", null, "[a-z]");
- assertQueryEquals("{ a z}", null, "{a-z}");
- assertQueryEquals("{ a z }", null, "{a-z}");
- assertQueryEquals("{ a z }^2.0", null, "{a-z}^2.0");
- assertQueryEquals("[ a z] OR bar", null, "[a-z] bar");
- assertQueryEquals("[ a z] AND bar", null, "+[a-z] +bar");
- assertQueryEquals("( bar blar { a z}) ", null, "bar blar {a-z}");
- assertQueryEquals("gack ( bar blar { a z}) ", null, "gack (bar blar {a-z})");
+ assertQueryEquals("[ a TO z]", null, "[a-z]");
+ assertTrue(getQuery("[ a TO z]", null) instanceof RangeQuery);
+ assertQueryEquals("[ a TO z ]", null, "[a-z]");
+ assertQueryEquals("{ a TO z}", null, "{a-z}");
+ assertQueryEquals("{ a TO z }", null, "{a-z}");
+ assertQueryEquals("{ a TO z }^2.0", null, "{a-z}^2.0");
+ assertQueryEquals("[ a TO z] OR bar", null, "[a-z] bar");
+ assertQueryEquals("[ a TO z] AND bar", null, "+[a-z] +bar");
+ assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a-z}");
+ assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a-z})");
+ }
+
+ public String getDate(String s) throws Exception {
+ DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+ return DateField.dateToString(df.parse(s));
+ }
+
+ public void testDateRange() throws Exception {
+ assertQueryEquals("[ 1/1/02 TO 1/4/02]", null,
+ "[" + getDate("1/1/02") + "-" + getDate("1/4/02") + "]");
+ assertQueryEquals("{ 1/1/02 1/4/02 }", null,
+ "{" + getDate("1/1/02") + "-" + getDate("1/4/02") + "}");
}

public void testEscaped() throws Exception {




--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>