Mailing List Archive: cvs commit: jakarta-lucene/src/java/org/apache/lucene/queryParser QueryParser.jj

otis 2002/07/14 10:16:21

Modified: src/java/org/apache/lucene/queryParser QueryParser.jj
Log:
- Added Péter Halácsy's changes that allow setting of default boolean
operator.

Revision Changes Path
1.19 +105 -62 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj

Index: QueryParser.jj
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -r1.18 -r1.19
--- QueryParser.jj 25 Jun 2002 00:05:31 -0000 1.18
+++ QueryParser.jj 14 Jul 2002 17:16:21 -0000 1.19
@@ -78,7 +78,7 @@
*
* The syntax for query strings is as follows:
* A Query is a series of clauses.
- * A clause may be prefixed by:
+ * A clause may be prefixed by:
* <ul>
* <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
* that the clause is required or prohibited respectively; or
@@ -121,11 +121,11 @@
QueryParser parser = new QueryParser(field, analyzer);
return parser.parse(query);
}
- catch (TokenMgrError tme) {
+ catch (TokenMgrError tme) {
throw new ParseException(tme.getMessage());
}
}
-
+
Analyzer analyzer;
String field;
int phraseSlop = 0;
@@ -157,8 +157,30 @@
/** Gets the default slop for phrases. */
public int getPhraseSlop() { return phraseSlop; }

- private void addClause(Vector clauses, int conj, int mods,
- Query q) {
+ // CODE ADDED BY PETER HALACSY
+
+ /** The actual mode that parses uses to parse queries */
+ public static final int DEFAULT_OPERATOR_OR = 0;
+ public static final int DEFAULT_OPERATOR_AND = 1;
+
+ private int mode = DEFAULT_OPERATOR_OR;
+
+ /**
+ * Set the mode of the QueryParser. In classic mode (<code>DEFAULT_OPERATOR_OR</mode>)
+ * term without any modifiers are considered optional: for example <code>
+ * capital of Hungary</code> is equal to <code>capital OR of OR Hungary</code>.<br/>
+ * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
+ * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
+ */
+ public void setMode(int mode) {
+ this.mode = mode;
+ }
+
+ public int getMode() {
+ return this.mode;
+ }
+
+ private void addClause(Vector clauses, int conj, int mods, Query q) {
boolean required, prohibited;

// If this term is introduced by AND, make the preceding term required,
@@ -168,28 +190,49 @@
if (!c.prohibited)
c.required = true;
}
+ // THIS CODE ADDED PETER HALACSY
+ if(mode == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
+ // If this term is introduced by OR, make the preceding term optional,
+ // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
+ // notice if the input is a OR b, first term is parsed as required; without
+ // this modification a OR b would parsed as +a OR b
+ BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
+ if (!c.prohibited)
+ c.required = false;
+ }
+ // THIS CODE ADDED BY PETER HALACSY

// We might have been passed a null query; the term might have been
- // filtered away by the analyzer.
+ // filtered away by the analyzer.
if (q == null)
return;

- // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
- // introduced by NOT or -; make sure not to set both.
- prohibited = (mods == MOD_NOT);
- required = (mods == MOD_REQ);
- if (conj == CONJ_AND && !prohibited)
- required = true;
+ if(mode == DEFAULT_OPERATOR_OR) {
+ // THIS IS THE ORIGINAL CODE
+ // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
+ // introduced by NOT or -; make sure not to set both.
+ prohibited = (mods == MOD_NOT);
+ required = (mods == MOD_REQ);
+ if (conj == CONJ_AND && !prohibited) {
+ required = true;
+ }
+ } else {
+ // THIS CODE ADDED BY PETER HALACSY
+ // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
+ // if not PROHIBITED and not introduced by OR
+ prohibited = (mods == MOD_NOT);
+ required = (!prohibited && conj != CONJ_OR);
+ }
clauses.addElement(new BooleanClause(q, required, prohibited));
}

- private Query getFieldQuery(String field,
- Analyzer analyzer,
+ private Query getFieldQuery(String field,
+ Analyzer analyzer,
String queryText) {
// Use the analyzer to get all the tokens, and then build a TermQuery,
// PhraseQuery, or nothing based on the term count
-
- TokenStream source = analyzer.tokenStream(field,
+
+ TokenStream source = analyzer.tokenStream(field,
new StringReader(queryText));
Vector v = new Vector();
org.apache.lucene.analysis.Token t;
@@ -197,17 +240,17 @@
while (true) {
try {
t = source.next();
- }
+ }
catch (IOException e) {
t = null;
}
- if (t == null)
+ if (t == null)
break;
v.addElement(t.termText());
}
- if (v.size() == 0)
+ if (v.size() == 0)
return null;
- else if (v.size() == 1)
+ else if (v.size() == 1)
return new TermQuery(new Term(field, (String) v.elementAt(0)));
else {
PhraseQuery q = new PhraseQuery();
@@ -219,11 +262,11 @@
}
}

- private Query getRangeQuery(String field,
- Analyzer analyzer,
- String part1,
+ private Query getRangeQuery(String field,
+ Analyzer analyzer,
+ String part1,
String part2,
- boolean inclusive)
+ boolean inclusive)
{
boolean isDate = false, isNumber = false;

@@ -242,13 +285,13 @@
// @@@ Add number support
}

- return new RangeQuery(new Term(field, part1),
- new Term(field, part2),
+ return new RangeQuery(new Term(field, part1),
+ new Term(field, part2),
inclusive);
}

public static void main(String[] args) throws Exception {
- QueryParser qp = new QueryParser("field",
+ QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
@@ -271,10 +314,10 @@

<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
-| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
+| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?" ] >
-| <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?" ]
+| <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^",
+ "[", "]", "\"", "{", "}", "~", "*", "?" ]
| <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> ) >
| <#_WHITESPACE: ( " " | "\t" ) >
@@ -299,7 +342,7 @@
| <FUZZY: "~" >
| <SLOP: "~" (<_NUM_CHAR>)+ >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
-| <WILDTERM: <_TERM_START_CHAR>
+| <WILDTERM: <_TERM_START_CHAR>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
@@ -326,23 +369,23 @@
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

-int Conjunction() : {
+int Conjunction() : {
int ret = CONJ_NONE;
}
{
- [
- <AND> { ret = CONJ_AND; }
+ [
+ <AND> { ret = CONJ_AND; }
| <OR> { ret = CONJ_OR; }
]
{ return ret; }
}

-int Modifiers() : {
+int Modifiers() : {
int ret = MOD_NONE;
}
{
- [
- <PLUS> { ret = MOD_REQ; }
+ [
+ <PLUS> { ret = MOD_REQ; }
| <MINUS> { ret = MOD_NOT; }
| <NOT> { ret = MOD_NOT; }
]
@@ -353,17 +396,17 @@
{
Vector clauses = new Vector();
Query q, firstQuery=null;
- int conj, mods;
+ int conj, mods;
}
{
- mods=Modifiers() q=Clause(field)
- {
- addClause(clauses, CONJ_NONE, mods, q);
- if (mods == MOD_NONE)
- firstQuery=q;
+ mods=Modifiers() q=Clause(field)
+ {
+ addClause(clauses, CONJ_NONE, mods, q);
+ if (mods == MOD_NONE)
+ firstQuery=q;
}
- (
- conj=Conjunction() mods=Modifiers() q=Clause(field)
+ (
+ conj=Conjunction() mods=Modifiers() q=Clause(field)
{ addClause(clauses, conj, mods, q); }
)*
{
@@ -389,16 +432,16 @@
]

(
- q=Term(field)
+ q=Term(field)
| <LPAREN> q=Query(field) <RPAREN>
)
{
return q;
}
}
-

-Query Term(String field) : {
+
+Query Term(String field) : {
Token term, boost=null, slop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
@@ -407,7 +450,7 @@
Query q;
}
{
- (
+ (
(
term=<TERM>
| term=<PREFIXTERM> { prefix=true; }
@@ -416,19 +459,19 @@
)
[ <FUZZY> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
- {
+ {
if (wildcard)
q = new WildcardQuery(new Term(field, term.image));
- else if (prefix)
+ else if (prefix)
q = new PrefixQuery(new Term(field, term.image.substring
(0, term.image.length()-1)));
else if (fuzzy)
q = new FuzzyQuery(new Term(field, term.image));
else
- q = getFieldQuery(field, analyzer, term.image);
+ q = getFieldQuery(field, analyzer, term.image);
}
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
- [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
+ [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
<RANGEIN_END> )
[ <CARAT> boost=<NUMBER> ]
{
@@ -440,7 +483,7 @@
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
}
| ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
- [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
+ [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
<RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ]
{
@@ -451,14 +494,14 @@

q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
}
- | term=<QUOTED>
+ | term=<QUOTED>
[ slop=<SLOP> ]
[ <CARAT> boost=<NUMBER> ]
- {
- q = getFieldQuery(field, analyzer,
- term.image.substring(1, term.image.length()-1));
+ {
+ q = getFieldQuery(field, analyzer,
+ term.image.substring(1, term.image.length()-1));
if (slop != null && q instanceof PhraseQuery) {
- try {
+ try {
int s = Float.valueOf(slop.image.substring(1)).intValue();
((PhraseQuery) q).setSlop(s);
}
@@ -466,16 +509,16 @@
}
}
)
- {
+ {
if (boost != null) {
float f = (float) 1.0;
- try {
+ try {
f = Float.valueOf(boost.image).floatValue();
}
catch (Exception ignored) { }

q.setBoost(f);
}
- return q;
+ return q;
}
}

--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>

otis 2002/07/14 10:21:57

Modified: src/java/org/apache/lucene/queryParser QueryParser.jj
Log:
- Renamed a few recently added variables and methods for consistency.

Revision Changes Path
1.20 +12 -11 jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj

Index: QueryParser.jj
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- QueryParser.jj 14 Jul 2002 17:16:21 -0000 1.19
+++ QueryParser.jj 14 Jul 2002 17:21:57 -0000 1.20
@@ -159,25 +159,26 @@

// CODE ADDED BY PETER HALACSY

- /** The actual mode that parses uses to parse queries */
public static final int DEFAULT_OPERATOR_OR = 0;
public static final int DEFAULT_OPERATOR_AND = 1;

- private int mode = DEFAULT_OPERATOR_OR;
+ /** The actual operator that the parser uses to combine query terms */
+ private int operator = DEFAULT_OPERATOR_OR;

/**
- * Set the mode of the QueryParser. In classic mode (<code>DEFAULT_OPERATOR_OR</mode>)
- * term without any modifiers are considered optional: for example <code>
- * capital of Hungary</code> is equal to <code>capital OR of OR Hungary</code>.<br/>
+ * Set the boolean operator of the QueryParser.
+ * In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
+ * are considered optional: for example <code>capital of Hungary</code> is equal to
+ * <code>capital OR of OR Hungary</code>.<br/>
* In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
*/
- public void setMode(int mode) {
- this.mode = mode;
+ public void setOperator(int operator) {
+ this.operator = operator;
}

- public int getMode() {
- return this.mode;
+ public int getOperator() {
+ return this.operator;
}

private void addClause(Vector clauses, int conj, int mods, Query q) {
@@ -191,7 +192,7 @@
c.required = true;
}
// THIS CODE ADDED PETER HALACSY
- if(mode == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
+ if (operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
// If this term is introduced by OR, make the preceding term optional,
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
@@ -207,7 +208,7 @@
if (q == null)
return;

- if(mode == DEFAULT_OPERATOR_OR) {
+ if (operator == DEFAULT_OPERATOR_OR) {
// THIS IS THE ORIGINAL CODE
// We set REQUIRED if we're introduced by AND or +; PROHIBITED if
// introduced by NOT or -; make sure not to set both.

--
To unsubscribe, e-mail: <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>