Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 452621)
+++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy)
@@ -143,10 +143,11 @@
}
}
- public void assertWildcardQueryEquals(String query, boolean lowercase, String result)
+ public void assertWildcardQueryEquals(String query, boolean lowercase, String result, boolean allowZeroLengthPrefixQuery )
throws Exception {
QueryParser qp = getParser(null);
qp.setLowercaseExpandedTerms(lowercase);
+ qp.setAllowZeroLengthPrefixQuery( allowZeroLengthPrefixQuery );
Query q = qp.parse(query);
String s = q.toString("field");
if (!s.equals(result)) {
@@ -155,6 +156,11 @@
}
}
+ public void assertWildcardQueryEquals(String query, boolean lowercase, String result)
+ throws Exception {
+ assertWildcardQueryEquals( query, lowercase, result, false ); // false is passed on to setAllowZeroLengthPrefixQuery, so a leading '*' or '?' is rejected
+ }
+
public void assertWildcardQueryEquals(String query, String result) throws Exception {
QueryParser qp = getParser(null);
Query q = qp.parse(query);
@@ -330,6 +336,22 @@
assertWildcardQueryEquals("[A TO C]", "[a TO c]");
assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");
+// Test suffix queries (terms starting with '*' or '?'): disallowed by default
+ try {
+ assertWildcardQueryEquals("*Term", true, "*term");
+ fail();
+ } catch(ParseException pe) {
+ // expected exception
+ }
+ try {
+ assertWildcardQueryEquals("?Term", true, "?term");
+ fail();
+ } catch(ParseException pe) {
+ // expected exception
+ }
+// Test suffix queries: allowed once allowZeroLengthPrefixQuery is set to true
+ assertWildcardQueryEquals("*Term", true, "*term", true);
+ assertWildcardQueryEquals("?Term", true, "?term", true);
}
public void testQPA() throws Exception {
Index: src/java/org/apache/lucene/queryParser/QueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 452621)
+++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy)
@@ -82,6 +82,7 @@
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
+ boolean allowZeroLengthPrefixQuery = false;
Analyzer analyzer;
String field;
@@ -195,6 +196,22 @@
/**
+ * Set to true to allow '*' as the first character of a PrefixQuery or WildcardQuery term and '?' as
+ * the first character of a WildcardQuery term; while false, such terms cause a ParseException. Note
+ * that this can produce very slow queries on big indexes. Default: false.
+ */
+ public void setAllowZeroLengthPrefixQuery(boolean allowZeroLengthPrefixQuery) {
+ this.allowZeroLengthPrefixQuery = allowZeroLengthPrefixQuery;
+ }
+
+ /**
+ * @return whether a leading '*' or '?' is allowed; see {@link #setAllowZeroLengthPrefixQuery}
+ */
+ public boolean getAllowZeroLengthPrefixQuery() {
+ return allowZeroLengthPrefixQuery;
+ }
+
+ /**
* Sets the boolean operator of the QueryParser.
* In default mode (OR_OPERATOR) terms without any modifiers
* are considered optional: for example capital of Hungary is equal to
@@ -506,6 +523,8 @@
*/
protected Query getWildcardQuery(String field, String termStr) throws ParseException
{
+ if (!allowZeroLengthPrefixQuery && (termStr.startsWith("*") || termStr.startsWith("?")))
+ throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
@@ -538,6 +557,8 @@
*/
protected Query getPrefixQuery(String field, String termStr) throws ParseException
{
+ if (!allowZeroLengthPrefixQuery && termStr.startsWith("*"))
+ throw new ParseException("'*' not allowed as first character in PrefixQuery");
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
Index: src/java/org/apache/lucene/queryParser/QueryParser.jj
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 452621)
+++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy)
@@ -105,6 +105,7 @@
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
+ boolean allowZeroLengthPrefixQuery = false;
Analyzer analyzer;
String field;
@@ -218,6 +219,22 @@
/**
+ * Set to true to allow '*' as the first character of a PrefixQuery or WildcardQuery term and '?' as
+ * the first character of a WildcardQuery term; while false, such terms cause a ParseException. Note
+ * that this can produce very slow queries on big indexes. Default: false.
+ */
+ public void setAllowZeroLengthPrefixQuery(boolean allowZeroLengthPrefixQuery) {
+ this.allowZeroLengthPrefixQuery = allowZeroLengthPrefixQuery;
+ }
+
+ /**
+ * @return whether a leading '*' or '?' is allowed; see {@link #setAllowZeroLengthPrefixQuery}
+ */
+ public boolean getAllowZeroLengthPrefixQuery() {
+ return allowZeroLengthPrefixQuery;
+ }
+
+ /**
* Sets the boolean operator of the QueryParser.
* In default mode (OR_OPERATOR) terms without any modifiers
* are considered optional: for example capital of Hungary is equal to
@@ -529,6 +546,8 @@
*/
protected Query getWildcardQuery(String field, String termStr) throws ParseException
{
+ if (!allowZeroLengthPrefixQuery && (termStr.startsWith("*") || termStr.startsWith("?")))
+ throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
@@ -561,6 +580,8 @@
*/
protected Query getPrefixQuery(String field, String termStr) throws ParseException
{
+ if (!allowZeroLengthPrefixQuery && termStr.startsWith("*"))
+ throw new ParseException("'*' not allowed as first character in PrefixQuery");
if (lowercaseExpandedTerms) {
termStr = termStr.toLowerCase();
}
@@ -663,16 +684,6 @@
< <_WHITESPACE>>
}
-// OG: to support prefix queries:
-// http://issues.apache.org/bugzilla/show_bug.cgi?id=12137
-// Change from:
-//
-// |
-// (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-// To:
-//
-// (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-
TOKEN : {
|
@@ -686,9 +697,8 @@
|
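| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
-| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
-| <WILDTERM: <_TERM_START_CHAR>
- (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <PREFIXTERM: (<_TERM_START_CHAR> | "*") (<_TERM_CHAR>)* "*" >
+| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >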
| : RangeIn
| : RangeEx
}
Index: src/java/org/apache/lucene/queryParser/CharStream.java
===================================================================
--- src/java/org/apache/lucene/queryParser/CharStream.java (revision 452621)
+++ src/java/org/apache/lucene/queryParser/CharStream.java (working copy)
@@ -26,6 +26,20 @@
char readChar() throws java.io.IOException;
/**
+ * Returns the column position of the character last read.
+ * @deprecated no replacement is stated here; {@link #getEndColumn} appears to be the intended successor (TODO confirm)
+ * @see #getEndColumn
+ */
+ int getColumn();
+
+ /**
+ * Returns the line number of the character last read.
+ * @deprecated no replacement is stated here; {@link #getEndLine} appears to be the intended successor (TODO confirm)
+ * @see #getEndLine
+ */
+ int getLine();
+
+ /**
* Returns the column number of the last character for current token (being
* matched after the last call to BeginTOken).
*/
Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 452621)
+++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy)
@@ -104,7 +104,7 @@
{
int[] nextStates;
int startsAt = 0;
- jjnewStateCnt = 33;
+ jjnewStateCnt = 35;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
@@ -120,11 +120,11 @@
switch(jjstateSet[--i])
{
case 0:
- if ((0x7bffd0f8ffffd9ffL & l) != 0L)
+ if ((0xfbffd4f8ffffd9ffL & l) != 0L)
{
- if (kind > 17)
- kind = 17;
- jjCheckNAddStates(0, 6);
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(31, 32);
}
else if ((0x100002600L & l) != 0L)
{
@@ -138,6 +138,14 @@
if (kind > 9)
kind = 9;
}
+ if ((0x7bffd4f8ffffd9ffL & l) != 0L)
+ jjCheckNAddStates(0, 2);
+ if ((0x7bffd0f8ffffd9ffL & l) != 0L)
+ {
+ if (kind > 17)
+ kind = 17;
+ jjCheckNAddTwoStates(18, 19);
+ }
if (curChar == 38)
jjstateSet[jjnewStateCnt++] = 4;
break;
@@ -165,70 +173,81 @@
if (curChar == 34 && kind > 16)
kind = 16;
break;
- case 18:
- if ((0x3ff000000000000L & l) == 0L)
- break;
- if (kind > 18)
- kind = 18;
- jjAddStates(7, 8);
- break;
- case 19:
- if (curChar == 46)
- jjCheckNAdd(20);
- break;
- case 20:
- if ((0x3ff000000000000L & l) == 0L)
- break;
- if (kind > 18)
- kind = 18;
- jjCheckNAdd(20);
- break;
- case 21:
+ case 17:
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddStates(0, 6);
+ jjCheckNAddTwoStates(18, 19);
break;
- case 22:
+ case 18:
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(18, 19);
break;
- case 24:
+ case 20:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(18, 19);
break;
+ case 22:
+ if ((0x3ff000000000000L & l) == 0L)
+ break;
+ if (kind > 18)
+ kind = 18;
+ jjAddStates(3, 4);
+ break;
+ case 23:
+ if (curChar == 46)
+ jjCheckNAdd(24);
+ break;
+ case 24:
+ if ((0x3ff000000000000L & l) == 0L)
+ break;
+ if (kind > 18)
+ kind = 18;
+ jjCheckNAdd(24);
+ break;
case 25:
+ if ((0x7bffd4f8ffffd9ffL & l) != 0L)
+ jjCheckNAddStates(0, 2);
+ break;
+ case 26:
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(0, 2);
break;
- case 26:
+ case 27:
if (curChar == 42 && kind > 19)
kind = 19;
break;
- case 28:
+ case 29:
if ((0x84002f0600000000L & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(0, 2);
break;
- case 29:
+ case 30:
+ if ((0xfbffd4f8ffffd9ffL & l) == 0L)
+ break;
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(31, 32);
+ break;
+ case 31:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(31, 32);
break;
- case 31:
+ case 33:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(31, 32);
break;
default : break;
}
@@ -244,18 +263,26 @@
case 0:
if ((0x97ffffff97ffffffL & l) != 0L)
{
- if (kind > 17)
- kind = 17;
- jjCheckNAddStates(0, 6);
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(31, 32);
}
else if (curChar == 126)
{
if (kind > 18)
kind = 18;
- jjstateSet[jjnewStateCnt++] = 18;
+ jjstateSet[jjnewStateCnt++] = 22;
}
+ if ((0x97ffffff97ffffffL & l) != 0L)
+ jjCheckNAddStates(0, 2);
+ if ((0x97ffffff97ffffffL & l) != 0L)
+ {
+ if (kind > 17)
+ kind = 17;
+ jjCheckNAddTwoStates(18, 19);
+ }
if (curChar == 92)
- jjCheckNAddStates(12, 14);
+ jjCheckNAddStates(5, 7);
else if (curChar == 78)
jjstateSet[jjnewStateCnt++] = 11;
else if (curChar == 124)
@@ -306,73 +333,69 @@
jjstateSet[jjnewStateCnt++] = 11;
break;
case 15:
- jjAddStates(15, 16);
+ jjAddStates(8, 9);
break;
case 17:
- if (curChar != 126)
- break;
- if (kind > 18)
- kind = 18;
- jjstateSet[jjnewStateCnt++] = 18;
- break;
- case 21:
+ case 18:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddStates(0, 6);
+ jjCheckNAddTwoStates(18, 19);
break;
- case 22:
- if ((0x97ffffff97ffffffL & l) == 0L)
- break;
- if (kind > 17)
- kind = 17;
- jjCheckNAddTwoStates(22, 23);
- break;
- case 23:
+ case 19:
if (curChar == 92)
- jjCheckNAddTwoStates(24, 24);
+ jjCheckNAddTwoStates(20, 20);
break;
- case 24:
+ case 20:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(18, 19);
break;
+ case 21:
+ if (curChar != 126)
+ break;
+ if (kind > 18)
+ kind = 18;
+ jjstateSet[jjnewStateCnt++] = 22;
+ break;
case 25:
+ case 26:
if ((0x97ffffff97ffffffL & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(0, 2);
break;
- case 27:
+ case 28:
if (curChar == 92)
- jjCheckNAddTwoStates(28, 28);
+ jjCheckNAddTwoStates(29, 29);
break;
- case 28:
+ case 29:
if ((0x6800000078000000L & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(0, 2);
break;
- case 29:
+ case 30:
+ case 31:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(31, 32);
break;
- case 30:
+ case 32:
if (curChar == 92)
- jjCheckNAddTwoStates(31, 31);
+ jjCheckNAddTwoStates(33, 33);
break;
- case 31:
+ case 33:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(31, 32);
break;
- case 32:
+ case 34:
if (curChar == 92)
- jjCheckNAddStates(12, 14);
+ jjCheckNAddStates(5, 7);
break;
default : break;
}
@@ -390,33 +413,45 @@
switch(jjstateSet[--i])
{
case 0:
- if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
- break;
- if (kind > 17)
- kind = 17;
- jjCheckNAddStates(0, 6);
+ if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 17)
+ kind = 17;
+ jjCheckNAddTwoStates(18, 19);
+ }
+ if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+ jjCheckNAddStates(0, 2);
+ if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 20)
+ kind = 20;
+ jjCheckNAddTwoStates(31, 32);
+ }
break;
case 15:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(15, 16);
+ jjAddStates(8, 9);
break;
- case 22:
+ case 17:
+ case 18:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(18, 19);
break;
case 25:
+ case 26:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(0, 2);
break;
- case 29:
+ case 30:
+ case 31:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(31, 32);
break;
default : break;
}
@@ -429,7 +464,7 @@
kind = 0x7fffffff;
}
++curPos;
- if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
+ if ((i = jjnewStateCnt) == (startsAt = 35 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@@ -565,7 +600,7 @@
jjCheckNAdd(4);
break;
case 2:
- jjAddStates(17, 18);
+ jjAddStates(10, 11);
break;
default : break;
}
@@ -592,7 +627,7 @@
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(17, 18);
+ jjAddStates(10, 11);
break;
default : break;
}
@@ -639,7 +674,7 @@
break;
if (kind > 23)
kind = 23;
- jjAddStates(19, 20);
+ jjAddStates(12, 13);
break;
case 1:
if (curChar == 46)
@@ -825,7 +860,7 @@
jjCheckNAdd(4);
break;
case 2:
- jjAddStates(17, 18);
+ jjAddStates(10, 11);
break;
default : break;
}
@@ -852,7 +887,7 @@
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(17, 18);
+ jjAddStates(10, 11);
break;
default : break;
}
@@ -872,8 +907,7 @@
}
}
static final int[] jjnextStates = {
- 22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15,
- 16, 2, 3, 0, 1,
+ 26, 27, 28, 22, 23, 20, 29, 33, 15, 16, 2, 3, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@@ -908,8 +942,8 @@
0x40L,
};
protected CharStream input_stream;
-private final int[] jjrounds = new int[33];
-private final int[] jjstateSet = new int[66];
+private final int[] jjrounds = new int[35];
+private final int[] jjstateSet = new int[70];
protected char curChar;
public QueryParserTokenManager(CharStream stream)
{
@@ -931,7 +965,7 @@
{
int i;
jjround = 0x80000001;
- for (i = 33; i-- > 0;)
+ for (i = 35; i-- > 0;)
jjrounds[i] = 0x80000000;
}
public void ReInit(CharStream stream, int lexState)