Property changes on: .
___________________________________________________________________
Modified: svn:ignore
- build
dist
*~
velocity.log
build.properties
.idea
*.iml
*.ipr
*.iws
.project
.classpath
.settings
prj.el
+ build
dist
*~
velocity.log
build.properties
.idea
*.iml
*.ipr
*.iws
.project
.classpath
.settings
prj.el
work
Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 990099)
+++ CHANGES.txt (working copy)
@@ -131,6 +131,10 @@
New features
+* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
+ are directly supported by the standard queryparser.
+ (Simon Willnauer, Robert Muir)
+
* LUCENE-1606, LUCENE-2089: Adds AutomatonQuery, a MultiTermQuery that
matches terms against a finite-state machine. Implement WildcardQuery
and FuzzyQuery with finite-state methods. Adds RegexpQuery.
Index: contrib/CHANGES.txt
===================================================================
--- contrib/CHANGES.txt (revision 990099)
+++ contrib/CHANGES.txt (working copy)
@@ -9,6 +9,9 @@
New Features
+ * LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
+ (Simon Willnauer, Robert Muir)
+
* LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific
Directory impl that uses the O_DIRECT flag to bypass the buffer
cache. This is useful to prevent segment merging from evicting
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java (working copy)
@@ -109,4 +109,4 @@
void Done();
}
-/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */
+/* JavaCC - OriginalChecksum=7bcd45d10a032f1c9da64691d073cf75 (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java (working copy)
@@ -195,4 +195,4 @@
}
}
-/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */
+/* JavaCC - OriginalChecksum=4440e368eeef562faffeca98a200334b (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (working copy)
@@ -17,9 +17,11 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@@ -95,6 +97,7 @@
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
+ MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
Analyzer analyzer;
String field;
@@ -232,6 +235,27 @@
public boolean getLowercaseExpandedTerms() {
return lowercaseExpandedTerms;
}
+ /**
+ * By default PrecedenceQueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * when creating a PrefixQuery, WildcardQuery, RegexpQuery or RangeQuery. This implementation is generally preferable because it
+ * a) runs faster, b) does not let the scarcity of terms unduly influence the score, and
+ * c) avoids any "TooManyBooleanClauses" exception.
+ * However, if your application really needs to use the
+ * old-fashioned BooleanQuery expansion rewriting and the above
+ * points are not relevant then use this to change
+ * the rewrite method.
+ */
+ public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
+ multiTermRewriteMethod = method;
+ }
+
+
+ /**
+ * @see #setMultiTermRewriteMethod
+ */
+ public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
+ return multiTermRewriteMethod;
+ }
/**
* Set locale used by date range parsing.
@@ -426,7 +450,9 @@
}
catch (Exception e) { }
- return new TermRangeQuery(field, part1, part2, inclusive, inclusive);
+ final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive);
+ query.setRewriteMethod(multiTermRewriteMethod);
+ return query;
}
/**
@@ -500,7 +526,9 @@
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
- return new WildcardQuery(t);
+ final WildcardQuery query = new WildcardQuery(t);
+ query.setRewriteMethod(multiTermRewriteMethod);
+ return query;
}
/**
@@ -532,7 +560,40 @@
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
- return new PrefixQuery(t);
+ final PrefixQuery query = new PrefixQuery(t);
+ query.setRewriteMethod(multiTermRewriteMethod);
+ return query;
+ }
+
+ /**
+ * Factory method for generating a query. Called when parser
+ * parses an input term token that contains a regular expression
+ * query.
+ *
+ * Depending on settings, pattern term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with regular expression templates.
+ *
+ * Can be overridden by extending classes, to provide custom handling for
+ * regular expression queries, which may be necessary due to missing analyzer
+ * calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token that contains a regular expression
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getRegexpQuery(String field, String termStr) throws ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ final Term regexp = new Term(field, termStr);
+ final RegexpQuery query = new RegexpQuery(regexp);
+ query.setRewriteMethod(multiTermRewriteMethod);
+ return query;
}
/**
@@ -675,6 +736,7 @@
case TERM:
case PREFIXTERM:
case WILDTERM:
+ case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@@ -750,6 +812,7 @@
case TERM:
case PREFIXTERM:
case WILDTERM:
+ case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@@ -790,11 +853,14 @@
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
+ boolean regexp = false;
+
Query q;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
case PREFIXTERM:
case WILDTERM:
+ case REGEXPTERM:
case NUMBER:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
@@ -808,6 +874,10 @@
term = jj_consume_token(WILDTERM);
wildcard=true;
break;
+ case REGEXPTERM:
+ term = jj_consume_token(REGEXPTERM);
+ regexp=true;
+ break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
@@ -850,6 +920,8 @@
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
+ } else if (regexp) {
+ q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {
@@ -1055,11 +1127,16 @@
private int jj_gen;
final private int[] jj_la1 = new int[24];
static private int[] jj_la1_0;
+ static private int[] jj_la1_1;
static {
jj_la1_init_0();
+ jj_la1_init_1();
}
private static void jj_la1_init_0() {
- jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f00,0x100,0x80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
+ jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1fb1f00,0x100,0x80,0x8000,0x1fb1000,0x13a0000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x40000,0x8000,0x1fb0000,};
+ }
+ private static void jj_la1_init_1() {
+ jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@@ -1213,7 +1290,7 @@
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
- boolean[] la1tokens = new boolean[32];
+ boolean[] la1tokens = new boolean[33];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
@@ -1224,10 +1301,13 @@
if ((jj_la1_0[i] & (1<
+ * Depending on settings, pattern term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with regular expression templates.
+ *
+ * Can be overridden by extending classes, to provide custom handling for
+ * regular expression queries, which may be necessary due to missing analyzer
+ * calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token that contains a regular expression
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getRegexpQuery(String field, String termStr) throws ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ final Term regexp = new Term(field, termStr);
+ final RegexpQuery query = new RegexpQuery(regexp);
+ query.setRewriteMethod(multiTermRewriteMethod);
+ return query;
}
/**
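@@ -678,6 +739,7 @@
| <PREFIXTERM: <_TERM_START> (<_TERM_CHAR>)* "*" >
| <WILDTERM: <_TERM_START>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx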
}
@@ -813,6 +875,8 @@
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
+ boolean regexp = false;
+
Query q;
}
{
@@ -821,6 +885,7 @@
term=<TERM>
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
+ | term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
@@ -833,6 +898,8 @@
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
+ } else if (regexp) {
+ q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java (working copy)
@@ -49,27 +49,29 @@
/** RegularExpression Id. */
int WILDTERM = 20;
/** RegularExpression Id. */
- int RANGEIN_START = 21;
+ int REGEXPTERM = 21;
/** RegularExpression Id. */
- int RANGEEX_START = 22;
+ int RANGEIN_START = 22;
/** RegularExpression Id. */
- int NUMBER = 23;
+ int RANGEEX_START = 23;
/** RegularExpression Id. */
- int RANGEIN_TO = 24;
+ int NUMBER = 24;
/** RegularExpression Id. */
- int RANGEIN_END = 25;
+ int RANGEIN_TO = 25;
/** RegularExpression Id. */
- int RANGEIN_QUOTED = 26;
+ int RANGEIN_END = 26;
/** RegularExpression Id. */
- int RANGEIN_GOOP = 27;
+ int RANGEIN_QUOTED = 27;
/** RegularExpression Id. */
- int RANGEEX_TO = 28;
+ int RANGEIN_GOOP = 28;
/** RegularExpression Id. */
- int RANGEEX_END = 29;
+ int RANGEEX_TO = 29;
/** RegularExpression Id. */
- int RANGEEX_QUOTED = 30;
+ int RANGEEX_END = 30;
/** RegularExpression Id. */
- int RANGEEX_GOOP = 31;
+ int RANGEEX_QUOTED = 31;
+ /** RegularExpression Id. */
+ int RANGEEX_GOOP = 32;
/** Lexical state. */
int Boost = 0;
@@ -103,6 +105,7 @@
"<FUZZY_SLOP>",
"<PREFIXTERM>",
"<WILDTERM>",
+ "<REGEXPTERM>",
"\"[\"",
"\"{\"",
"<NUMBER>",
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java (working copy)
@@ -15,9 +15,11 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@@ -66,11 +68,11 @@
case 58:
return jjStopAtPos(0, 14);
case 91:
- return jjStopAtPos(0, 21);
+ return jjStopAtPos(0, 22);
case 94:
return jjStopAtPos(0, 15);
case 123:
- return jjStopAtPos(0, 22);
+ return jjStopAtPos(0, 23);
default :
return jjMoveNfa_3(0, 0);
}
@@ -84,7 +86,7 @@
private int jjMoveNfa_3(int startState, int curPos)
{
int startsAt = 0;
- jjnewStateCnt = 33;
+ jjnewStateCnt = 38;
int i = 1;
jjstateSet[0] = startState;
int kind = 0x7fffffff;
@@ -118,7 +120,9 @@
if (kind > 9)
kind = 9;
}
- if (curChar == 38)
+ if (curChar == 47)
+ jjCheckNAddStates(7, 9);
+ else if (curChar == 38)
jjstateSet[jjnewStateCnt++] = 4;
break;
case 4:
@@ -150,7 +154,7 @@
break;
if (kind > 18)
kind = 18;
- jjAddStates(7, 8);
+ jjAddStates(10, 11);
break;
case 19:
if (curChar == 46)
@@ -164,51 +168,64 @@
jjCheckNAdd(20);
break;
case 21:
+ case 23:
+ if (curChar == 47)
+ jjCheckNAddStates(7, 9);
+ break;
+ case 22:
+ if ((0xffff7fffffffffffL & l) != 0L)
+ jjCheckNAddStates(7, 9);
+ break;
+ case 25:
+ if (curChar == 47 && kind > 21)
+ kind = 21;
+ break;
+ case 26:
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
- case 22:
+ case 27:
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(27, 28);
break;
- case 24:
+ case 29:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(27, 28);
break;
- case 25:
+ case 30:
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(12, 14);
break;
- case 26:
+ case 31:
if (curChar == 42 && kind > 19)
kind = 19;
break;
- case 28:
+ case 33:
if ((0x84002f0600000000L & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(12, 14);
break;
- case 29:
+ case 34:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 31:
+ case 36:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
default : break;
}
@@ -235,7 +252,7 @@
jjstateSet[jjnewStateCnt++] = 18;
}
if (curChar == 92)
- jjCheckNAddStates(12, 14);
+ jjCheckNAddStates(15, 17);
else if (curChar == 78)
jjstateSet[jjnewStateCnt++] = 11;
else if (curChar == 124)
@@ -286,7 +303,7 @@
jjstateSet[jjnewStateCnt++] = 11;
break;
case 15:
- jjAddStates(15, 16);
+ jjAddStates(18, 19);
break;
case 17:
if (curChar != 126)
@@ -295,64 +312,71 @@
kind = 18;
jjstateSet[jjnewStateCnt++] = 18;
break;
- case 21:
+ case 22:
+ jjAddStates(7, 9);
+ break;
+ case 24:
+ if (curChar == 92)
+ jjstateSet[jjnewStateCnt++] = 23;
+ break;
+ case 26:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
- case 22:
+ case 27:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(27, 28);
break;
- case 23:
+ case 28:
if (curChar == 92)
- jjCheckNAddTwoStates(24, 24);
+ jjCheckNAddTwoStates(29, 29);
break;
- case 24:
+ case 29:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(27, 28);
break;
- case 25:
+ case 30:
if ((0x97ffffff97ffffffL & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(12, 14);
break;
- case 27:
+ case 32:
if (curChar == 92)
- jjCheckNAddTwoStates(28, 28);
+ jjCheckNAddTwoStates(33, 33);
break;
- case 28:
+ case 33:
if ((0x6800000078000000L & l) != 0L)
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(12, 14);
break;
- case 29:
+ case 34:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 30:
+ case 35:
if (curChar == 92)
- jjCheckNAddTwoStates(31, 31);
+ jjCheckNAddTwoStates(36, 36);
break;
- case 31:
+ case 36:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 32:
+ case 37:
if (curChar == 92)
- jjCheckNAddStates(12, 14);
+ jjCheckNAddStates(15, 17);
break;
default : break;
}
@@ -378,25 +402,29 @@
break;
case 15:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(15, 16);
+ jjAddStates(18, 19);
break;
case 22:
+ if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+ jjAddStates(7, 9);
+ break;
+ case 27:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 17)
kind = 17;
- jjCheckNAddTwoStates(22, 23);
+ jjCheckNAddTwoStates(27, 28);
break;
- case 25:
+ case 30:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjCheckNAddStates(9, 11);
+ jjCheckNAddStates(12, 14);
break;
- case 29:
+ case 34:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 20)
kind = 20;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
default : break;
}
@@ -409,7 +437,7 @@
kind = 0x7fffffff;
}
++curPos;
- if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
+ if ((i = jjnewStateCnt) == (startsAt = 38 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@@ -420,9 +448,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x10000000L) != 0L)
+ if ((active0 & 0x20000000L) != 0L)
{
- jjmatchedKind = 31;
+ jjmatchedKind = 32;
return 4;
}
return -1;
@@ -439,9 +467,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_1(0x10000000L);
+ return jjMoveStringLiteralDfa1_1(0x20000000L);
case 125:
- return jjStopAtPos(0, 29);
+ return jjStopAtPos(0, 30);
default :
return jjMoveNfa_1(0, 0);
}
@@ -456,8 +484,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x10000000L) != 0L)
- return jjStartNfaWithStates_1(1, 28, 4);
+ if ((active0 & 0x20000000L) != 0L)
+ return jjStartNfaWithStates_1(1, 29, 4);
break;
default :
break;
@@ -493,8 +521,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 31)
- kind = 31;
+ if (kind > 32)
+ kind = 32;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@@ -514,14 +542,14 @@
jjCheckNAddTwoStates(2, 3);
break;
case 3:
- if (curChar == 34 && kind > 30)
- kind = 30;
+ if (curChar == 34 && kind > 31)
+ kind = 31;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 31)
- kind = 31;
+ if (kind > 32)
+ kind = 32;
jjCheckNAdd(4);
break;
default : break;
@@ -539,12 +567,12 @@
case 4:
if ((0xdfffffffffffffffL & l) == 0L)
break;
- if (kind > 31)
- kind = 31;
+ if (kind > 32)
+ kind = 32;
jjCheckNAdd(4);
break;
case 2:
- jjAddStates(17, 18);
+ jjAddStates(20, 21);
break;
default : break;
}
@@ -565,13 +593,13 @@
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
- if (kind > 31)
- kind = 31;
+ if (kind > 32)
+ kind = 32;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(17, 18);
+ jjAddStates(20, 21);
break;
default : break;
}
@@ -615,9 +643,9 @@
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 23)
- kind = 23;
- jjAddStates(19, 20);
+ if (kind > 24)
+ kind = 24;
+ jjAddStates(22, 23);
break;
case 1:
if (curChar == 46)
@@ -626,8 +654,8 @@
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 23)
- kind = 23;
+ if (kind > 24)
+ kind = 24;
jjCheckNAdd(2);
break;
default : break;
@@ -678,9 +706,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x1000000L) != 0L)
+ if ((active0 & 0x2000000L) != 0L)
{
- jjmatchedKind = 27;
+ jjmatchedKind = 28;
return 4;
}
return -1;
@@ -697,9 +725,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_2(0x1000000L);
+ return jjMoveStringLiteralDfa1_2(0x2000000L);
case 93:
- return jjStopAtPos(0, 25);
+ return jjStopAtPos(0, 26);
default :
return jjMoveNfa_2(0, 0);
}
@@ -714,8 +742,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x1000000L) != 0L)
- return jjStartNfaWithStates_2(1, 24, 4);
+ if ((active0 & 0x2000000L) != 0L)
+ return jjStartNfaWithStates_2(1, 25, 4);
break;
default :
break;
@@ -751,8 +779,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 27)
- kind = 27;
+ if (kind > 28)
+ kind = 28;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@@ -772,14 +800,14 @@
jjCheckNAddTwoStates(2, 3);
break;
case 3:
- if (curChar == 34 && kind > 26)
- kind = 26;
+ if (curChar == 34 && kind > 27)
+ kind = 27;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 27)
- kind = 27;
+ if (kind > 28)
+ kind = 28;
jjCheckNAdd(4);
break;
default : break;
@@ -797,12 +825,12 @@
case 4:
if ((0xffffffffdfffffffL & l) == 0L)
break;
- if (kind > 27)
- kind = 27;
+ if (kind > 28)
+ kind = 28;
jjCheckNAdd(4);
break;
case 2:
- jjAddStates(17, 18);
+ jjAddStates(20, 21);
break;
default : break;
}
@@ -823,13 +851,13 @@
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
- if (kind > 27)
- kind = 27;
+ if (kind > 28)
+ kind = 28;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
- jjAddStates(17, 18);
+ jjAddStates(20, 21);
break;
default : break;
}
@@ -849,8 +877,8 @@
}
}
static final int[] jjnextStates = {
- 22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15,
- 16, 2, 3, 0, 1,
+ 27, 30, 31, 34, 35, 32, 28, 22, 24, 25, 18, 19, 30, 31, 32, 29,
+ 33, 36, 15, 16, 2, 3, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@@ -868,8 +896,8 @@
/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50",
-"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117",
-"\135", null, null, "\124\117", "\175", null, null, };
+"\51", "\72", "\136", null, null, null, null, null, null, "\133", "\173", null,
+"\124\117", "\135", null, null, "\124\117", "\175", null, null, };
/** Lexer state names. */
public static final String[] lexStateNames = {
@@ -881,18 +909,18 @@
/** Lex State array. */
public static final int[] jjnewLexState = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1,
- 3, -1, -1, -1, 3, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3,
+ -1, 3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
- 0xffffff81L,
+ 0x1ffffff81L,
};
static final long[] jjtoSkip = {
0x40L,
};
protected CharStream input_stream;
-private final int[] jjrounds = new int[33];
-private final int[] jjstateSet = new int[66];
+private final int[] jjrounds = new int[38];
+private final int[] jjstateSet = new int[76];
protected char curChar;
/** Constructor. */
public PrecedenceQueryParserTokenManager(CharStream stream){
@@ -917,7 +945,7 @@
{
int i;
jjround = 0x80000001;
- for (i = 33; i-- > 0;)
+ for (i = 38; i-- > 0;)
jjrounds[i] = 0x80000000;
}
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java (working copy)
@@ -121,4 +121,4 @@
}
}
-/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */
+/* JavaCC - OriginalChecksum=bc9495ddfa3189061fb4f1bf3c4f64e2 (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java (working copy)
@@ -138,4 +138,4 @@
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=257b82f2650841e86289a309cb3dae76 (do not edit this line) */
+/* JavaCC - OriginalChecksum=e01667f2eb6d0b2f1fbb6958df0ca751 (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RegexpQueryNodeBuilder.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RegexpQueryNodeBuilder.java (revision 0)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RegexpQueryNodeBuilder.java (revision 0)
@@ -0,0 +1,52 @@
+package org.apache.lucene.queryParser.standard.builders;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.core.QueryNodeException;
+import org.apache.lucene.queryParser.core.nodes.QueryNode;
+import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
+import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.RegexpQuery;
+
+/**
+ * Builds a {@link RegexpQuery} object from a {@link RegexpQueryNode} object.
+ */
+public class RegexpQueryNodeBuilder implements StandardQueryBuilder {
+
+ public RegexpQueryNodeBuilder() {
+ // empty constructor
+ }
+
+ public RegexpQuery build(QueryNode queryNode) throws QueryNodeException {
+ RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode;
+
+ RegexpQuery q = new RegexpQuery(new Term(regexpNode.getFieldAsString(),
+ regexpNode.textToBytesRef()));
+
+ MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod) queryNode
+ .getTag(MultiTermRewriteMethodAttribute.TAG_ID);
+ if (method != null) {
+ q.setRewriteMethod(method);
+ }
+
+ return q;
+ }
+
+}
Property changes on: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RegexpQueryNodeBuilder.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardQueryTreeBuilder.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardQueryTreeBuilder.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardQueryTreeBuilder.java (working copy)
@@ -33,6 +33,7 @@
import org.apache.lucene.queryParser.standard.nodes.MultiPhraseQueryNode;
import org.apache.lucene.queryParser.standard.nodes.PrefixWildcardQueryNode;
import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;
+import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.standard.nodes.StandardBooleanQueryNode;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline;
@@ -63,6 +64,7 @@
setBuilder(PrefixWildcardQueryNode.class,
new PrefixWildcardQueryNodeBuilder());
setBuilder(RangeQueryNode.class, new RangeQueryNodeBuilder());
+ setBuilder(RegexpQueryNode.class, new RegexpQueryNodeBuilder());
setBuilder(SlopQueryNode.class, new SlopQueryNodeBuilder());
setBuilder(StandardBooleanQueryNode.class,
new StandardBooleanQueryNodeBuilder());
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RegexpQueryNode.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RegexpQueryNode.java (revision 0)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RegexpQueryNode.java (revision 0)
@@ -0,0 +1,92 @@
+package org.apache.lucene.queryParser.standard.nodes;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.queryParser.core.nodes.FieldableNode;
+import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl;
+import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
+import org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A {@link RegexpQueryNode} represents a {@link RegexpQuery} query. Example: <code>/[a-z]|[0-9]/</code>
+ */
+public class RegexpQueryNode extends QueryNodeImpl implements TextableQueryNode,
+FieldableNode {
+ private static final long serialVersionUID = 0L;
+ private CharSequence text;
+ private CharSequence field;
+ /**
+ * @param field
+ * - field name
+ * @param text
+ * - value that contains a regular expression
+ * @param begin
+ * - start offset (inclusive) of the regular expression within text
+ * @param end
+ * - end offset (exclusive) of the regular expression within text
+ */
+ public RegexpQueryNode(CharSequence field, CharSequence text, int begin,
+ int end) {
+ this.field = field;
+ this.text = text.subSequence(begin, end);
+ }
+
+ public BytesRef textToBytesRef() {
+ return new BytesRef(text);
+ }
+
+ @Override
+ public String toString() {
+ return "<regexp field='" + this.field + "' term='" + this.text + "'/>";
+ }
+
+ @Override
+ public RegexpQueryNode cloneTree() throws CloneNotSupportedException {
+ RegexpQueryNode clone = (RegexpQueryNode) super.cloneTree();
+ clone.field = this.field;
+ clone.text = this.text;
+ return clone;
+ }
+
+ public CharSequence getText() {
+ return text;
+ }
+
+ public void setText(CharSequence text) {
+ this.text = text;
+ }
+
+ public CharSequence getField() {
+ return field;
+ }
+
+ public String getFieldAsString() {
+ return field.toString();
+ }
+
+ public void setField(CharSequence field) {
+ this.field = field;
+ }
+
+ public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
+ return isDefaultField(field)? "/"+text+"/": field + ":/" + text + "/";
+ }
+
+}
Property changes on: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RegexpQueryNode.java
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Date Author Id Revision HeadURL
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java (working copy)
@@ -613,4 +613,4 @@
}
}
-/* JavaCC - OriginalChecksum=f19c73b8f7faf94cc4a581e7b2933cc6 (do not edit this line) */
+/* JavaCC - OriginalChecksum=31519f95b41182c6740c2afd8dfbf344 (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java (working copy)
@@ -193,4 +193,4 @@
}
}
-/* JavaCC - OriginalChecksum=38bce846fe6c8482993969f741c0323e (do not edit this line) */
+/* JavaCC - OriginalChecksum=d0caeac083e9874065f9d1e298b5ccd9 (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java (working copy)
@@ -40,6 +40,7 @@
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
+import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
@@ -178,6 +179,7 @@
case LPAREN:
case QUOTED:
case TERM:
+ case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@@ -326,6 +328,7 @@
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case QUOTED:
case TERM:
+ case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@@ -373,17 +376,23 @@
final public QueryNode Term(CharSequence field) throws ParseException {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean fuzzy = false;
+ boolean regexp = false;
QueryNode q =null;
ParametricQueryNode qLower, qUpper;
float defaultMinSimilarity = 0.5f;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
+ case REGEXPTERM:
case NUMBER:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
term = jj_consume_token(TERM);
q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn);
break;
+ case REGEXPTERM:
+ term = jj_consume_token(REGEXPTERM);
+ regexp=true;
+ break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
@@ -428,6 +437,8 @@
{if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));}
}
q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
+ } else if (regexp) { // strip the "/" delimiters by image offsets: token columns are positions in the whole query, not in term.image
+ q = new RegexpQueryNode(field, term.image, 1, term.image.length()-1);
}
break;
case RANGEIN_START:
@@ -630,7 +641,7 @@
jj_la1_init_0();
}
private static void jj_la1_init_0() {
- jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x763c00,0x200,0x100,0x10000,0x762000,0x440000,0x80000,0x80000,0x10000,0x6000000,0x800000,0x6000000,0x10000,0x60000000,0x8000000,0x60000000,0x10000,0x80000,0x10000,0x760000,};
+ jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xf63c00,0x200,0x100,0x10000,0xf62000,0x940000,0x80000,0x80000,0x10000,0xc000000,0x1000000,0xc000000,0x10000,0xc0000000,0x10000000,0xc0000000,0x10000,0x80000,0x10000,0xf60000,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@@ -816,7 +827,7 @@
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
- boolean[] la1tokens = new boolean[31];
+ boolean[] la1tokens = new boolean[32];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
@@ -830,7 +841,7 @@
}
}
}
- for (int i = 0; i < 31; i++) {
+ for (int i = 0; i < 32; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj (working copy)
@@ -52,6 +52,7 @@
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
+import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
@@ -132,6 +133,7 @@
| )* "\"">
| (<_TERM_CHAR>)* >
| )+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| : RangeIn
| : RangeEx
}
@@ -374,6 +376,7 @@
QueryNode Term(CharSequence field) : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean fuzzy = false;
+ boolean regexp = false;
QueryNode q =null;
ParametricQueryNode qLower, qUpper;
float defaultMinSimilarity = 0.5f;
@@ -382,6 +385,7 @@
(
(
term= { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
+ | term=<REGEXPTERM> { regexp=true; }
| term=
)
[ fuzzySlop= { fuzzy=true; } ]
@@ -396,6 +400,8 @@
throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
}
q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
+ } else if (regexp) { // strip the "/" delimiters by image offsets: token columns are positions in the whole query, not in term.image
+ q = new RegexpQueryNode(field, term.image, 1, term.image.length()-1);
}
}
| ( ( goop1=|goop1= )
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserConstants.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserConstants.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserConstants.java (working copy)
@@ -47,27 +47,29 @@
/** RegularExpression Id. */
int FUZZY_SLOP = 19;
/** RegularExpression Id. */
- int RANGEIN_START = 20;
+ int REGEXPTERM = 20;
/** RegularExpression Id. */
- int RANGEEX_START = 21;
+ int RANGEIN_START = 21;
/** RegularExpression Id. */
- int NUMBER = 22;
+ int RANGEEX_START = 22;
/** RegularExpression Id. */
- int RANGEIN_TO = 23;
+ int NUMBER = 23;
/** RegularExpression Id. */
- int RANGEIN_END = 24;
+ int RANGEIN_TO = 24;
/** RegularExpression Id. */
- int RANGEIN_QUOTED = 25;
+ int RANGEIN_END = 25;
/** RegularExpression Id. */
- int RANGEIN_GOOP = 26;
+ int RANGEIN_QUOTED = 26;
/** RegularExpression Id. */
- int RANGEEX_TO = 27;
+ int RANGEIN_GOOP = 27;
/** RegularExpression Id. */
- int RANGEEX_END = 28;
+ int RANGEEX_TO = 28;
/** RegularExpression Id. */
- int RANGEEX_QUOTED = 29;
+ int RANGEEX_END = 29;
/** RegularExpression Id. */
- int RANGEEX_GOOP = 30;
+ int RANGEEX_QUOTED = 30;
+ /** RegularExpression Id. */
+ int RANGEEX_GOOP = 31;
/** Lexical state. */
int Boost = 0;
@@ -100,6 +102,7 @@
"",
"",
"",
+ "<REGEXPTERM>",
"\"[\"",
"\"{\"",
"",
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java (working copy)
@@ -38,6 +38,7 @@
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
+import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
@@ -86,11 +87,11 @@
case 58:
return jjStopAtPos(0, 15);
case 91:
- return jjStopAtPos(0, 20);
+ return jjStopAtPos(0, 21);
case 94:
return jjStopAtPos(0, 16);
case 123:
- return jjStopAtPos(0, 21);
+ return jjStopAtPos(0, 22);
default :
return jjMoveNfa_3(0, 0);
}
@@ -110,7 +111,7 @@
private int jjMoveNfa_3(int startState, int curPos)
{
int startsAt = 0;
- jjnewStateCnt = 28;
+ jjnewStateCnt = 33;
int i = 1;
jjstateSet[0] = startState;
int kind = 0x7fffffff;
@@ -144,7 +145,9 @@
if (kind > 10)
kind = 10;
}
- if (curChar == 38)
+ if (curChar == 47)
+ jjCheckNAddStates(3, 5);
+ else if (curChar == 38)
jjstateSet[jjnewStateCnt++] = 4;
break;
case 4:
@@ -198,7 +201,7 @@
break;
if (kind > 19)
kind = 19;
- jjAddStates(3, 4);
+ jjAddStates(6, 7);
break;
case 26:
if (curChar == 46)
@@ -211,6 +214,19 @@
kind = 19;
jjCheckNAdd(27);
break;
+ case 28:
+ case 30:
+ if (curChar == 47)
+ jjCheckNAddStates(3, 5);
+ break;
+ case 29:
+ if ((0xffff7fffffffffffL & l) != 0L)
+ jjCheckNAddStates(3, 5);
+ break;
+ case 32:
+ if (curChar == 47 && kind > 20)
+ kind = 20;
+ break;
default : break;
}
} while(i != startsAt);
@@ -325,6 +341,13 @@
kind = 19;
jjstateSet[jjnewStateCnt++] = 25;
break;
+ case 29:
+ jjAddStates(3, 5);
+ break;
+ case 31:
+ if (curChar == 92)
+ jjstateSet[jjnewStateCnt++] = 30;
+ break;
default : break;
}
} while(i != startsAt);
@@ -373,6 +396,10 @@
kind = 18;
jjCheckNAddTwoStates(20, 21);
break;
+ case 29:
+ if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+ jjAddStates(3, 5);
+ break;
default : break;
}
} while(i != startsAt);
@@ -384,7 +411,7 @@
kind = 0x7fffffff;
}
++curPos;
- if ((i = jjnewStateCnt) == (startsAt = 28 - (jjnewStateCnt = startsAt)))
+ if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@@ -395,9 +422,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x8000000L) != 0L)
+ if ((active0 & 0x10000000L) != 0L)
{
- jjmatchedKind = 30;
+ jjmatchedKind = 31;
return 6;
}
return -1;
@@ -414,9 +441,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_1(0x8000000L);
+ return jjMoveStringLiteralDfa1_1(0x10000000L);
case 125:
- return jjStopAtPos(0, 28);
+ return jjStopAtPos(0, 29);
default :
return jjMoveNfa_1(0, 0);
}
@@ -431,8 +458,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x8000000L) != 0L)
- return jjStartNfaWithStates_1(1, 27, 6);
+ if ((active0 & 0x10000000L) != 0L)
+ return jjStartNfaWithStates_1(1, 28, 6);
break;
default :
break;
@@ -468,8 +495,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 30)
- kind = 30;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@@ -486,21 +513,21 @@
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
- jjCheckNAddStates(5, 7);
+ jjCheckNAddStates(8, 10);
break;
case 3:
if (curChar == 34)
- jjCheckNAddStates(5, 7);
+ jjCheckNAddStates(8, 10);
break;
case 5:
- if (curChar == 34 && kind > 29)
- kind = 29;
+ if (curChar == 34 && kind > 30)
+ kind = 30;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 30)
- kind = 30;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(6);
break;
default : break;
@@ -518,12 +545,12 @@
case 6:
if ((0xdfffffffffffffffL & l) == 0L)
break;
- if (kind > 30)
- kind = 30;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(6);
break;
case 2:
- jjAddStates(5, 7);
+ jjAddStates(8, 10);
break;
case 4:
if (curChar == 92)
@@ -552,20 +579,20 @@
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
- if (kind > 30)
- kind = 30;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
- jjAddStates(5, 7);
+ jjAddStates(8, 10);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
- if (kind > 30)
- kind = 30;
+ if (kind > 31)
+ kind = 31;
jjCheckNAdd(6);
break;
default : break;
@@ -610,9 +637,9 @@
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 22)
- kind = 22;
- jjAddStates(8, 9);
+ if (kind > 23)
+ kind = 23;
+ jjAddStates(11, 12);
break;
case 1:
if (curChar == 46)
@@ -621,8 +648,8 @@
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 22)
- kind = 22;
+ if (kind > 23)
+ kind = 23;
jjCheckNAdd(2);
break;
default : break;
@@ -673,9 +700,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x800000L) != 0L)
+ if ((active0 & 0x1000000L) != 0L)
{
- jjmatchedKind = 26;
+ jjmatchedKind = 27;
return 6;
}
return -1;
@@ -692,9 +719,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_2(0x800000L);
+ return jjMoveStringLiteralDfa1_2(0x1000000L);
case 93:
- return jjStopAtPos(0, 24);
+ return jjStopAtPos(0, 25);
default :
return jjMoveNfa_2(0, 0);
}
@@ -709,8 +736,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x800000L) != 0L)
- return jjStartNfaWithStates_2(1, 23, 6);
+ if ((active0 & 0x1000000L) != 0L)
+ return jjStartNfaWithStates_2(1, 24, 6);
break;
default :
break;
@@ -746,8 +773,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 26)
- kind = 26;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@@ -764,21 +791,21 @@
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
- jjCheckNAddStates(5, 7);
+ jjCheckNAddStates(8, 10);
break;
case 3:
if (curChar == 34)
- jjCheckNAddStates(5, 7);
+ jjCheckNAddStates(8, 10);
break;
case 5:
- if (curChar == 34 && kind > 25)
- kind = 25;
+ if (curChar == 34 && kind > 26)
+ kind = 26;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 26)
- kind = 26;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(6);
break;
default : break;
@@ -796,12 +823,12 @@
case 6:
if ((0xffffffffdfffffffL & l) == 0L)
break;
- if (kind > 26)
- kind = 26;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(6);
break;
case 2:
- jjAddStates(5, 7);
+ jjAddStates(8, 10);
break;
case 4:
if (curChar == 92)
@@ -830,20 +857,20 @@
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
- if (kind > 26)
- kind = 26;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
- jjAddStates(5, 7);
+ jjAddStates(8, 10);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
- if (kind > 26)
- kind = 26;
+ if (kind > 27)
+ kind = 27;
jjCheckNAdd(6);
break;
default : break;
@@ -864,7 +891,7 @@
}
}
static final int[] jjnextStates = {
- 15, 16, 18, 25, 26, 2, 4, 5, 0, 1,
+ 15, 16, 18, 29, 31, 32, 25, 26, 2, 4, 5, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@@ -906,8 +933,8 @@
/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, null, "\53", "\55",
-"\50", "\51", "\72", "\136", null, null, null, "\133", "\173", null, "\124\117",
-"\135", null, null, "\124\117", "\175", null, null, };
+"\50", "\51", "\72", "\136", null, null, null, null, "\133", "\173", null,
+"\124\117", "\135", null, null, "\124\117", "\175", null, null, };
/** Lexer state names. */
public static final String[] lexStateNames = {
@@ -919,18 +946,18 @@
/** Lex State array. */
public static final int[] jjnewLexState = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 2, 1, 3, -1, 3,
- -1, -1, -1, 3, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, 2, 1, 3, -1,
+ 3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
- 0x7fffff01L,
+ 0xffffff01L,
};
static final long[] jjtoSkip = {
0x80L,
};
protected JavaCharStream input_stream;
-private final int[] jjrounds = new int[28];
-private final int[] jjstateSet = new int[56];
+private final int[] jjrounds = new int[33];
+private final int[] jjstateSet = new int[66];
protected char curChar;
/** Constructor. */
public StandardSyntaxParserTokenManager(JavaCharStream stream){
@@ -957,7 +984,7 @@
{
int i;
jjround = 0x80000001;
- for (i = 28; i-- > 0;)
+ for (i = 33; i-- > 0;)
jjrounds[i] = 0x80000000;
}
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java (working copy)
@@ -121,4 +121,4 @@
}
}
-/* JavaCC - OriginalChecksum=0aac6816ecd328eda2f38b9d09739ab6 (do not edit this line) */
+/* JavaCC - OriginalChecksum=cecb6022e0f2e2fca751015375f6d319 (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java (working copy)
@@ -138,4 +138,4 @@
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=a75b5b61664a73631a032a6e44f4b38a (do not edit this line) */
+/* JavaCC - OriginalChecksum=0e9c5fad06efef4f41f97b851ac7b0ce (do not edit this line) */
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java (working copy)
@@ -25,9 +25,11 @@
import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
+import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.core.util.UnescapedCharSequence;
import org.apache.lucene.queryParser.standard.config.LowercaseExpandedTermsAttribute;
+import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
/**
@@ -70,10 +72,10 @@
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
if (node instanceof WildcardQueryNode || node instanceof FuzzyQueryNode
- || node instanceof ParametricQueryNode) {
+ || node instanceof ParametricQueryNode || node instanceof RegexpQueryNode) {
- FieldQueryNode fieldNode = (FieldQueryNode) node;
- fieldNode.setText(UnescapedCharSequence.toLowerCase(fieldNode.getText()));
+ TextableQueryNode txtNode = (TextableQueryNode) node;
+ txtNode.setText(UnescapedCharSequence.toLowerCase(txtNode.getText()));
}
return node;
Index: contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/MultiTermRewriteMethodProcessor.java
===================================================================
--- contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/MultiTermRewriteMethodProcessor.java (revision 990099)
+++ contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/MultiTermRewriteMethodProcessor.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
+import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
import org.apache.lucene.search.MultiTermQuery;
@@ -40,7 +41,7 @@
// set setMultiTermRewriteMethod for WildcardQueryNode and
// PrefixWildcardQueryNode
if (node instanceof WildcardQueryNode
- || node instanceof ParametricRangeQueryNode) {
+ || node instanceof ParametricRangeQueryNode || node instanceof RegexpQueryNode) {
if (!getQueryConfigHandler().hasAttribute(
MultiTermRewriteMethodAttribute.class)) {
Index: contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
===================================================================
--- contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (revision 990099)
+++ contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (working copy)
@@ -28,15 +28,19 @@
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.LocalizedTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@@ -690,6 +694,35 @@
query2 = parser.parse("A (-B +C)");
assertEquals(query1, query2);
}
+
+ public void testRegexps() throws Exception {
+ PrecedenceQueryParser qp = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+ RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
+ assertEquals(q, qp.parse("/[a-z][123]/"));
+ qp.setLowercaseExpandedTerms(true); // upper-case patterns are now expected to parse to the lower-cased query
+ assertEquals(q, qp.parse("/[A-Z][123]/"));
+ q.setBoost(0.5f); // a ^boost suffix is expected to carry over to the RegexpQuery
+ assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
+ qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); // parser-level rewrite method must reach the parsed query
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ assertTrue(qp.parse("/[A-Z][123]/^0.5") instanceof RegexpQuery);
+ assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5")).getRewriteMethod());
+ assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
+ qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); // switch back for the remaining assertions
+ // An escaped "/" or "*" between the delimiters is expected to reach the pattern with its backslash intact.
+ Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
+ assertEquals(escaped, qp.parse("/[a-z]\\/[123]/"));
+ Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
+ assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/"));
+ // Mixed query: only the leading /.../ is expected as a RegexpQuery; the two slash-containing terms are expected as TermQuerys.
+ BooleanQuery complex = new BooleanQuery();
+ BooleanQuery inner = new BooleanQuery();
+ inner.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
+ inner.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
+ complex.add(inner, Occur.SHOULD);
+ complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
+ assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ "));
+ }
@Override
Index: contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
===================================================================
--- contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (revision 990099)
+++ contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (working copy)
@@ -68,10 +68,12 @@
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LocalizedTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
@@ -1155,6 +1157,34 @@
// assertEquals(1,type[0]);
}
+
+ public void testRegexps() throws Exception {
+ StandardQueryParser qp = new StandardQueryParser();
+ final String df = "field" ;
+ RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
+ assertEquals(q, qp.parse("/[a-z][123]/", df));
+ qp.setLowercaseExpandedTerms(true); // upper-case patterns are now expected to parse to the lower-cased query
+ assertEquals(q, qp.parse("/[A-Z][123]/", df));
+ q.setBoost(0.5f); // a ^boost suffix is expected to carry over to the RegexpQuery
+ assertEquals(q, qp.parse("/[A-Z][123]/^0.5", df));
+ qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); // parser-level rewrite method must reach the parsed query
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ assertTrue(qp.parse("/[A-Z][123]/^0.5", df) instanceof RegexpQuery);
+ assertEquals(q, qp.parse("/[A-Z][123]/^0.5", df));
+ assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5", df)).getRewriteMethod());
+ qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); // switch back for the remaining assertions
+ // An escaped "/" or "*" between the delimiters is expected to reach the pattern with its backslash intact.
+ Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
+ assertEquals(escaped, qp.parse("/[a-z]\\/[123]/", df));
+ Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
+ assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/", df));
+ // Mixed query: only the leading /.../ is expected as a RegexpQuery; the two slash-containing terms are expected as TermQuerys.
+ BooleanQuery complex = new BooleanQuery();
+ complex.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
+ complex.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
+ complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
+ assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ ", df));
+ }
public void testStopwords() throws Exception {
StandardQueryParser qp = new StandardQueryParser();
Index: src/java/org/apache/lucene/queryParser/CharStream.java
===================================================================
--- src/java/org/apache/lucene/queryParser/CharStream.java (revision 990099)
+++ src/java/org/apache/lucene/queryParser/CharStream.java (working copy)
@@ -109,4 +109,4 @@
void Done();
}
-/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
+/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/ParseException.java
===================================================================
--- src/java/org/apache/lucene/queryParser/ParseException.java (revision 990099)
+++ src/java/org/apache/lucene/queryParser/ParseException.java (working copy)
@@ -195,4 +195,4 @@
}
}
-/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
+/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/QueryParser.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.java (revision 990099)
+++ src/java/org/apache/lucene/queryParser/QueryParser.java (working copy)
@@ -29,6 +29,7 @@
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@@ -862,6 +863,17 @@
}
 /**
+ * Builds a new RegexpQuery instance that uses the parser's multi-term rewrite method
+ * @param regexp Regexp term (field plus regular expression text)
+ * @return new RegexpQuery instance
+ */
+ protected Query newRegexpQuery(Term regexp) {
+ RegexpQuery query = new RegexpQuery(regexp);
+ query.setRewriteMethod(multiTermRewriteMethod);
+ return query;
+ }
+
+ /**
* Builds a new FuzzyQuery instance
* @param term Term
* @param minimumSimilarity minimum similarity
@@ -986,6 +998,35 @@
}
 /**
+ * Factory method for generating a query. Called when parser
+ * parses an input term token that contains a regular expression
+ * query.
+ * <p>
+ * If lowercaseExpandedTerms is set, the pattern term is lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with regular expression patterns.
+ * <p>
+ * Can be overridden by extending classes, to provide custom handling for
+ * regular expression queries, which may be necessary due to missing analyzer
+ * calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Regular expression text; the parser passes it without the enclosing slashes
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException may be thrown by an overriding method to disallow regular expression queries
+ */
+ protected Query getRegexpQuery(String field, String termStr) throws ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase(); // NOTE(review): lower-cases with the default locale; confirm this is intended
+ }
+ Term t = new Term(field, termStr);
+ return newRegexpQuery(t);
+ }
+
+ /**
* Factory method for generating a query (similar to
* {@link #getWildcardQuery}). Called when parser parses an input term
* token that uses prefix notation; that is, contains a single '*' wildcard
@@ -1234,6 +1275,7 @@
case TERM:
case PREFIXTERM:
case WILDTERM:
+ case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@@ -1285,6 +1327,7 @@
case TERM:
case PREFIXTERM:
case WILDTERM:
+ case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@@ -1325,12 +1368,14 @@
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
+ boolean regexp = false;
Query q;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case STAR:
case TERM:
case PREFIXTERM:
case WILDTERM:
+ case REGEXPTERM:
case NUMBER:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
@@ -1348,6 +1393,10 @@
term = jj_consume_token(WILDTERM);
wildcard=true;
break;
+ case REGEXPTERM:
+ term = jj_consume_token(REGEXPTERM);
+ regexp=true;
+ break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
@@ -1390,6 +1439,8 @@
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
+ } else if (regexp) {
+ q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {
@@ -1569,6 +1620,12 @@
finally { jj_save(0, xla); }
}
+ private boolean jj_3R_2() {
+ if (jj_scan_token(TERM)) return true;
+ if (jj_scan_token(COLON)) return true;
+ return false;
+ }
+
private boolean jj_3_1() {
Token xsp;
xsp = jj_scanpos;
@@ -1585,12 +1642,6 @@
return false;
}
- private boolean jj_3R_2() {
- if (jj_scan_token(TERM)) return true;
- if (jj_scan_token(COLON)) return true;
- return false;
- }
-
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */
@@ -1609,10 +1660,10 @@
jj_la1_init_1();
}
private static void jj_la1_init_0() {
- jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3f00,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,};
+ jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x7ed3f00,0x90000,0x20000,0x7ed2000,0x4e90000,0x100000,0x100000,0x20000,0x60000000,0x8000000,0x60000000,0x20000,0x0,0x80000000,0x0,0x20000,0x100000,0x20000,0x7ed0000,};
}
private static void jj_la1_init_1() {
- jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,};
+ jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x6,0x0,0x6,0x0,0x0,0x0,0x0,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@@ -1766,7 +1817,7 @@
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
- boolean[] la1tokens = new boolean[34];
+ boolean[] la1tokens = new boolean[35];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
@@ -1783,7 +1834,7 @@
}
}
}
- for (int i = 0; i < 34; i++) {
+ for (int i = 0; i < 35; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
Index: src/java/org/apache/lucene/queryParser/QueryParser.jj
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 990099)
+++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy)
@@ -53,6 +53,7 @@
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@@ -886,6 +887,17 @@
}
/**
+ * Builds a new RegexpQuery instance
+ * @param regexp Regexp term
+ * @return new RegexpQuery instance
+ */
+ protected Query newRegexpQuery(Term regexp) {
+ RegexpQuery query = new RegexpQuery(regexp);
+ query.setRewriteMethod(multiTermRewriteMethod);
+ return query;
+ }
+
+ /**
* Builds a new FuzzyQuery instance
* @param term Term
* @param minimumSimilarity minimum similarity
@@ -1010,6 +1022,35 @@
}
/**
+ * Factory method for generating a query. Called when parser
+ * parses an input term token that contains a regular expression
+ * query.
+ *
+ * Depending on settings, pattern term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with regular expression templates.
+ *
+ * Can be overridden by extending classes, to provide custom handling for
+ * regular expression queries, which may be necessary due to missing analyzer
+ * calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token that contains a regular expression
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getRegexpQuery(String field, String termStr) throws ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ Term t = new Term(field, termStr);
+ return newRegexpQuery(t);
+ }
+
+ /**
* Factory method for generating a query (similar to
* {@link #getWildcardQuery}). Called when parser parses an input term
* token that uses prefix notation; that is, contains a single '*' wildcard
@@ -1218,6 +1259,7 @@
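| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx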
}
@@ -1340,6 +1382,7 @@
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
+ boolean regexp = false;
Query q;
}
{
@@ -1349,6 +1392,7 @@
| term=<STAR> { wildcard=true; }
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
+ | term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
@@ -1361,6 +1405,8 @@
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
+ } else if (regexp) {
+ q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {
Index: src/java/org/apache/lucene/queryParser/QueryParserConstants.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserConstants.java (revision 990099)
+++ src/java/org/apache/lucene/queryParser/QueryParserConstants.java (working copy)
@@ -53,27 +53,29 @@
/** RegularExpression Id. */
int WILDTERM = 22;
/** RegularExpression Id. */
- int RANGEIN_START = 23;
+ int REGEXPTERM = 23;
/** RegularExpression Id. */
- int RANGEEX_START = 24;
+ int RANGEIN_START = 24;
/** RegularExpression Id. */
- int NUMBER = 25;
+ int RANGEEX_START = 25;
/** RegularExpression Id. */
- int RANGEIN_TO = 26;
+ int NUMBER = 26;
/** RegularExpression Id. */
- int RANGEIN_END = 27;
+ int RANGEIN_TO = 27;
/** RegularExpression Id. */
- int RANGEIN_QUOTED = 28;
+ int RANGEIN_END = 28;
/** RegularExpression Id. */
- int RANGEIN_GOOP = 29;
+ int RANGEIN_QUOTED = 29;
/** RegularExpression Id. */
- int RANGEEX_TO = 30;
+ int RANGEIN_GOOP = 30;
/** RegularExpression Id. */
- int RANGEEX_END = 31;
+ int RANGEEX_TO = 31;
/** RegularExpression Id. */
- int RANGEEX_QUOTED = 32;
+ int RANGEEX_END = 32;
/** RegularExpression Id. */
- int RANGEEX_GOOP = 33;
+ int RANGEEX_QUOTED = 33;
+ /** RegularExpression Id. */
+ int RANGEEX_GOOP = 34;
/** Lexical state. */
int Boost = 0;
@@ -109,6 +111,7 @@
"<FUZZY_SLOP>",
"<PREFIXTERM>",
"<WILDTERM>",
+ "<REGEXPTERM>",
"\"[\"",
"\"{\"",
"<NUMBER>",
Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java
===================================================================
--- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 990099)
+++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy)
@@ -27,6 +27,7 @@
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@@ -70,7 +71,7 @@
case 41:
return jjStopAtPos(0, 14);
case 42:
- return jjStartNfaWithStates_3(0, 16, 36);
+ return jjStartNfaWithStates_3(0, 16, 41);
case 43:
return jjStopAtPos(0, 11);
case 45:
@@ -78,11 +79,11 @@
case 58:
return jjStopAtPos(0, 15);
case 91:
- return jjStopAtPos(0, 23);
+ return jjStopAtPos(0, 24);
case 94:
return jjStopAtPos(0, 17);
case 123:
- return jjStopAtPos(0, 24);
+ return jjStopAtPos(0, 25);
default :
return jjMoveNfa_3(0, 0);
}
@@ -110,7 +111,7 @@
private int jjMoveNfa_3(int startState, int curPos)
{
int startsAt = 0;
- jjnewStateCnt = 36;
+ jjnewStateCnt = 41;
int i = 1;
jjstateSet[0] = startState;
int kind = 0x7fffffff;
@@ -125,14 +126,6 @@
{
switch(jjstateSet[--i])
{
- case 36:
- case 25:
- if ((0xfbfffcf8ffffd9ffL & l) == 0L)
- break;
- if (kind > 22)
- kind = 22;
- jjCheckNAddTwoStates(25, 26);
- break;
case 0:
if ((0xfbffd4f8ffffd9ffL & l) != 0L)
{
@@ -163,9 +156,19 @@
if (kind > 21)
kind = 21;
}
- if (curChar == 38)
+ if (curChar == 47)
+ jjCheckNAddStates(8, 10);
+ else if (curChar == 38)
jjstateSet[jjnewStateCnt++] = 4;
break;
+ case 41:
+ case 25:
+ if ((0xfbfffcf8ffffd9ffL & l) == 0L)
+ break;
+ if (kind > 22)
+ kind = 22;
+ jjCheckNAddTwoStates(25, 26);
+ break;
case 4:
if (curChar == 38 && kind > 8)
kind = 8;
@@ -198,7 +201,7 @@
break;
if (kind > 20)
kind = 20;
- jjAddStates(8, 9);
+ jjAddStates(11, 12);
break;
case 21:
if (curChar == 46)
@@ -228,30 +231,43 @@
jjCheckNAddTwoStates(25, 26);
break;
case 28:
+ case 30:
+ if (curChar == 47)
+ jjCheckNAddStates(8, 10);
+ break;
+ case 29:
+ if ((0xffff7fffffffffffL & l) != 0L)
+ jjCheckNAddStates(8, 10);
+ break;
+ case 32:
+ if (curChar == 47 && kind > 23)
+ kind = 23;
+ break;
+ case 33:
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
jjCheckNAddStates(3, 7);
break;
- case 29:
+ case 34:
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 31:
+ case 36:
if (kind > 19)
kind = 19;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 32:
+ case 37:
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(13, 15);
break;
- case 34:
- jjCheckNAddStates(10, 12);
+ case 39:
+ jjCheckNAddStates(13, 15);
break;
default : break;
}
@@ -264,16 +280,6 @@
{
switch(jjstateSet[--i])
{
- case 36:
- if ((0x97ffffff87ffffffL & l) != 0L)
- {
- if (kind > 22)
- kind = 22;
- jjCheckNAddTwoStates(25, 26);
- }
- else if (curChar == 92)
- jjCheckNAddTwoStates(27, 27);
- break;
case 0:
if ((0x97ffffff87ffffffL & l) != 0L)
{
@@ -282,7 +288,7 @@
jjCheckNAddStates(3, 7);
}
else if (curChar == 92)
- jjCheckNAddStates(13, 15);
+ jjCheckNAddStates(16, 18);
else if (curChar == 126)
{
if (kind > 20)
@@ -304,6 +310,16 @@
else if (curChar == 65)
jjstateSet[jjnewStateCnt++] = 2;
break;
+ case 41:
+ if ((0x97ffffff87ffffffL & l) != 0L)
+ {
+ if (kind > 22)
+ kind = 22;
+ jjCheckNAddTwoStates(25, 26);
+ }
+ else if (curChar == 92)
+ jjCheckNAddTwoStates(27, 27);
+ break;
case 1:
if (curChar == 68 && kind > 8)
kind = 8;
@@ -385,43 +401,50 @@
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
- case 28:
+ case 29:
+ jjAddStates(8, 10);
+ break;
+ case 31:
+ if (curChar == 92)
+ jjstateSet[jjnewStateCnt++] = 30;
+ break;
+ case 33:
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
jjCheckNAddStates(3, 7);
break;
- case 29:
+ case 34:
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 30:
+ case 35:
if (curChar == 92)
- jjCheckNAddTwoStates(31, 31);
+ jjCheckNAddTwoStates(36, 36);
break;
- case 31:
+ case 36:
if (kind > 19)
kind = 19;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 32:
+ case 37:
if ((0x97ffffff87ffffffL & l) != 0L)
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(13, 15);
break;
- case 33:
+ case 38:
if (curChar == 92)
- jjCheckNAddTwoStates(34, 34);
+ jjCheckNAddTwoStates(39, 39);
break;
- case 34:
- jjCheckNAddStates(10, 12);
+ case 39:
+ jjCheckNAddStates(13, 15);
break;
- case 35:
+ case 40:
if (curChar == 92)
- jjCheckNAddStates(13, 15);
+ jjCheckNAddStates(16, 18);
break;
default : break;
}
@@ -438,14 +461,6 @@
{
switch(jjstateSet[--i])
{
- case 36:
- case 25:
- if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
- break;
- if (kind > 22)
- kind = 22;
- jjCheckNAddTwoStates(25, 26);
- break;
case 0:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
{
@@ -465,6 +480,14 @@
jjCheckNAddStates(3, 7);
}
break;
+ case 41:
+ case 25:
+ if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
+ break;
+ if (kind > 22)
+ kind = 22;
+ jjCheckNAddTwoStates(25, 26);
+ break;
case 15:
case 17:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
@@ -484,34 +507,38 @@
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
- case 28:
+ case 29:
+ if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+ jjAddStates(8, 10);
+ break;
+ case 33:
if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
break;
if (kind > 19)
kind = 19;
jjCheckNAddStates(3, 7);
break;
- case 29:
+ case 34:
if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
break;
if (kind > 19)
kind = 19;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 31:
+ case 36:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 19)
kind = 19;
- jjCheckNAddTwoStates(29, 30);
+ jjCheckNAddTwoStates(34, 35);
break;
- case 32:
+ case 37:
if (jjCanMove_2(hiByte, i1, i2, l1, l2))
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(13, 15);
break;
- case 34:
+ case 39:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
- jjCheckNAddStates(10, 12);
+ jjCheckNAddStates(13, 15);
break;
default : break;
}
@@ -524,7 +551,7 @@
kind = 0x7fffffff;
}
++curPos;
- if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt)))
+ if ((i = jjnewStateCnt) == (startsAt = 41 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@@ -535,9 +562,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x40000000L) != 0L)
+ if ((active0 & 0x80000000L) != 0L)
{
- jjmatchedKind = 33;
+ jjmatchedKind = 34;
return 6;
}
return -1;
@@ -554,9 +581,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_1(0x40000000L);
+ return jjMoveStringLiteralDfa1_1(0x80000000L);
case 125:
- return jjStopAtPos(0, 31);
+ return jjStopAtPos(0, 32);
default :
return jjMoveNfa_1(0, 0);
}
@@ -571,8 +598,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x40000000L) != 0L)
- return jjStartNfaWithStates_1(1, 30, 6);
+ if ((active0 & 0x80000000L) != 0L)
+ return jjStartNfaWithStates_1(1, 31, 6);
break;
default :
break;
@@ -608,8 +635,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 33)
- kind = 33;
+ if (kind > 34)
+ kind = 34;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@@ -626,21 +653,21 @@
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
- jjCheckNAddStates(16, 18);
+ jjCheckNAddStates(19, 21);
break;
case 3:
if (curChar == 34)
- jjCheckNAddStates(16, 18);
+ jjCheckNAddStates(19, 21);
break;
case 5:
- if (curChar == 34 && kind > 32)
- kind = 32;
+ if (curChar == 34 && kind > 33)
+ kind = 33;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 33)
- kind = 33;
+ if (kind > 34)
+ kind = 34;
jjCheckNAdd(6);
break;
default : break;
@@ -658,12 +685,12 @@
case 6:
if ((0xdfffffffffffffffL & l) == 0L)
break;
- if (kind > 33)
- kind = 33;
+ if (kind > 34)
+ kind = 34;
jjCheckNAdd(6);
break;
case 2:
- jjAddStates(16, 18);
+ jjAddStates(19, 21);
break;
case 4:
if (curChar == 92)
@@ -692,20 +719,20 @@
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
- if (kind > 33)
- kind = 33;
+ if (kind > 34)
+ kind = 34;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
- jjAddStates(16, 18);
+ jjAddStates(19, 21);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
- if (kind > 33)
- kind = 33;
+ if (kind > 34)
+ kind = 34;
jjCheckNAdd(6);
break;
default : break;
@@ -750,9 +777,9 @@
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 25)
- kind = 25;
- jjAddStates(19, 20);
+ if (kind > 26)
+ kind = 26;
+ jjAddStates(22, 23);
break;
case 1:
if (curChar == 46)
@@ -761,8 +788,8 @@
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
- if (kind > 25)
- kind = 25;
+ if (kind > 26)
+ kind = 26;
jjCheckNAdd(2);
break;
default : break;
@@ -813,9 +840,9 @@
switch (pos)
{
case 0:
- if ((active0 & 0x4000000L) != 0L)
+ if ((active0 & 0x8000000L) != 0L)
{
- jjmatchedKind = 29;
+ jjmatchedKind = 30;
return 6;
}
return -1;
@@ -832,9 +859,9 @@
switch(curChar)
{
case 84:
- return jjMoveStringLiteralDfa1_2(0x4000000L);
+ return jjMoveStringLiteralDfa1_2(0x8000000L);
case 93:
- return jjStopAtPos(0, 27);
+ return jjStopAtPos(0, 28);
default :
return jjMoveNfa_2(0, 0);
}
@@ -849,8 +876,8 @@
switch(curChar)
{
case 79:
- if ((active0 & 0x4000000L) != 0L)
- return jjStartNfaWithStates_2(1, 26, 6);
+ if ((active0 & 0x8000000L) != 0L)
+ return jjStartNfaWithStates_2(1, 27, 6);
break;
default :
break;
@@ -886,8 +913,8 @@
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
- if (kind > 29)
- kind = 29;
+ if (kind > 30)
+ kind = 30;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@@ -904,21 +931,21 @@
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
- jjCheckNAddStates(16, 18);
+ jjCheckNAddStates(19, 21);
break;
case 3:
if (curChar == 34)
- jjCheckNAddStates(16, 18);
+ jjCheckNAddStates(19, 21);
break;
case 5:
- if (curChar == 34 && kind > 28)
- kind = 28;
+ if (curChar == 34 && kind > 29)
+ kind = 29;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
- if (kind > 29)
- kind = 29;
+ if (kind > 30)
+ kind = 30;
jjCheckNAdd(6);
break;
default : break;
@@ -936,12 +963,12 @@
case 6:
if ((0xffffffffdfffffffL & l) == 0L)
break;
- if (kind > 29)
- kind = 29;
+ if (kind > 30)
+ kind = 30;
jjCheckNAdd(6);
break;
case 2:
- jjAddStates(16, 18);
+ jjAddStates(19, 21);
break;
case 4:
if (curChar == 92)
@@ -970,20 +997,20 @@
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
- if (kind > 29)
- kind = 29;
+ if (kind > 30)
+ kind = 30;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
- jjAddStates(16, 18);
+ jjAddStates(19, 21);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
- if (kind > 29)
- kind = 29;
+ if (kind > 30)
+ kind = 30;
jjCheckNAdd(6);
break;
default : break;
@@ -1004,8 +1031,8 @@
}
}
static final int[] jjnextStates = {
- 15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27,
- 2, 4, 5, 0, 1,
+ 15, 16, 18, 34, 37, 23, 38, 35, 29, 31, 32, 20, 21, 37, 23, 38,
+ 36, 39, 27, 2, 4, 5, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@@ -1047,8 +1074,8 @@
/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, null, "\53", "\55",
-"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", "\173",
-null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, };
+"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, null, "\133",
+"\173", null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, };
/** Lexer state names. */
public static final String[] lexStateNames = {
@@ -1060,18 +1087,18 @@
/** Lex State array. */
public static final int[] jjnewLexState = {
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1,
- 3, -1, 3, -1, -1, -1, 3, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2,
+ 1, 3, -1, 3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
- 0x3ffffff01L,
+ 0x7ffffff01L,
};
static final long[] jjtoSkip = {
0x80L,
};
protected CharStream input_stream;
-private final int[] jjrounds = new int[36];
-private final int[] jjstateSet = new int[72];
+private final int[] jjrounds = new int[41];
+private final int[] jjstateSet = new int[82];
protected char curChar;
/** Constructor. */
public QueryParserTokenManager(CharStream stream){
@@ -1096,7 +1123,7 @@
{
int i;
jjround = 0x80000001;
- for (i = 36; i-- > 0;)
+ for (i = 41; i-- > 0;)
jjrounds[i] = 0x80000000;
}
Index: src/java/org/apache/lucene/queryParser/Token.java
===================================================================
--- src/java/org/apache/lucene/queryParser/Token.java (revision 990099)
+++ src/java/org/apache/lucene/queryParser/Token.java (working copy)
@@ -121,4 +121,4 @@
}
}
-/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
+/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */
Index: src/java/org/apache/lucene/queryParser/TokenMgrError.java
===================================================================
--- src/java/org/apache/lucene/queryParser/TokenMgrError.java (revision 990099)
+++ src/java/org/apache/lucene/queryParser/TokenMgrError.java (working copy)
@@ -138,4 +138,4 @@
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=1c94e13236c7e0121e49427992341ee3 (do not edit this line) */
+/* JavaCC - OriginalChecksum=334e679cf1a88b3070bb8e3d80ee3f5e (do not edit this line) */
Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 990099)
+++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy)
@@ -54,10 +54,12 @@
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LocalizedTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
@@ -1054,6 +1056,33 @@
}
+ public void testRegexps() throws Exception {
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+ RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
+ assertEquals(q, qp.parse("/[a-z][123]/"));
+ qp.setLowercaseExpandedTerms(true);
+ assertEquals(q, qp.parse("/[A-Z][123]/"));
+ q.setBoost(0.5f);
+ assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
+ qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ assertTrue(qp.parse("/[A-Z][123]/^0.5") instanceof RegexpQuery);
+ assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5")).getRewriteMethod());
+ assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
+ qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+
+ Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
+ assertEquals(escaped, qp.parse("/[a-z]\\/[123]/"));
+ Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
+ assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/"));
+
+ BooleanQuery complex = new BooleanQuery();
+ complex.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
+ complex.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
+ complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
+ assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ "));
+ }
+
public void testStopwords() throws Exception {
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));