Index: src/java/org/apache/lucene/queryParser/QueryParser.jj =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParser.jj (revision 510176) +++ src/java/org/apache/lucene/queryParser/QueryParser.jj (working copy) @@ -77,7 +77,7 @@ * By default a date is converted into a search term using the deprecated * {@link DateField} for compatibility reasons. * To use the new {@link DateTools} to convert dates, a - * {@link DateTools.Resolution} has to be set. + * {@link org.apache.lucene.document.DateTools.Resolution} has to be set. *

*

* The date resolution that shall be used for RangeQueries can be set @@ -345,7 +345,7 @@ /** * Sets the date resolution used by RangeQueries for a specific field. * - * @param field field for which the date resolution is to be set + * @param fieldName field for which the date resolution is to be set * @param dateResolution date resolution to set */ public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { @@ -882,7 +882,7 @@ | | (<_TERM_CHAR>)* > | )+ ( "." (<_NUM_CHAR>)+ )? )? > -| | "*") (<_TERM_CHAR>)* "*" > +| (<_TERM_CHAR>)* "*" ) > | | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > | : RangeIn | : RangeEx Index: src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java =================================================================== --- src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (revision 510176) +++ src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java (working copy) @@ -49,7 +49,7 @@ case 41: return jjStopAtPos(0, 13); case 42: - return jjStartNfaWithStates_3(0, 15, 37); + return jjStartNfaWithStates_3(0, 15, 36); case 43: return jjStopAtPos(0, 10); case 45: @@ -106,7 +106,7 @@ { int[] nextStates; int startsAt = 0; - jjnewStateCnt = 37; + jjnewStateCnt = 36; int i = 1; jjstateSet[0] = startState; int j, kind = 0x7fffffff; @@ -121,27 +121,20 @@ { switch(jjstateSet[--i]) { - case 37: - if ((0xfbfffcf8ffffd9ffL & l) != 0L) - { - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); - } - if ((0x7bfff8f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(0, 2); - else if (curChar == 42) - { - if (kind > 20) - kind = 20; - } + case 36: + case 25: + if ((0xfbfffcf8ffffd9ffL & l) == 0L) + break; + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(25, 26); break; case 0: if ((0xfbffd4f8ffffd9ffL & l) != 0L) { if (kind > 21) kind = 21; - jjCheckNAddTwoStates(33, 34); + jjCheckNAddTwoStates(25, 26); } else if ((0x100002600L & l) != 0L) { @@ -155,14 +148,17 @@ if (kind > 9) kind = 9; } - if ((0x7bffd4f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(0, 2); if ((0x7bffd0f8ffffd9ffL & l) != 0L) { if (kind > 18) kind = 18; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddStates(0, 4); } + else if (curChar == 42) + { + if (kind > 20) + kind = 20; + } if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; @@ -184,92 +180,81 @@ break; case 15: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(3, 5); + jjCheckNAddStates(5, 7); break; case 16: if (curChar == 34) - jjCheckNAddStates(3, 5); + jjCheckNAddStates(5, 7); break; case 18: if (curChar == 34 && kind > 17) kind = 17; break; - case 19: - if ((0x7bffd0f8ffffd9ffL & l) == 0L) - break; - if (kind > 18) - kind = 18; - jjCheckNAddTwoStates(20, 21); - break; case 20: - if ((0x7bfff8f8ffffd9ffL & l) == 0L) - break; - if (kind > 18) - kind = 18; - jjCheckNAddTwoStates(20, 21); - break; - case 22: - if ((0x84002f0600000000L & l) == 0L) - break; - if (kind > 18) - kind = 18; - jjCheckNAddTwoStates(20, 21); - break; - case 24: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 19) kind = 19; - jjAddStates(6, 7); + jjAddStates(8, 9); break; - case 25: + case 21: if (curChar == 46) - jjCheckNAdd(26); + jjCheckNAdd(22); break; - case 26: + case 22: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 19) kind = 19; - jjCheckNAdd(26); + jjCheckNAdd(22); break; - case 27: - if ((0x7bffd4f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(0, 2); - break; - case 28: - if ((0x7bfff8f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(0, 2); - break; - case 29: + case 23: if (curChar == 42 && kind > 20) kind = 20; break; - case 31: - if ((0x84002f0600000000L & l) != 0L) - jjCheckNAddStates(0, 2); - break; - case 32: + case 24: if ((0xfbffd4f8ffffd9ffL & l) == 0L) break; if (kind > 21) kind = 21; - jjCheckNAddTwoStates(33, 34); + jjCheckNAddTwoStates(25, 26); break; - case 33: - if ((0xfbfffcf8ffffd9ffL & l) == 0L) + case 27: + if ((0x84002f0600000000L & l) == 0L) break; if (kind > 21) kind = 21; - jjCheckNAddTwoStates(33, 34); + jjCheckNAddTwoStates(25, 26); break; - case 35: + case 28: + if ((0x7bffd0f8ffffd9ffL & l) == 0L) + break; + if (kind > 18) + kind = 18; + jjCheckNAddStates(0, 4); + break; + case 29: + if ((0x7bfff8f8ffffd9ffL & l) == 0L) + break; + if (kind > 18) + kind = 18; + jjCheckNAddTwoStates(29, 30); + break; + case 31: if ((0x84002f0600000000L & l) == 0L) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); + if (kind > 18) + kind = 18; + jjCheckNAddTwoStates(29, 30); break; + case 32: + if ((0x7bfff8f8ffffd9ffL & l) != 0L) + jjCheckNAddStates(10, 12); + break; + case 34: + if ((0x84002f0600000000L & l) != 0L) + jjCheckNAddStates(10, 12); + break; default : break; } } while(i != startsAt); @@ -281,43 +266,37 @@ { switch(jjstateSet[--i]) { - case 37: + case 36: if ((0x97ffffff97ffffffL & l) != 0L) { if (kind > 21) kind = 21; - jjCheckNAddTwoStates(33, 34); + jjCheckNAddTwoStates(25, 26); } - if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(0, 2); if (curChar == 92) - jjCheckNAddTwoStates(31, 31); - if (curChar == 92) - jjCheckNAddTwoStates(35, 35); + jjCheckNAddTwoStates(27, 27); break; case 0: if ((0x97ffffff97ffffffL & l) != 0L) { - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); + if (kind > 18) + kind = 18; + jjCheckNAddStates(0, 4); } else if (curChar == 126) { if (kind > 19) kind = 19; - jjstateSet[jjnewStateCnt++] = 24; + jjstateSet[jjnewStateCnt++] = 20; } if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(0, 2); - if ((0x97ffffff97ffffffL & l) != 0L) { - if (kind > 18) - kind = 18; - jjCheckNAddTwoStates(20, 21); + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(25, 26); } if (curChar == 92) - jjCheckNAddStates(8, 10); + jjCheckNAddStates(13, 15); else if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; else if (curChar == 124) @@ -368,82 +347,84 @@ jjstateSet[jjnewStateCnt++] = 11; break; case 15: - jjAddStates(3, 5); + jjAddStates(5, 7); break; case 17: if (curChar == 92) jjstateSet[jjnewStateCnt++] = 16; break; case 19: - case 20: + if (curChar != 126) + break; + if (kind > 19) + kind = 19; + jjstateSet[jjnewStateCnt++] = 20; + break; + case 24: if ((0x97ffffff97ffffffL & l) == 0L) break; - if (kind > 18) - kind = 18; - jjCheckNAddTwoStates(20, 21); + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(25, 26); break; - case 21: + case 25: + if ((0x97ffffff97ffffffL & l) == 0L) + break; + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(25, 26); + break; + case 26: if (curChar == 92) - jjCheckNAddTwoStates(22, 22); + jjCheckNAddTwoStates(27, 27); break; - case 22: + case 27: if ((0x6800000078000000L & l) == 0L) break; + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(25, 26); + break; + case 28: + if ((0x97ffffff97ffffffL & l) == 0L) + break; if (kind > 18) kind = 18; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddStates(0, 4); break; - case 23: - if (curChar != 126) + case 29: + if ((0x97ffffff97ffffffL & l) == 0L) break; - if (kind > 19) - kind = 19; - jjstateSet[jjnewStateCnt++] = 24; + if (kind > 18) + kind = 18; + jjCheckNAddTwoStates(29, 30); break; - case 27: - if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(0, 2); - break; - case 28: - if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(0, 2); - break; case 30: if (curChar == 92) jjCheckNAddTwoStates(31, 31); break; case 31: - if ((0x6800000078000000L & l) != 0L) - jjCheckNAddStates(0, 2); + if ((0x6800000078000000L & l) == 0L) + break; + if (kind > 18) + kind = 18; + jjCheckNAddTwoStates(29, 30); break; case 32: - if ((0x97ffffff97ffffffL & l) == 0L) - break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); + if ((0x97ffffff97ffffffL & l) != 0L) + jjCheckNAddStates(10, 12); break; case 33: - if ((0x97ffffff97ffffffL & l) == 0L) - break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); + if (curChar == 92) + jjCheckNAddTwoStates(34, 34); break; case 34: - if (curChar == 92) - jjCheckNAddTwoStates(35, 35); + if ((0x6800000078000000L & l) != 0L) + jjCheckNAddStates(10, 12); break; case 35: - if ((0x6800000078000000L & l) == 0L) - break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); - break; - case 36: if (curChar == 92) - jjCheckNAddStates(8, 10); + jjCheckNAddStates(13, 15); break; default : break; } @@ -460,66 +441,57 @@ { switch(jjstateSet[--i]) { - case 37: + case 36: + case 25: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(25, 26); + break; + case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(0, 2); - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { if (kind > 21) kind = 21; - jjCheckNAddTwoStates(33, 34); + jjCheckNAddTwoStates(25, 26); } - break; - case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { if (kind > 18) kind = 18; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddStates(0, 4); } - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(0, 2); - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - { - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); - } break; case 15: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(3, 5); + jjAddStates(5, 7); break; - case 19: - case 20: + case 24: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 18) - kind = 18; - jjCheckNAddTwoStates(20, 21); + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(25, 26); break; - case 27: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(0, 2); - break; case 28: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(0, 2); - break; - case 32: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); + if (kind > 18) + kind = 18; + jjCheckNAddStates(0, 4); break; - case 33: + case 29: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(33, 34); + if (kind > 18) + kind = 18; + jjCheckNAddTwoStates(29, 30); break; + case 32: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(10, 12); + break; default : break; } } while(i != startsAt); @@ -531,7 +503,7 @@ kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 37 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } @@ -634,11 +606,11 @@ break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(11, 13); + jjCheckNAddStates(16, 18); break; case 3: if (curChar == 34) - jjCheckNAddStates(11, 13); + jjCheckNAddStates(16, 18); break; case 5: if (curChar == 34 && kind > 31) @@ -671,7 +643,7 @@ jjCheckNAdd(6); break; case 2: - jjAddStates(11, 13); + jjAddStates(16, 18); break; case 4: if (curChar == 92) @@ -702,7 +674,7 @@ break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(11, 13); + jjAddStates(16, 18); break; default : break; } @@ -749,7 +721,7 @@ break; if (kind > 24) kind = 24; - jjAddStates(14, 15); + jjAddStates(19, 20); break; case 1: if (curChar == 46) @@ -902,11 +874,11 @@ break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(11, 13); + jjCheckNAddStates(16, 18); break; case 3: if (curChar == 34) - jjCheckNAddStates(11, 13); + jjCheckNAddStates(16, 18); break; case 5: if (curChar == 34 && kind > 27) @@ -939,7 +911,7 @@ jjCheckNAdd(6); break; case 2: - jjAddStates(11, 13); + jjAddStates(16, 18); break; case 4: if (curChar == 92) @@ -970,7 +942,7 @@ break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(11, 13); + jjAddStates(16, 18); break; default : break; } @@ -990,7 +962,8 @@ } } static final int[] jjnextStates = { - 28, 29, 30, 15, 17, 18, 24, 25, 22, 31, 35, 2, 4, 5, 0, 1, + 29, 32, 23, 33, 30, 15, 17, 18, 20, 21, 32, 23, 33, 31, 34, 27, + 2, 4, 5, 0, 1, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -1025,8 +998,8 @@ 0x40L, }; protected CharStream input_stream; -private final int[] jjrounds = new int[37]; -private final int[] jjstateSet = new int[74]; +private final int[] jjrounds = new int[36]; +private final int[] jjstateSet = new int[72]; protected char curChar; public QueryParserTokenManager(CharStream stream) { @@ -1048,7 +1021,7 @@ { int i; jjround = 0x80000001; - for (i = 37; i-- > 0;) + for (i = 36; i-- > 0;) jjrounds[i] = 0x80000000; } public void ReInit(CharStream stream, int lexState) Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 510176) +++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -197,8 +197,8 @@ public void testSimple() throws Exception { assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", null, "türm term term"); - assertQueryEquals("ümlaut", null, "ümlaut"); + assertQueryEquals("türm term term", new WhitespaceAnalyzer(), "türm term term"); + assertQueryEquals("ümlaut", new WhitespaceAnalyzer(), "ümlaut"); assertQueryEquals("a AND b", null, "+a +b"); assertQueryEquals("(a AND b)", null, "+a +b"); @@ -358,6 +358,14 @@ assertWildcardQueryEquals("*Term", true, "*term", true); assertWildcardQueryEquals("?Term", true, "?term", true); } + + public void testLeadingWildcardType() throws Exception { + QueryParser qp = getParser(null); + qp.setAllowLeadingWildcard(true); + assertEquals(WildcardQuery.class, qp.parse("t*erm*").getClass()); + assertEquals(WildcardQuery.class, qp.parse("?term*").getClass()); + assertEquals(WildcardQuery.class, qp.parse("*term*").getClass()); + } public void testQPA() throws Exception { assertQueryEquals("term term term", qpAnalyzer, "term term term"); Index: src/test/org/apache/lucene/search/TestWildcard.java =================================================================== --- src/test/org/apache/lucene/search/TestWildcard.java (revision 510176) +++ src/test/org/apache/lucene/search/TestWildcard.java (working copy) @@ -19,10 +19,14 @@ import junit.framework.TestCase; import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field.Index; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.store.RAMDirectory; import java.io.IOException; @@ -152,4 +156,102 @@ Hits result = searcher.search(q); assertEquals(expectedMatches, result.length()); } + + /** + * Test that wild card queries are parsed to the correct type and are searched correctly. + * This test looks at both parsing and execution of wildcard queries. + * Although placed here, it also tests prefix queries, verifying that + * prefix queries are not parsed into wild card queries, and viceversa. + * @throws Exception + */ + public void testParsingAndSearching() throws Exception { + String field = "content"; + boolean dbg = false; + QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer()); + qp.setAllowLeadingWildcard(true); + String docs[] = { + "abcdefg1", + "hijklmn1", + "opqrstu1", + }; + // queries that should find all docs + String matchAll[] = { + "*", "*1", "*1", "*?", "*?1", + }; + // queries that should find no docs + String matchNone[] = { + "a*h", "a?h", "*a*h", "?a", "a?", + }; + // queries that should be parsed to prefix queries + String matchOneDocPrefix[][] = { + {"a*", "ab*", "abc*"}, // these should find only doc 0 + {"h*", "hi*", "hij*"}, // these should find only doc 1 + {"o*", "op*", "opq*"}, // these should find only doc 2 + }; + // queries that should be parsed to wildcard queries + String matchOneDocWild[][] = { + {"*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1"}, // these should find only doc 0 + {"*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1"}, // these should find only doc 1 + {"*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1"}, // these should find only doc 2 + }; + + // prepare the index + RAMDirectory dir = new RAMDirectory(); + IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer()); + for (int i = 0; i < docs.length; i++) { + Document doc = new Document(); + doc.add(new Field(field,docs[i],Store.NO,Index.UN_TOKENIZED)); + iw.addDocument(doc); + } + iw.close(); + + IndexSearcher searcher = new IndexSearcher(dir); + + // test queries that must find all + for (int i = 0; i < matchAll.length; i++) { + String qtxt = matchAll[i]; + Query q = qp.parse(qtxt); + if (dbg) System.out.println("matchAll: qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + Hits hits = searcher.search(q); + assertEquals(docs.length,hits.length()); + } + + // test queries that must find none + for (int i = 0; i < matchNone.length; i++) { + String qtxt = matchNone[i]; + Query q = qp.parse(qtxt); + if (dbg) System.out.println("matchNone: qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + Hits hits = searcher.search(q); + assertEquals(0,hits.length()); + } + + // test queries that must be prefix queries and must find only one doc + for (int i = 0; i < matchOneDocPrefix.length; i++) { + for (int j = 0; j < matchOneDocPrefix[i].length; j++) { + String qtxt = matchOneDocPrefix[i][j]; + Query q = qp.parse(qtxt); + if (dbg) System.out.println("match 1 prefix: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + assertEquals(PrefixQuery.class, q.getClass()); + Hits hits = searcher.search(q); + assertEquals(1,hits.length()); + assertEquals(i,hits.id(0)); + } + } + + // test queries that must be wildcard queries and must find only one doc + for (int i = 0; i < matchOneDocPrefix.length; i++) { + for (int j = 0; j < matchOneDocWild[i].length; j++) { + String qtxt = matchOneDocWild[i][j]; + Query q = qp.parse(qtxt); + if (dbg) System.out.println("match 1 wild: doc="+docs[i]+" qtxt="+qtxt+" q="+q+" "+q.getClass().getName()); + assertEquals(WildcardQuery.class, q.getClass()); + Hits hits = searcher.search(q); + assertEquals(1,hits.length()); + assertEquals(i,hits.id(0)); + } + } + + searcher.close(); + } + }