Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (revision 1024236) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (working copy) @@ -17,67 +17,82 @@ * limitations under the License. */ +import java.io.IOException; +import java.io.Reader; +import java.text.DateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.queryParser.TestQueryParser; +import org.apache.lucene.queryParser.core.QueryNodeException; +import org.apache.lucene.queryParser.core.QueryNodeParseException; +import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator; +import org.apache.lucene.queryParser.standard.parser.ParseException; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import 
org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.RegexpQuery; -import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; -import java.io.IOException; -import java.io.Reader; -import java.text.DateFormat; -import java.util.Calendar; -import java.util.GregorianCalendar; - +/** + *
+ * This test case tests {@link PrecedenceQueryParser}. + *
+ *+ * It contains all tests from {@link TestQueryParser} with some adjusted to + * fit the precedence requirement, plus some precedence test cases. + *
+ * + * @see TestQueryParser + */ public class TestPrecedenceQueryParser extends LuceneTestCase { + public static Analyzer qpAnalyzer = new QPTestAnalyzer(); public static final class QPTestFilter extends TokenFilter { /** - * Filter which discards the token 'stop' and which expands the - * token 'phrase' into 'phrase1 phrase2' + * Filter which discards the token 'stop' and which expands the token + * 'phrase' into 'phrase1 phrase2' */ public QPTestFilter(TokenStream in) { super(in); } boolean inPhrase = false; + int savedStart = 0, savedEnd = 0; CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - - @Override + public boolean incrementToken() throws IOException { - clearAttributes(); if (inPhrase) { inPhrase = false; termAtt.setEmpty().append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else - while(input.incrementToken()) + while (input.incrementToken()) if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); @@ -94,31 +109,13 @@ public static final class QPTestAnalyzer extends Analyzer { /** Filters MockTokenizer with StopFilter. 
*/ - @Override public final TokenStream tokenStream(String fieldName, Reader reader) { return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); } } - public static class QPTestParser extends PrecedenceQueryParser { - public QPTestParser(String f, Analyzer a) { - super(f, a); - } - - @Override - protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { - throw new ParseException("Fuzzy queries not allowed"); - } - - @Override - protected Query getWildcardQuery(String field, String termStr) throws ParseException { - throw new ParseException("Wildcard queries not allowed"); - } - } - private int originalMaxClauses; - @Override public void setUp() throws Exception { super.setUp(); originalMaxClauses = BooleanQuery.getMaxClauseCount(); @@ -127,40 +124,42 @@ public PrecedenceQueryParser getParser(Analyzer a) throws Exception { if (a == null) a = new MockAnalyzer(MockTokenizer.SIMPLE, true); - PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a); - qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR); + PrecedenceQueryParser qp = new PrecedenceQueryParser(); + qp.setAnalyzer(a); + qp.setDefaultOperator(Operator.OR); return qp; } public Query getQuery(String query, Analyzer a) throws Exception { - return getParser(a).parse(query); + return getParser(a).parse(query, "field"); } public void assertQueryEquals(String query, Analyzer a, String result) - throws Exception { + throws Exception { Query q = getQuery(query, a); String s = q.toString("field"); if (!s.equals(result)) { - fail("Query /" + query + "/ yielded /" + s - + "/, expecting /" + result + "/"); + fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result + + "/"); } } - public void assertWildcardQueryEquals(String query, boolean lowercase, String result) - throws Exception { + public void assertWildcardQueryEquals(String query, boolean lowercase, + String result) throws Exception { PrecedenceQueryParser qp = 
getParser(null); qp.setLowercaseExpandedTerms(lowercase); - Query q = qp.parse(query); + Query q = qp.parse(query, "field"); String s = q.toString("field"); if (!s.equals(result)) { - fail("WildcardQuery /" + query + "/ yielded /" + s - + "/, expecting /" + result + "/"); + fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /" + + result + "/"); } } - public void assertWildcardQueryEquals(String query, String result) throws Exception { + public void assertWildcardQueryEquals(String query, String result) + throws Exception { PrecedenceQueryParser qp = getParser(null); - Query q = qp.parse(query); + Query q = qp.parse(query, "field"); String s = q.toString("field"); if (!s.equals(result)) { fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /" @@ -168,22 +167,22 @@ } } - public Query getQueryDOA(String query, Analyzer a) - throws Exception { + public Query getQueryDOA(String query, Analyzer a) throws Exception { if (a == null) a = new MockAnalyzer(MockTokenizer.SIMPLE, true); - PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a); - qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR); - return qp.parse(query); + PrecedenceQueryParser qp = new PrecedenceQueryParser(); + qp.setAnalyzer(a); + qp.setDefaultOperator(Operator.AND); + return qp.parse(query, "field"); } public void assertQueryEqualsDOA(String query, Analyzer a, String result) - throws Exception { + throws Exception { Query q = getQueryDOA(query, a); String s = q.toString("field"); if (!s.equals(result)) { - fail("Query /" + query + "/ yielded /" + s - + "/, expecting /" + result + "/"); + fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result + + "/"); } } @@ -193,8 +192,8 @@ assertQueryEquals("", null, ""); assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", null, "türm term term"); - assertQueryEquals("ümlaut", null, "ümlaut"); + assertQueryEquals("türm term term", null, "türm term term"); + 
assertQueryEquals("ümlaut", null, "ümlaut"); assertQueryEquals("+a", null, "+a"); assertQueryEquals("-a", null, "-a"); @@ -212,9 +211,9 @@ assertQueryEquals("+term -term term", null, "+term -term term"); assertQueryEquals("foo:term AND field:anotherTerm", null, - "+foo:term +anotherterm"); + "+foo:term +anotherterm"); assertQueryEquals("term AND \"phrase phrase\"", null, - "+term +\"phrase phrase\""); + "+term +\"phrase phrase\""); assertQueryEquals("\"hello there\"", null, "\"hello there\""); assertTrue(getQuery("a AND b", null) instanceof BooleanQuery); assertTrue(getQuery("hello", null) instanceof TermQuery); @@ -229,21 +228,21 @@ assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0"); assertQueryEquals("(foo OR bar) AND (baz OR boo)", null, - "+(foo bar) +(baz boo)"); - assertQueryEquals("((a OR b) AND NOT c) OR d", null, - "(+(a b) -c) d"); + "+(foo bar) +(baz boo)"); + assertQueryEquals("((a OR b) AND NOT c) OR d", null, "(+(a b) -c) d"); assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, - "+(apple \"steve jobs\") -(foo bar baz)"); + "+(apple \"steve jobs\") -(foo bar baz)"); assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, - "+(title:dog title:cat) -author:\"bob dole\""); - - PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new MockAnalyzer()); + "+(title:dog title:cat) -author:\"bob dole\""); + + PrecedenceQueryParser qp = new PrecedenceQueryParser(); + qp.setAnalyzer(new MockAnalyzer()); // make sure OR is the default: - assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator()); - qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR); - assertEquals(PrecedenceQueryParser.AND_OPERATOR, qp.getDefaultOperator()); - qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR); - assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator()); + assertEquals(Operator.OR, qp.getDefaultOperator()); + qp.setDefaultOperator(Operator.AND); + assertEquals(Operator.AND, 
qp.getDefaultOperator()); + qp.setDefaultOperator(Operator.OR); + assertEquals(Operator.OR, qp.getDefaultOperator()); assertQueryEquals("a OR !b", null, "a (-b)"); assertQueryEquals("a OR ! b", null, "a (-b)"); @@ -266,101 +265,17 @@ } public void testNumber() throws Exception { -// The numbers go away because SimpleAnalzyer ignores them + // The numbers go away because SimpleAnalzyer ignores them assertQueryEquals("3", null, ""); assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true); + Analyzer a = new MockAnalyzer(); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); } - //individual CJK chars as terms, like StandardAnalyzer - private class SimpleCJKTokenizer extends Tokenizer { - private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - - public SimpleCJKTokenizer(Reader input) { - super(input); - } - - @Override - public boolean incrementToken() throws IOException { - int ch = input.read(); - if (ch < 0) - return false; - clearAttributes(); - termAtt.setEmpty().append((char) ch); - return true; - } - } - - private class SimpleCJKAnalyzer extends Analyzer { - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new SimpleCJKTokenizer(reader); - } - } - - public void testCJKTerm() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - BooleanQuery expected = new BooleanQuery(); - expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD); - expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); - - assertEquals(expected, getQuery("中国", analyzer)); - } - - public void testCJKBoostedTerm() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new 
SimpleCJKAnalyzer(); - - BooleanQuery expected = new BooleanQuery(); - expected.setBoost(0.5f); - expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD); - expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); - - assertEquals(expected, getQuery("中国^0.5", analyzer)); - } - - public void testCJKPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - PhraseQuery expected = new PhraseQuery(); - expected.add(new Term("field", "中")); - expected.add(new Term("field", "国")); - - assertEquals(expected, getQuery("\"中国\"", analyzer)); - } - - public void testCJKBoostedPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - PhraseQuery expected = new PhraseQuery(); - expected.setBoost(0.5f); - expected.add(new Term("field", "中")); - expected.add(new Term("field", "国")); - - assertEquals(expected, getQuery("\"中国\"^0.5", analyzer)); - } - - public void testCJKSloppyPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - PhraseQuery expected = new PhraseQuery(); - expected.setSlop(3); - expected.add(new Term("field", "中")); - expected.add(new Term("field", "国")); - - assertEquals(expected, getQuery("\"中国\"~3", analyzer)); - } - // failing tests disabled since PrecedenceQueryParser // is currently unmaintained public void _testWildcard() throws Exception { @@ -377,24 +292,25 @@ assertTrue(getQuery("term*^2", null) instanceof PrefixQuery); assertTrue(getQuery("term~", null) instanceof FuzzyQuery); assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery); - FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null); + FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null); assertEquals(0.7f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); - fq = (FuzzyQuery)getQuery("term~", null); + 
fq = (FuzzyQuery) getQuery("term~", null); assertEquals(0.5f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); try { - getQuery("term~1.1", null); // value > 1, throws exception + getQuery("term~1.1", null); // value > 1, throws exception fail(); - } catch(ParseException pe) { + } catch (ParseException pe) { // expected exception } assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); -/* Tests to see that wild card terms are (or are not) properly - * lower-cased with propery parser configuration - */ -// First prefix queries: + /* + * Tests to see that wild card terms are (or are not) properly lower-cased + * with propery parser configuration + */ + // First prefix queries: // by default, convert to lowercase: assertWildcardQueryEquals("Term*", true, "term*"); // explicitly set lowercase: @@ -405,7 +321,7 @@ assertWildcardQueryEquals("term*", false, "term*"); assertWildcardQueryEquals("Term*", false, "Term*"); assertWildcardQueryEquals("TERM*", false, "TERM*"); -// Then 'full' wildcard queries: + // Then 'full' wildcard queries: // by default, convert to lowercase: assertWildcardQueryEquals("Te?m", "te?m"); // explicitly set lowercase: @@ -418,11 +334,11 @@ assertWildcardQueryEquals("Te?m", false, "Te?m"); assertWildcardQueryEquals("TE?M", false, "TE?M"); assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM"); -// Fuzzy queries: + // Fuzzy queries: assertWildcardQueryEquals("Term~", "term~0.5"); assertWildcardQueryEquals("Term~", true, "term~0.5"); assertWildcardQueryEquals("Term~", false, "Term~0.5"); -// Range queries: + // Range queries: assertWildcardQueryEquals("[A TO C]", "[a TO c]"); assertWildcardQueryEquals("[A TO C]", true, "[a TO c]"); assertWildcardQueryEquals("[A TO C]", false, "[A TO C]"); @@ -434,11 +350,11 @@ assertQueryEquals("term -stop term", qpAnalyzer, "term term"); assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll"); assertQueryEquals("term phrase term", 
qpAnalyzer, - "term (phrase1 phrase2) term"); + "term (phrase1 phrase2) term"); // note the parens in this next assertion differ from the original // QueryParser behavior assertQueryEquals("term AND NOT phrase term", qpAnalyzer, - "(+term -(phrase1 phrase2)) term"); + "(+term -(phrase1 phrase2)) term"); assertQueryEquals("stop", qpAnalyzer, ""); assertQueryEquals("stop OR stop AND stop", qpAnalyzer, ""); assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery); @@ -455,9 +371,10 @@ assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar"); assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar"); assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}"); - assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); + assertQueryEquals("gack ( bar blar { a TO z}) ", null, + "gack (bar blar {a TO z})"); } - + private String escapeDateString(String s) { if (s.contains(" ")) { return "\"" + s + "\""; @@ -471,54 +388,107 @@ return DateTools.dateToString(df.parse(s), DateTools.Resolution.DAY); } - public String getLocalizedDate(int year, int month, int day) { + private String getLocalizedDate(int year, int month, int day, + boolean extendLastDate) { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); Calendar calendar = new GregorianCalendar(); - calendar.clear(); calendar.set(year, month, day); - calendar.set(Calendar.HOUR_OF_DAY, 23); - calendar.set(Calendar.MINUTE, 59); - calendar.set(Calendar.SECOND, 59); - calendar.set(Calendar.MILLISECOND, 999); + if (extendLastDate) { + calendar.set(Calendar.HOUR_OF_DAY, 23); + calendar.set(Calendar.MINUTE, 59); + calendar.set(Calendar.SECOND, 59); + calendar.set(Calendar.MILLISECOND, 999); + } return df.format(calendar.getTime()); } public void testDateRange() throws Exception { - String startDate = getLocalizedDate(2002, 1, 1); - String endDate = getLocalizedDate(2002, 1, 4); - assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + 
escapeDateString(endDate) + "]", null, - "[" + getDate(startDate) + " TO " + getDate(endDate) + "]"); - assertQueryEquals("{ " + escapeDateString(startDate) + " " + escapeDateString(endDate) + " }", null, - "{" + getDate(startDate) + " TO " + getDate(endDate) + "}"); + String startDate = getLocalizedDate(2002, 1, 1, false); + String endDate = getLocalizedDate(2002, 1, 4, false); + Calendar endDateExpected = new GregorianCalendar(); + endDateExpected.set(2002, 1, 4, 23, 59, 59); + endDateExpected.set(Calendar.MILLISECOND, 999); + final String defaultField = "default"; + final String monthField = "month"; + final String hourField = "hour"; + PrecedenceQueryParser qp = new PrecedenceQueryParser(new MockAnalyzer()); + + // Don't set any date resolution and verify if DateField is used + assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, + endDateExpected.getTime(), null); + + Map+) or a minus (-) sign, indicating
- * that the clause is required or prohibited respectively; or
- * +/- prefix to require any of a set of
- * terms.
- *
- * Query ::= ( Clause )*
- * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
- *
- *
* - * Examples of appropriately formatted queries can be found in the query syntax - * documentation. + * This query parser works exactly as the standard query parser ( {@link StandardQueryParser} ), + * except that it respect the boolean precedence, so <a AND b OR c AND d> is parsed to <(+a +b) (+c +d)> + * instead of <+a +b +c +d>. *
+ *+ * EXPERT: This class extends {@link StandardQueryParser}, but uses {@link PrecedenceQueryNodeProcessorPipeline} + * instead of {@link StandardQueryNodeProcessorPipeline} to process the query tree. + *
+ * + * @see StandardQueryParser */ -public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { - - private static final int CONJ_NONE = 0; - private static final int CONJ_AND = 1; - private static final int CONJ_OR = 2; - - private static final int MOD_NONE = 0; - private static final int MOD_NOT = 10; - private static final int MOD_REQ = 11; - - // make it possible to call setDefaultOperator() without accessing - // the nested class: - public static final Operator AND_OPERATOR = Operator.AND; - public static final Operator OR_OPERATOR = Operator.OR; - - /** The actual operator that parser uses to combine query terms */ - private Operator operator = OR_OPERATOR; - - boolean lowercaseExpandedTerms = true; - MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; - - Analyzer analyzer; - String field; - int phraseSlop = 0; - float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; - int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; - Locale locale = Locale.getDefault(); - - static enum Operator { OR, AND } - - /** Constructs a query parser. - * @param f the default field for query terms. - * @param a used to find terms in the query text. - */ - public PrecedenceQueryParser(String f, Analyzer a) { - this(new FastCharStream(new StringReader(""))); - analyzer = a; - field = f; - } - - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param expression the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public Query parse(String expression) throws ParseException { - // optimize empty query to be empty BooleanQuery - if (expression == null || expression.trim().length() == 0) { - return new BooleanQuery(); - } - - ReInit(new FastCharStream(new StringReader(expression))); - try { - Query query = Query(field); - return (query != null) ? 
query : new BooleanQuery(); - } - catch (TokenMgrError tme) { - throw new ParseException(tme.getMessage()); - } - catch (BooleanQuery.TooManyClauses tmc) { - throw new ParseException("Too many boolean clauses"); - } - } - - /** - * @return Returns the analyzer. - */ - public Analyzer getAnalyzer() { - return analyzer; - } - +public class PrecedenceQueryParser extends StandardQueryParser { + /** - * @return Returns the field. + * @see StandardQueryParser#StandardQueryParser() */ - public String getField() { - return field; + public PrecedenceQueryParser() { + setQueryNodeProcessor(new PrecedenceQueryNodeProcessorPipeline(getQueryConfigHandler())); } - - /** - * Get the minimal similarity for fuzzy queries. - */ - public float getFuzzyMinSim() { - return fuzzyMinSim; - } - + /** - * Set the minimum similarity for fuzzy queries. - * Default is 2f. + * @see StandardQueryParser#StandardQueryParser(Analyzer) */ - public void setFuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; + public PrecedenceQueryParser(Analyzer analyer) { + super(analyer); + + setQueryNodeProcessor(new PrecedenceQueryNodeProcessorPipeline(getQueryConfigHandler())); + } - /** - * Get the prefix length for fuzzy queries. - * @return Returns the fuzzyPrefixLength. - */ - public int getFuzzyPrefixLength() { - return fuzzyPrefixLength; - } - - /** - * Set the prefix length for fuzzy queries. Default is 0. - * @param fuzzyPrefixLength The fuzzyPrefixLength to set. - */ - public void setFuzzyPrefixLength(int fuzzyPrefixLength) { - this.fuzzyPrefixLength = fuzzyPrefixLength; - } - - /** - * Sets the default slop for phrases. If zero, then exact phrase matches - * are required. Default value is zero. - */ - public void setPhraseSlop(int phraseSlop) { - this.phraseSlop = phraseSlop; - } - - /** - * Gets the default slop for phrases. - */ - public int getPhraseSlop() { - return phraseSlop; - } - - /** - * Sets the boolean operator of the QueryParser. 
- * In default mode (OR_OPERATOR) terms without any modifiers
- * are considered optional: for example capital of Hungary is equal to
- * capital OR of OR Hungary.AND_OPERATOR mode terms are considered to be in conjunction: the
- * above mentioned query is parsed as capital AND of AND Hungary
- */
- public void setDefaultOperator(Operator op) {
- this.operator = op;
- }
-
- /**
- * Gets implicit operator setting, which will be either AND_OPERATOR
- * or OR_OPERATOR.
- */
- public Operator getDefaultOperator() {
- return operator;
- }
-
- /**
- * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
- * lower-cased or not. Default is true.
- */
- public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
- this.lowercaseExpandedTerms = lowercaseExpandedTerms;
- }
-
- /**
- * @see #setLowercaseExpandedTerms(boolean)
- */
- public boolean getLowercaseExpandedTerms() {
- return lowercaseExpandedTerms;
- }
- /**
- * By default PrecedenceQueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
- * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
- * a) Runs faster b) Does not have the scarcity of terms unduly influence score
- * c) avoids any "TooManyBooleanClauses" exception.
- * However, if your application really needs to use the
- * old-fashioned BooleanQuery expansion rewriting and the above
- * points are not relevant then use this to change
- * the rewrite method.
- */
- public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
- multiTermRewriteMethod = method;
- }
-
-
- /**
- * @see #setMultiTermRewriteMethod
- */
- public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
- return multiTermRewriteMethod;
- }
-
- /**
- * Set locale used by date range parsing.
- */
- public void setLocale(Locale locale) {
- this.locale = locale;
- }
-
- /**
- * Returns current locale, allowing access by subclasses.
- */
- public Locale getLocale() {
- return locale;
- }
-
- protected void addClause(List- * Depending on settings, prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *
- * Can be overridden by extending classes, to provide custom handling for - * wildcard queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - final WildcardQuery query = new WildcardQuery(t); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. - *
- * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *
- * Can be overridden by extending classes, to provide custom handling for - * wild card queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * (without trailing '*' character!) - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getPrefixQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - final PrefixQuery query = new PrefixQuery(t); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains a regular expression - * query. - *
- * Depending on settings, pattern term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with regular expression templates. - *
- * Can be overridden by extending classes, to provide custom handling for
- * regular expression queries, which may be necessary due to missing analyzer
- * calls.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token that contains a regular expression
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getRegexpQuery(String field, String termStr) throws ParseException
- {
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- final Term regexp = new Term(field, termStr);
- final RegexpQuery query = new RegexpQuery(regexp);
- query.setRewriteMethod(multiTermRewriteMethod);
- return query;
- }
-
- /**
- * Factory method for generating a query (similar to
- * {@link #getWildcardQuery}). Called when parser parses
- * an input term token that has the fuzzy suffix (~) appended.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
- {
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- Term t = new Term(field, termStr);
- return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
- }
-
- /**
- * Returns a String where the escape char has been
- * removed, or kept only once if there was a double escape.
- */
- private String discardEscapeChar(String input) {
- char[] caSource = input.toCharArray();
- char[] caDest = new char[caSource.length];
- int j = 0;
- for (int i = 0; i < caSource.length; i++) {
- if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
- caDest[j++]=caSource[i];
- }
- }
- return new String(caDest, 0, j);
- }
-
- /**
- * Returns a String where those characters that QueryParser
- * expects to be escaped are escaped by a preceding
- * Examples of appropriately formatted queries can be found in the query syntax
- * documentation.
- *
- * Depending on settings, prefix term may be lower-cased
- * automatically. It will not go through the default Analyzer,
- * however, since normal Analyzers are unlikely to work properly
- * with wildcard templates.
- *
- * Can be overridden by extending classes, to provide custom handling for
- * wildcard queries, which may be necessary due to missing analyzer calls.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token that contains one or more wild card
- * characters (? or *), but is not simple prefix term
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getWildcardQuery(String field, String termStr) throws ParseException
- {
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- Term t = new Term(field, termStr);
- final WildcardQuery query = new WildcardQuery(t);
- query.setRewriteMethod(multiTermRewriteMethod);
- return query;
- }
-
- /**
- * Factory method for generating a query (similar to
- * {@link #getWildcardQuery}). Called when parser parses an input term
- * token that uses prefix notation; that is, contains a single '*' wildcard
- * character as its last character. Since this is a special case
- * of generic wildcard term, and such a query can be optimized easily,
- * this usually results in a different query object.
- *
- * Depending on settings, a prefix term may be lower-cased
- * automatically. It will not go through the default Analyzer,
- * however, since normal Analyzers are unlikely to work properly
- * with wildcard templates.
- *
- * Can be overridden by extending classes, to provide custom handling for
- * wild card queries, which may be necessary due to missing analyzer calls.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- * (without trailing '*' character!)
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getPrefixQuery(String field, String termStr) throws ParseException
- {
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- Term t = new Term(field, termStr);
- final PrefixQuery query = new PrefixQuery(t);
- query.setRewriteMethod(multiTermRewriteMethod);
- return query;
- }
-
- /**
- * Factory method for generating a query. Called when parser
- * parses an input term token that contains a regular expression
- * query.
- *
- * Depending on settings, pattern term may be lower-cased
- * automatically. It will not go through the default Analyzer,
- * however, since normal Analyzers are unlikely to work properly
- * with regular expression templates.
- *
- * Can be overridden by extending classes, to provide custom handling for
- * regular expression queries, which may be necessary due to missing analyzer
- * calls.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token that contains a regular expression
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getRegexpQuery(String field, String termStr) throws ParseException
- {
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- final Term regexp = new Term(field, termStr);
- final RegexpQuery query = new RegexpQuery(regexp);
- query.setRewriteMethod(multiTermRewriteMethod);
- return query;
- }
-
- /**
- * Factory method for generating a query (similar to
- * {@link #getWildcardQuery}). Called when parser parses
- * an input term token that has the fuzzy suffix (~) appended.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
- {
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- Term t = new Term(field, termStr);
- return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
- }
-
- /**
- * Returns a String where the escape char has been
- * removed, or kept only once if there was a double escape.
- */
- private String discardEscapeChar(String input) {
- char[] caSource = input.toCharArray();
- char[] caDest = new char[caSource.length];
- int j = 0;
- for (int i = 0; i < caSource.length; i++) {
- if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
- caDest[j++]=caSource[i];
- }
- }
- return new String(caDest, 0, j);
- }
-
- /**
- * Returns a String where those characters that QueryParser
- * expects to be escaped are escaped by a preceding
+ * This processor pipeline extends {@link StandardQueryNodeProcessorPipeline} and enables
+ * boolean precedence on it.
+ *
+ * EXPERT: the precedence is enabled by removing {@link GroupQueryNodeProcessor} from the
+ * {@link StandardQueryNodeProcessorPipeline} and appending {@link BooleanModifiersQueryNodeProcessor}
+ * to the pipeline.
+ *
+ * This processor is used to apply the correct {@link ModifierQueryNode} to {@link BooleanQueryNode}s children.
+ *
+ * It walks through the query node tree looking for {@link BooleanQueryNode}s. If an {@link AndQueryNode} is found,
+ * every child that is not a {@link ModifierQueryNode}, or whose {@link ModifierQueryNode} is
+ * {@link Modifier#MOD_NONE}, receives a {@link Modifier#MOD_REQ} modifier. For any other
+ * {@link BooleanQueryNode} that is not an {@link OrQueryNode}, it checks whether the default operator is {@link Operator#AND};
+ * if it is, the same operation applied when an {@link AndQueryNode} is found is applied to it.
+ *
+This package contains the two {@link org.apache.lucene.queryParser.core.processors.QueryNodeProcessor}s used by
+{@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}.
+
+{@link org.apache.lucene.queryParser.precedence.processors.BooleanModifiersQueryNodeProcessor}: this processor
+is used to apply {@link org.apache.lucene.queryParser.core.nodes.ModifierQueryNode}s on
+{@link org.apache.lucene.queryParser.core.nodes.BooleanQueryNode} children according to the boolean type
+or the default operator.
+
+{@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline}: this
+processor pipeline is used by {@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}. It extends
+{@link org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline} and rearranges
+the pipeline so the boolean precedence is processed correctly. Check {@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline}
+for more details.
+ Note that
- * this does not do line-number counting, but instead keeps track of the
- * character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API. */
-public final class FastCharStream implements CharStream {
- char[] buffer = null;
-
- int bufferLength = 0; // end of valid chars
- int bufferPosition = 0; // next char to read
-
- int tokenStart = 0; // offset in buffer
- int bufferStart = 0; // position in file of buffer
-
- Reader input; // source of chars
-
- /** Constructs from a Reader. */
- public FastCharStream(Reader r) {
- input = r;
- }
-
- public final char readChar() throws IOException {
- if (bufferPosition >= bufferLength)
- refill();
- return buffer[bufferPosition++];
- }
-
- private final void refill() throws IOException {
- int newPosition = bufferLength - tokenStart;
-
- if (tokenStart == 0) { // token won't fit in buffer
- if (buffer == null) { // first time: alloc buffer
- buffer = new char[2048];
- } else if (bufferLength == buffer.length) { // grow buffer
- char[] newBuffer = new char[buffer.length*2];
- System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
- buffer = newBuffer;
- }
- } else { // shift token to front
- System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
- }
-
- bufferLength = newPosition; // update state
- bufferPosition = newPosition;
- bufferStart += tokenStart;
- tokenStart = 0;
-
- int charsRead = // fill space in buffer
- input.read(buffer, newPosition, buffer.length-newPosition);
- if (charsRead == -1)
- throw new IOException("read past eof");
- else
- bufferLength += charsRead;
- }
-
- public final char BeginToken() throws IOException {
- tokenStart = bufferPosition;
- return readChar();
- }
-
- public final void backup(int amount) {
- bufferPosition -= amount;
- }
-
- public final String GetImage() {
- return new String(buffer, tokenStart, bufferPosition - tokenStart);
- }
-
- public final char[] GetSuffix(int len) {
- char[] value = new char[len];
- System.arraycopy(buffer, bufferPosition - len, value, 0, len);
- return value;
- }
-
- public final void Done() {
- try {
- input.close();
- } catch (IOException e) {
- System.err.println("Caught: " + e + "; ignoring.");
- }
- }
-
- public final int getColumn() {
- return bufferStart + bufferPosition;
- }
- public final int getLine() {
- return 1;
- }
- public final int getEndColumn() {
- return bufferStart + bufferPosition;
- }
- public final int getEndLine() {
- return 1;
- }
- public final int getBeginColumn() {
- return bufferStart + tokenStart;
- }
- public final int getBeginLine() {
- return 1;
- }
-}
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java (revision 1024236)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java (working copy)
@@ -1,112 +0,0 @@
-/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
-/* JavaCCOptions:STATIC=false */
-package org.apache.lucene.queryParser.precedence;
-
-/**
- * This interface describes a character stream that maintains line and
- * column number positions of the characters. It also has the capability
- * to backup the stream to some extent. An implementation of this
- * interface is used in the TokenManager implementation generated by
- * JavaCCParser.
- *
- * All the methods except backup can be implemented in any fashion. backup
- * needs to be implemented correctly for the correct operation of the lexer.
- * Rest of the methods are all used to get information like line number,
- * column number and the String that constitutes a token and are not used
- * by the lexer. Hence their implementation won't affect the generated lexer's
- * operation.
- */
-
-public interface CharStream {
-
- /**
- * Returns the next character from the selected input. The method
- * of selecting the input is the responsibility of the class
- * implementing this interface. Can throw any java.io.IOException.
- */
- char readChar() throws java.io.IOException;
-
- /**
- * Returns the column position of the character last read.
- * @deprecated
- * @see #getEndColumn
- */
- int getColumn();
-
- /**
- * Returns the line number of the character last read.
- * @deprecated
- * @see #getEndLine
- */
- int getLine();
-
- /**
- * Returns the column number of the last character for current token (being
- * matched after the last call to BeginTOken).
- */
- int getEndColumn();
-
- /**
- * Returns the line number of the last character for current token (being
- * matched after the last call to BeginTOken).
- */
- int getEndLine();
-
- /**
- * Returns the column number of the first character for current token (being
- * matched after the last call to BeginTOken).
- */
- int getBeginColumn();
-
- /**
- * Returns the line number of the first character for current token (being
- * matched after the last call to BeginTOken).
- */
- int getBeginLine();
-
- /**
- * Backs up the input stream by amount steps. Lexer calls this method if it
- * had already read some characters, but could not use them to match a
- * (longer) token. So, they will be used again as the prefix of the next
- * token and it is the implemetation's responsibility to do this right.
- */
- void backup(int amount);
-
- /**
- * Returns the next character that marks the beginning of the next token.
- * All characters must remain in the buffer between two successive calls
- * to this method to implement backup correctly.
- */
- char BeginToken() throws java.io.IOException;
-
- /**
- * Returns a string made up of characters from the marked token beginning
- * to the current buffer position. Implementations have the choice of returning
- * anything that they want to. For example, for efficiency, one might decide
- * to just return null, which is a valid implementation.
- */
- String GetImage();
-
- /**
- * Returns an array of characters that make up the suffix of length 'len' for
- * the currently matched token. This is used to build up the matched string
- * for use in actions in the case of MORE. A simple and inefficient
- * implementation of this is as follows :
- *
- * {
- * String t = GetImage();
- * return t.substring(t.length() - len, t.length()).toCharArray();
- * }
- */
- char[] GetSuffix(int len);
-
- /**
- * The lexer calls this function to indicate that it is done with the stream
- * and hence implementations can free any resources held by this class.
- * Again, the body of this function can be just empty and it will not
- * affect the lexer's operation.
- */
- void Done();
-
-}
-/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java (revision 1024236)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java (working copy)
@@ -1,198 +0,0 @@
-/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
-/* JavaCCOptions:KEEP_LINE_COL=null */
-package org.apache.lucene.queryParser.precedence;
-
-/**
- * This exception is thrown when parse errors are encountered.
- * You can explicitly create objects of this exception type by
- * calling the method generateParseException in the generated
- * parser.
- *
- * You can modify this class to customize your error reporting
- * mechanisms so long as you retain the public fields.
- */
-public class ParseException extends Exception {
-
- /**
- * This constructor is used by the method "generateParseException"
- * in the generated parser. Calling this constructor generates
- * a new object of this type with the fields "currentToken",
- * "expectedTokenSequences", and "tokenImage" set. The boolean
- * flag "specialConstructor" is also set to true to indicate that
- * this constructor was used to create this object.
- * This constructor calls its super class with the empty string
- * to force the "toString" method of parent class "Throwable" to
- * print the error message in the form:
- * ParseException:
+The Precedence Query Parser extends the Standard Query Parser and enables
+boolean precedence. So, a query such as <a AND b OR c AND d> is parsed to
+<(+a +b) (+c +d)> instead of <+a +b +c +d>.
+
+Check {@link org.apache.lucene.queryParser.standard.StandardQueryParser} for more details about the
+supported syntax and query parser functionalities.
+\.
- */
- public static String escape(String s) {
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < s.length(); i++) {
- char c = s.charAt(i);
- // NOTE: keep this in sync with _ESCAPED_CHAR below!
- if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
- || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
- || c == '*' || c == '?') {
- sb.append('\\');
- }
- sb.append(c);
- }
- return sb.toString();
- }
-
-// * Query ::= ( Clause )*
-// * Clause ::= ["+", "-"] [
- * java org.apache.lucene.queryParser.QueryParser <input>
- */
-// public static void main(String[] args) throws Exception {
-// if (args.length == 0) {
-// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
-// System.exit(0);
-// }
-// PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
-// new org.apache.lucene.analysis.SimpleAnalyzer());
-// Query q = qp.parse(args[0]);
-// System.out.println(q.toString("field"));
-// }
}
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java (revision 1024236)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java (working copy)
@@ -1,124 +0,0 @@
-/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
-/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
-package org.apache.lucene.queryParser.precedence;
-
-/**
- * Describes the input token stream.
- */
-
-public class Token {
-
- /**
- * An integer that describes the kind of this token. This numbering
- * system is determined by JavaCCParser, and a table of these numbers is
- * stored in the file ...Constants.java.
- */
- public int kind;
-
- /** The line number of the first character of this Token. */
- public int beginLine;
- /** The column number of the first character of this Token. */
- public int beginColumn;
- /** The line number of the last character of this Token. */
- public int endLine;
- /** The column number of the last character of this Token. */
- public int endColumn;
-
- /**
- * The string image of the token.
- */
- public String image;
-
- /**
- * A reference to the next regular (non-special) token from the input
- * stream. If this is the last token from the input stream, or if the
- * token manager has not read tokens beyond this one, this field is
- * set to null. This is true only if this token is also a regular
- * token. Otherwise, see below for a description of the contents of
- * this field.
- */
- public Token next;
-
- /**
- * This field is used to access special tokens that occur prior to this
- * token, but after the immediately preceding regular (non-special) token.
- * If there are no such special tokens, this field is set to null.
- * When there are more than one such special token, this field refers
- * to the last of these special tokens, which in turn refers to the next
- * previous special token through its specialToken field, and so on
- * until the first special token (whose specialToken field is null).
- * The next fields of special tokens refer to other special tokens that
- * immediately follow it (without an intervening regular token). If there
- * is no such token, this field is null.
- */
- public Token specialToken;
-
- /**
- * An optional attribute value of the Token.
- * Tokens which are not used as syntactic sugar will often contain
- * meaningful values that will be used later on by the compiler or
- * interpreter. This attribute value is often different from the image.
- * Any subclass of Token that actually wants to return a non-null value can
- * override this method as appropriate.
- */
- public Object getValue() {
- return null;
- }
-
- /**
- * No-argument constructor
- */
- public Token() {}
-
- /**
- * Constructs a new token for the specified Image.
- */
- public Token(int kind)
- {
- this(kind, null);
- }
-
- /**
- * Constructs a new token for the specified Image and Kind.
- */
- public Token(int kind, String image)
- {
- this.kind = kind;
- this.image = image;
- }
-
- /**
- * Returns the image.
- */
- public String toString()
- {
- return image;
- }
-
- /**
- * Returns a new Token object, by default. However, if you want, you
- * can create and return subclass objects based on the value of ofKind.
- * Simply add the cases to the switch for all those special cases.
- * For example, if you have a subclass of Token called IDToken that
- * you want to create if ofKind is ID, simply add something like :
- *
- * case MyParserConstants.ID : return new IDToken(ofKind, image);
- *
- * to the following switch statement. Then you can cast matchedToken
- * variable to the appropriate type and use sit in your lexical actions.
- */
- public static Token newToken(int ofKind, String image)
- {
- switch(ofKind)
- {
- default : return new Token(ofKind, image);
- }
- }
-
- public static Token newToken(int ofKind)
- {
- return newToken(ofKind, null);
- }
-
-}
-/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */
Index: lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java
===================================================================
--- lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java (revision 1024236)
+++ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java (working copy)
@@ -1,141 +0,0 @@
-/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
-/* JavaCCOptions: */
-package org.apache.lucene.queryParser.precedence;
-
-/** Token Manager Error. */
-@SuppressWarnings("serial")
-public class TokenMgrError extends Error
-{
-
- /*
- * Ordinals for various reasons why an Error of this type can be thrown.
- */
-
- /**
- * Lexical error occurred.
- */
- static final int LEXICAL_ERROR = 0;
-
- /**
- * An attempt was made to create a second instance of a static token manager.
- */
- static final int STATIC_LEXER_ERROR = 1;
-
- /**
- * Tried to change to an invalid lexical state.
- */
- static final int INVALID_LEXICAL_STATE = 2;
-
- /**
- * Detected (and bailed out of) an infinite loop in the token manager.
- */
- static final int LOOP_DETECTED = 3;
-
- /**
- * Indicates the reason why the exception is thrown. It will have
- * one of the above 4 values.
- */
- int errorCode;
-
- /**
- * Replaces unprintable characters by their escaped (or unicode escaped)
- * equivalents in the given string
- */
- protected static final String addEscapes(String str) {
- StringBuffer retval = new StringBuffer();
- char ch;
- for (int i = 0; i < str.length(); i++) {
- switch (str.charAt(i))
- {
- case 0 :
- continue;
- case '\b':
- retval.append("\\b");
- continue;
- case '\t':
- retval.append("\\t");
- continue;
- case '\n':
- retval.append("\\n");
- continue;
- case '\f':
- retval.append("\\f");
- continue;
- case '\r':
- retval.append("\\r");
- continue;
- case '\"':
- retval.append("\\\"");
- continue;
- case '\'':
- retval.append("\\\'");
- continue;
- case '\\':
- retval.append("\\\\");
- continue;
- default:
- if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
- String s = "0000" + Integer.toString(ch, 16);
- retval.append("\\u" + s.substring(s.length() - 4, s.length()));
- } else {
- retval.append(ch);
- }
- continue;
- }
- }
- return retval.toString();
- }
-
- /**
- * Returns a detailed message for the Error when it is thrown by the
- * token manager to indicate a lexical error.
- * Parameters :
- * EOFSeen : indicates if EOF caused the lexical error
- * curLexState : lexical state in which this error occurred
- * errorLine : line number when the error occurred
- * errorColumn : column number when the error occurred
- * errorAfter : prefix that was seen before this error occurred
- * curchar : the offending character
- * Note: You can customize the lexical error message by modifying this method.
- */
- protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
- return("Lexical error at line " +
- errorLine + ", column " +
- errorColumn + ". Encountered: " +
- (EOFSeen ? "
- *
- *
- * This class is generated by JavaCC. The only method that clients should need
- * to call is {@link #parse(String)}.
- *
- * The syntax for query strings is as follows:
- * A Query is a series of clauses.
- * A clause may be prefixed by:
- *
- *
- *
- * A clause may be either:
- * +) or a minus (-) sign, indicating
- * that the clause is required or prohibited respectively; or
- *
- *
- *
- * Thus, in BNF, the query grammar is:
- * +/- prefix to require any of a set of
- * terms.
- *
- * Query ::= ( Clause )*
- * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
- *
- *
- * OR_OPERATOR) terms without any modifiers
- * are considered optional: for example capital of Hungary is equal to
- * capital OR of OR Hungary.
- * In AND_OPERATOR mode terms are considered to be in conjunction: the
- * above mentioned query is parsed as capital AND of AND Hungary
- */
- public void setDefaultOperator(Operator op) {
- this.operator = op;
- }
-
- /**
- * Gets implicit operator setting, which will be either AND_OPERATOR
- * or OR_OPERATOR.
- */
- public Operator getDefaultOperator() {
- return operator;
- }
-
- /**
- * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
- * lower-cased or not. Default is true.
- */
- public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
- this.lowercaseExpandedTerms = lowercaseExpandedTerms;
- }
-
- /**
- * @see #setLowercaseExpandedTerms(boolean)
- */
- public boolean getLowercaseExpandedTerms() {
- return lowercaseExpandedTerms;
- }
- /**
- * By default PrecedenceQueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
- * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
- * a) Runs faster b) Does not have the scarcity of terms unduly influence score
- * c) avoids any "TooManyBooleanClauses" exception.
- * However, if your application really needs to use the
- * old-fashioned BooleanQuery expansion rewriting and the above
- * points are not relevant then use this to change
- * the rewrite method.
- */
- public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
- multiTermRewriteMethod = method;
- }
-
-
- /**
- * @see #setMultiTermRewriteMethod
- */
- public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
- return multiTermRewriteMethod;
- }
-
- /**
- * Set locale used by date range parsing.
- */
- public void setLocale(Locale locale) {
- this.locale = locale;
- }
-
- /**
- * Returns current locale, allowing access by subclasses.
- */
- public Locale getLocale() {
- return locale;
- }
-
- protected void addClause(List\.
- */
- public static String escape(String s) {
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < s.length(); i++) {
- char c = s.charAt(i);
- // NOTE: keep this in sync with _ESCAPED_CHAR below!
- if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
- || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
- || c == '*' || c == '?') {
- sb.append('\\');
- }
- sb.append(c);
- }
- return sb.toString();
- }
-
- /**
- * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
- * Usage:
- * java org.apache.lucene.queryParser.QueryParser <input>
- */
-// public static void main(String[] args) throws Exception {
-// if (args.length == 0) {
-// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser ");
-// System.exit(0);
-// }
-// PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
-// new org.apache.lucene.analysis.SimpleAnalyzer());
-// Query q = qp.parse(args[0]);
-// System.out.println(q.toString("field"));
-// }
-}
-
-PARSER_END(PrecedenceQueryParser)
-
-/* ***************** */
-/* Token Definitions */
-/* ***************** */
-
-<*> TOKEN : {
- <#_NUM_CHAR: ["0"-"9"] >
-// NOTE: keep this in sync with escape(String) above!
-| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?" ] >
-| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?" ]
- | <_ESCAPED_CHAR> ) >
-| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
-}
-
-Lucene Precedence Query Parser Processors
+
+Lucene Precedence Query Parser
+
+