Index: contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java =================================================================== --- contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (revision 820510) +++ contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (working copy) @@ -67,8 +67,8 @@ boolean inPhrase = false; int savedStart = 0, savedEnd = 0; - TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class); - OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); + TermAttribute termAtt = addAttribute(TermAttribute.class); + OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public boolean incrementToken() throws IOException { if (inPhrase) { Index: contrib/misc/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java (revision 820510) +++ contrib/misc/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java (working copy) @@ -18,8 +18,6 @@ * limitations under the License. */ -import org.apache.lucene.queryParser.*; - import java.io.*; /** An efficient implementation of JavaCC's CharStream interface.

Note that Index: contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (revision 820510) +++ contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java (working copy) @@ -6,12 +6,16 @@ import java.text.DateFormat; import java.util.ArrayList; import java.util.Date; +import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Vector; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; @@ -27,64 +31,68 @@ import org.apache.lucene.util.Parameter; /** - * Experimental query parser variant designed to handle operator precedence - * in a more sensible fashion than QueryParser. There are still some - * open issues with this parser. The following tests are currently failing - * in TestPrecedenceQueryParser and are disabled to make this test pass: + * Experimental query parser variant designed to handle operator precedence in a + * more sensible fashion than QueryParser. There are still some open issues with + * this parser. The following tests are currently failing in + * TestPrecedenceQueryParser and are disabled to make this test pass: *

- * - * This class is generated by JavaCC. The only method that clients should need + * + * This class is generated by JavaCC. The only method that clients should need * to call is {@link #parse(String)}. - * - * The syntax for query strings is as follows: - * A Query is a series of clauses. - * A clause may be prefixed by: + * + * The syntax for query strings is as follows: A Query is a series of clauses. A + * clause may be prefixed by: * - * + * * A clause may be either: * - * + * * Thus, in BNF, the query grammar is: + * *
  *   Query  ::= ( Clause )*
- *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+ *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
  * 
- * + * *

* Examples of appropriately formatted queries can be found in the query syntax * documentation. *

- * + * */ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { - private static final int CONJ_NONE = 0; - private static final int CONJ_AND = 1; - private static final int CONJ_OR = 2; + private static final int CONJ_NONE = 0; - private static final int MOD_NONE = 0; - private static final int MOD_NOT = 10; - private static final int MOD_REQ = 11; + private static final int CONJ_AND = 1; + private static final int CONJ_OR = 2; + + private static final int MOD_NONE = 0; + + private static final int MOD_NOT = 10; + + private static final int MOD_REQ = 11; + // make it possible to call setDefaultOperator() without accessing // the nested class: public static final Operator AND_OPERATOR = Operator.AND; + public static final Operator OR_OPERATOR = Operator.OR; /** The actual operator that parser uses to combine query terms */ @@ -93,23 +101,32 @@ boolean lowercaseExpandedTerms = true; Analyzer analyzer; + String field; + int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + Locale locale = Locale.getDefault(); static final class Operator extends Parameter { private Operator(String name) { super(name); } + static final Operator OR = new Operator("OR"); + static final Operator AND = new Operator("AND"); } - /** Constructs a query parser. - * @param f the default field for query terms. - * @param a used to find terms in the query text. + /** + * Constructs a query parser. + * + * @param f the default field for query terms. + * @param a used to find terms in the query text. */ public PrecedenceQueryParser(String f, Analyzer a) { this(new FastCharStream(new StringReader(""))); @@ -117,9 +134,11 @@ field = f; } - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param expression the query string to be parsed. 
- * @throws ParseException if the parsing fails + /** + * Parses a query string, returning a {@link org.apache.lucene.search.Query}. + * + * @param expression the query string to be parsed. + * @throws ParseException if the parsing fails */ public Query parse(String expression) throws ParseException { // optimize empty query to be empty BooleanQuery @@ -131,16 +150,14 @@ try { Query query = Query(field); return (query != null) ? query : new BooleanQuery(); - } - catch (TokenMgrError tme) { + } catch (TokenMgrError tme) { throw new ParseException(tme.getMessage()); - } - catch (BooleanQuery.TooManyClauses tmc) { + } catch (BooleanQuery.TooManyClauses tmc) { throw new ParseException("Too many boolean clauses"); } } - /** + /** * @return Returns the analyzer. */ public Analyzer getAnalyzer() { @@ -154,23 +171,23 @@ return field; } - /** + /** * Get the minimal similarity for fuzzy queries. */ public float getFuzzyMinSim() { - return fuzzyMinSim; + return fuzzyMinSim; } /** - * Set the minimum similarity for fuzzy queries. - * Default is 0.5f. + * Set the minimum similarity for fuzzy queries. Default is 0.5f. */ public void setFuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; + this.fuzzyMinSim = fuzzyMinSim; } - /** - * Get the prefix length for fuzzy queries. + /** + * Get the prefix length for fuzzy queries. + * * @return Returns the fuzzyPrefixLength. */ public int getFuzzyPrefixLength() { @@ -179,6 +196,7 @@ /** * Set the prefix length for fuzzy queries. Default is 0. + * * @param fuzzyPrefixLength The fuzzyPrefixLength to set. */ public void setFuzzyPrefixLength(int fuzzyPrefixLength) { @@ -186,8 +204,8 @@ } /** - * Sets the default slop for phrases. If zero, then exact phrase matches - * are required. Default value is zero. + * Sets the default slop for phrases. If zero, then exact phrase matches are + * required. Default value is zero. 
*/ public void setPhraseSlop(int phraseSlop) { this.phraseSlop = phraseSlop; @@ -201,28 +219,29 @@ } /** - * Sets the boolean operator of the QueryParser. - * In default mode (OR_OPERATOR) terms without any modifiers - * are considered optional: for example capital of Hungary is equal to + * Sets the boolean operator of the QueryParser. In default mode ( + * OR_OPERATOR) terms without any modifiers are considered + * optional: for example capital of Hungary is equal to * capital OR of OR Hungary.
- * In AND_OPERATOR mode terms are considered to be in conjuction: the - * above mentioned query is parsed as capital AND of AND Hungary + * In AND_OPERATOR mode terms are considered to be in conjunction: + * the above mentioned query is parsed as + * capital AND of AND Hungary */ public void setDefaultOperator(Operator op) { this.operator = op; } /** - * Gets implicit operator setting, which will be either AND_OPERATOR - * or OR_OPERATOR. + * Gets implicit operator setting, which will be either AND_OPERATOR or + * OR_OPERATOR. */ public Operator getDefaultOperator() { return operator; } /** - * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically - * lower-cased or not. Default is true. + * Whether terms of wildcard, prefix, fuzzy and range queries are to be + * automatically lower-cased or not. Default is true. */ public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { this.lowercaseExpandedTerms = lowercaseExpandedTerms; @@ -262,17 +281,19 @@ // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited if (clauses.size() > 0 && conj == CONJ_AND) { - BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1); + BooleanClause c = (BooleanClause) clauses.get(clauses.size() - 1); if (!c.isProhibited()) c.setOccur(BooleanClause.Occur.MUST); } if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, - // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) - // notice if the input is a OR b, first term is parsed as required; without + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR + // b) + // notice if the input is a OR b, first term is parsed as required; + // without // this modification a OR b would parsed as +a OR b - BooleanClause c = (BooleanClause) 
clauses.get(clauses.size() - 1); if (!c.isProhibited()) c.setOccur(BooleanClause.Occur.SHOULD); } @@ -294,7 +315,7 @@ // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED // if not PROHIBITED and not introduced by OR prohibited = (modifier == MOD_NOT); - required = (!prohibited && conj != CONJ_OR); + required = (!prohibited && conj != CONJ_OR); } if (required && !prohibited) clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST)); @@ -303,100 +324,178 @@ else if (!required && prohibited) clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); else - throw new RuntimeException("Clause cannot be both required and prohibited"); + throw new RuntimeException( + "Clause cannot be both required and prohibited"); } /** * @exception ParseException throw in overridden method to disallow */ - protected Query getFieldQuery(String field, String queryText) throws ParseException { + protected Query getFieldQuery(String field, String queryText) + throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); - List list = new ArrayList(); - final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token(); - org.apache.lucene.analysis.Token nextToken; + TokenStream source = analyzer.tokenStream(field, + new StringReader(queryText)); + try { + source = analyzer.reusableTokenStream(field, new StringReader(queryText)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new StringReader(queryText)); + } + + CachingTokenFilter buffer = new CachingTokenFilter(source); + TermAttribute termAtt = null; + PositionIncrementAttribute posIncrAtt = null; int positionCount = 0; boolean severalTokensAtSamePosition = false; + int numTokens = 0; - while (true) { - try { - nextToken = source.next(reusableToken); + boolean success = false; + try { + 
buffer.reset(); + success = true; + } catch (IOException e) { + // success==false if we hit an exception + } + if (success) { + if (buffer.hasAttribute(TermAttribute.class)) { + termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class); } - catch (IOException e) { - nextToken = null; + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = (PositionIncrementAttribute) buffer + .getAttribute(PositionIncrementAttribute.class); } - if (nextToken == null) - break; - list.add(nextToken.clone()); - if (nextToken.getPositionIncrement() == 1) - positionCount++; - else - severalTokensAtSamePosition = true; } + + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt + .getPositionIncrement() : 1; + + if (positionIncrement == 1) + positionCount++; + else + severalTokensAtSamePosition = true; + + hasMoreTokens = buffer.incrementToken(); + + } + } catch (IOException e) { + // ignore + } + } + try { + // rewind the buffer stream + buffer.reset(); + + // close original stream - all tokens buffered source.close(); - } - catch (IOException e) { + } catch (IOException e) { // ignore } - if (list.size() == 0) + if (numTokens == 0) return null; - else if (list.size() == 1) { - nextToken = (org.apache.lucene.analysis.Token) list.get(0); - return new TermQuery(new Term(field, nextToken.term())); + else if (numTokens == 1) { + + try { + buffer.incrementToken(); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + return new TermQuery(new Term(field, termAtt.term())); + } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = new BooleanQuery(); - for (int i = 0; i < list.size(); i++) { - nextToken = (org.apache.lucene.analysis.Token) list.get(i); - TermQuery currentQuery = new TermQuery( - new Term(field, 
nextToken.term())); - q.add(currentQuery, BooleanClause.Occur.SHOULD); + + try { + buffer.incrementToken(); + + do { + + TermQuery currentQuery = new TermQuery(new Term(field, termAtt + .term())); + q.add(currentQuery, BooleanClause.Occur.SHOULD); + + } while (buffer.incrementToken()); + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens } + return q; - } - else { + + } else { // phrase query: MultiPhraseQuery mpq = new MultiPhraseQuery(); - List multiTerms = new ArrayList(); - for (int i = 0; i < list.size(); i++) { - nextToken = (org.apache.lucene.analysis.Token) list.get(i); - if (nextToken.getPositionIncrement() == 1 && multiTerms.size() > 0) { - mpq.add((Term[])multiTerms.toArray(new Term[0])); - multiTerms.clear(); - } - multiTerms.add(new Term(field, nextToken.term())); + LinkedList multiTerms = new LinkedList(); + + try { + buffer.incrementToken(); + + do { + + if (posIncrAtt.getPositionIncrement() == 1 + && multiTerms.size() > 0) { + mpq.add((Term[]) multiTerms.toArray(new Term[0])); + multiTerms.clear(); + + } + + multiTerms.add(new Term(field, termAtt.term())); + + } while (buffer.incrementToken()); + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens } - mpq.add((Term[])multiTerms.toArray(new Term[0])); + + mpq.add((Term[]) multiTerms.toArray(new Term[0])); + return mpq; + } - } - else { + } else { PhraseQuery q = new PhraseQuery(); q.setSlop(phraseSlop); - for (int i = 0; i < list.size(); i++) { - q.add(new Term(field, ((org.apache.lucene.analysis.Token) - list.get(i)).term())); + + try { + buffer.incrementToken(); + + do { + q.add(new Term(field, termAtt.term())); + } while (buffer.incrementToken()); + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens } + return q; + } } } /** * Base implementation delegates to {@link #getFieldQuery(String,String)}. 
- * This method may be overridden, for example, to return - * a SpanNearQuery instead of a PhraseQuery. - * + * This method may be overridden, for example, to return a SpanNearQuery + * instead of a PhraseQuery. + * * @exception ParseException throw in overridden method to disallow */ protected Query getFieldQuery(String field, String queryText, int slop) - throws ParseException { + throws ParseException { Query query = getFieldQuery(field, queryText); if (query instanceof PhraseQuery) { @@ -412,11 +511,8 @@ /** * @exception ParseException throw in overridden method to disallow */ - protected Query getRangeQuery(String field, - String part1, - String part2, - boolean inclusive) throws ParseException - { + protected Query getRangeQuery(String field, String part1, String part2, + boolean inclusive) throws ParseException { if (lowercaseExpandedTerms) { part1 = part1.toLowerCase(); part2 = part2.toLowerCase(); @@ -428,83 +524,71 @@ Date d2 = df.parse(part2); part1 = DateTools.dateToString(d1, DateTools.Resolution.DAY); part2 = DateTools.dateToString(d2, DateTools.Resolution.DAY); + } catch (Exception e) { } - catch (Exception e) { } - return new RangeQuery(new Term(field, part1), - new Term(field, part2), - inclusive); + return new RangeQuery(new Term(field, part1), new Term(field, part2), + inclusive); } /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. - * + * Factory method for generating query, given a set of clauses. By default + * creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being returned. + * + * @param clauses List that contains {@link BooleanClause} instances to join. + * * @return Resulting {@link Query} object. 
* @exception ParseException throw in overridden method to disallow * @deprecated use {@link #getBooleanQuery(List)} instead */ - protected Query getBooleanQuery(Vector clauses) throws ParseException - { + protected Query getBooleanQuery(Vector clauses) throws ParseException { return getBooleanQuery((List) clauses, false); } /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. - * + * Factory method for generating query, given a set of clauses. By default + * creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being returned. + * + * @param clauses List that contains {@link BooleanClause} instances to join. + * * @return Resulting {@link Query} object. * @exception ParseException throw in overridden method to disallow */ - protected Query getBooleanQuery(List clauses) throws ParseException - { + protected Query getBooleanQuery(List clauses) throws ParseException { return getBooleanQuery(clauses, false); } /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. + * Factory method for generating query, given a set of clauses. By default + * creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being returned. + * + * @param clauses List that contains {@link BooleanClause} instances to join. * @param disableCoord true if coord scoring should be disabled. - * + * * @return Resulting {@link Query} object. 
* @exception ParseException throw in overridden method to disallow * @deprecated use {@link #getBooleanQuery(List, boolean)} instead */ protected Query getBooleanQuery(Vector clauses, boolean disableCoord) - throws ParseException - { + throws ParseException { return getBooleanQuery((List) clauses, disableCoord); } /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. + * Factory method for generating query, given a set of clauses. By default + * creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being returned. + * + * @param clauses List that contains {@link BooleanClause} instances to join. * @param disableCoord true if coord scoring should be disabled. - * + * * @return Resulting {@link Query} object. * @exception ParseException throw in overridden method to disallow */ @@ -515,34 +599,32 @@ BooleanQuery query = new BooleanQuery(disableCoord); for (int i = 0; i < clauses.size(); i++) { - query.add((BooleanClause)clauses.get(i)); + query.add((BooleanClause) clauses.get(i)); } return query; } /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains one or more wildcard - * characters (? and *), but is not a prefix term token (one - * that has just a single * character at the end) + * Factory method for generating a query. Called when parser parses an input + * term token that contains one or more wildcard characters (? and *), but is + * not a prefix term token (one that has just a single * character at the end) *

- * Depending on settings, prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. + * Depending on settings, prefix term may be lower-cased automatically. It + * will not go through the default Analyzer, however, since normal Analyzers + * are unlikely to work properly with wildcard templates. *

* Can be overridden by extending classes, to provide custom handling for * wildcard queries, which may be necessary due to missing analyzer calls. - * + * * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * + * @param termStr Term token that contains one or more wild card characters (? + * or *), but is not simple prefix term + * * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { + protected Query getWildcardQuery(String field, String termStr) + throws ParseException { if (lowercaseExpandedTerms) { termStr = termStr.toLowerCase(); } @@ -551,30 +633,28 @@ } /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. + * Factory method for generating a query (similar to {@link #getWildcardQuery} + * ). Called when parser parses an input term token that uses prefix notation; + * that is, contains a single '*' wildcard character as its last character. + * Since this is a special case of generic wildcard term, and such a query can + * be optimized easily, this usually results in a different query object. *

- * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. + * Depending on settings, a prefix term may be lower-cased automatically. It + * will not go through the default Analyzer, however, since normal Analyzers + * are unlikely to work properly with wildcard templates. *

- * Can be overridden by extending classes, to provide custom handling for - * wild card queries, which may be necessary due to missing analyzer calls. - * + * Can be overridden by extending classes, to provide custom handling for wild + * card queries, which may be necessary due to missing analyzer calls. + * * @param field Name of the field query will use. * @param termStr Term token to use for building term for the query - * (without trailing '*' character!) - * + * (without trailing '*' character!) + * * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getPrefixQuery(String field, String termStr) throws ParseException - { + protected Query getPrefixQuery(String field, String termStr) + throws ParseException { if (lowercaseExpandedTerms) { termStr = termStr.toLowerCase(); } @@ -582,19 +662,19 @@ return new PrefixQuery(t); } - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~) appended. - * + /** + * Factory method for generating a query (similar to {@link #getWildcardQuery} + * ). Called when parser parses an input term token that has the fuzzy suffix + * (~) appended. + * * @param field Name of the field query will use. * @param termStr Term token to use for building term for the query - * + * * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException - { + protected Query getFuzzyQuery(String field, String termStr, + float minSimilarity) throws ParseException { if (lowercaseExpandedTerms) { termStr = termStr.toLowerCase(); } @@ -603,33 +683,33 @@ } /** - * Returns a String where the escape char has been - * removed, or kept only once if there was a double escape. 
+ * Returns a String where the escape char has been removed, or kept only once + * if there was a double escape. */ private String discardEscapeChar(String input) { char[] caSource = input.toCharArray(); char[] caDest = new char[caSource.length]; int j = 0; for (int i = 0; i < caSource.length; i++) { - if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { - caDest[j++]=caSource[i]; + if ((caSource[i] != '\\') || (i > 0 && caSource[i - 1] == '\\')) { + caDest[j++] = caSource[i]; } } return new String(caDest, 0, j); } /** - * Returns a String where those characters that QueryParser - * expects to be escaped are escaped by a preceding \. + * Returns a String where those characters that QueryParser expects to be + * escaped are escaped by a preceding \. */ public static String escape(String s) { StringBuffer sb = new StringBuffer(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); // NOTE: keep this in sync with _ESCAPED_CHAR below! - if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' - || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' - || c == '*' || c == '?') { + if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' + || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' + || c == '{' || c == '}' || c == '~' || c == '*' || c == '?') { sb.append('\\'); } sb.append(c); @@ -638,492 +718,540 @@ } /** - * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}. - * Usage:
+ * Command line tool to test QueryParser, using + * {@link org.apache.lucene.analysis.SimpleAnalyzer}. Usage:
* java org.apache.lucene.queryParser.QueryParser <input> */ public static void main(String[] args) throws Exception { if (args.length == 0) { - System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); + System.out + .println("Usage: java org.apache.lucene.queryParser.QueryParser "); System.exit(0); } PrecedenceQueryParser qp = new PrecedenceQueryParser("field", - new org.apache.lucene.analysis.SimpleAnalyzer()); + new org.apache.lucene.analysis.SimpleAnalyzer()); Query q = qp.parse(args[0]); System.out.println(q.toString("field")); } -// * Query ::= ( Clause )* -// * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) + // * Query ::= ( Clause )* + // * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) final public int Conjunction() throws ParseException { - int ret = CONJ_NONE; - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case AND: - case OR: - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + int ret = CONJ_NONE; + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { case AND: - jj_consume_token(AND); + case OR: + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case AND: + jj_consume_token(AND); ret = CONJ_AND; + break; + case OR: + jj_consume_token(OR); + ret = CONJ_OR; + break; + default: + jj_la1[0] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } break; - case OR: - jj_consume_token(OR); - ret = CONJ_OR; - break; default: - jj_la1[0] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - break; - default: - jj_la1[1] = jj_gen; - ; + jj_la1[1] = jj_gen; + ; } - {if (true) return ret;} + { + if (true) + return ret; + } throw new Error("Missing return statement in function"); } final public int Modifier() throws ParseException { - int ret = MOD_NONE; - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case NOT: - case PLUS: - case MINUS: - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + int ret = MOD_NONE; + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case NOT: case PLUS: - jj_consume_token(PLUS); - ret = MOD_REQ; - break; case MINUS: - jj_consume_token(MINUS); - ret = MOD_NOT; + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case PLUS: + jj_consume_token(PLUS); + ret = MOD_REQ; + break; + case MINUS: + jj_consume_token(MINUS); + ret = MOD_NOT; + break; + case NOT: + jj_consume_token(NOT); + ret = MOD_NOT; + break; + default: + jj_la1[2] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } break; - case NOT: - jj_consume_token(NOT); - ret = MOD_NOT; - break; default: - jj_la1[2] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - break; - default: - jj_la1[3] = jj_gen; - ; + jj_la1[3] = jj_gen; + ; } - {if (true) return ret;} + { + if (true) + return ret; + } throw new Error("Missing return statement in function"); } final public Query Query(String field) throws ParseException { - List clauses = new ArrayList(); - Query q, firstQuery=null; - boolean orPresent = false; - int modifier; + List clauses = new ArrayList(); + Query q, firstQuery = null; + boolean orPresent = false; + int modifier; modifier = Modifier(); q = andExpression(field); addClause(clauses, CONJ_NONE, modifier, q); if (modifier == MOD_NONE) firstQuery = q; - label_1: - while (true) { - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case OR: - case NOT: - case PLUS: - case MINUS: - case LPAREN: - case QUOTED: - case TERM: - case PREFIXTERM: - case WILDTERM: - case RANGEIN_START: - case RANGEEX_START: - case NUMBER: - ; - break; - default: - jj_la1[4] = jj_gen; - break label_1; + label_1: while (true) { + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case OR: + case NOT: + case PLUS: + case MINUS: + case LPAREN: + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case RANGEIN_START: + case RANGEEX_START: + case NUMBER: + ; + break; + default: + jj_la1[4] = jj_gen; + break label_1; } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case OR: - jj_consume_token(OR); - orPresent=true; - break; - default: - jj_la1[5] = jj_gen; - ; + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case OR: + jj_consume_token(OR); + orPresent = true; + break; + default: + jj_la1[5] = jj_gen; + ; } modifier = Modifier(); q = andExpression(field); addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE, modifier, q); } - if (clauses.size() == 1 && firstQuery != null) - {if (true) return firstQuery;} - else { - {if (true) return getBooleanQuery(clauses);} + if (clauses.size() == 1 && firstQuery != null) { + if (true) + return firstQuery; + } else { + { + if (true) + return getBooleanQuery(clauses); } + } throw new Error("Missing return statement in function"); } final public Query andExpression(String field) throws ParseException { - List clauses = new ArrayList(); - Query q, firstQuery=null; - int modifier; + List clauses = new ArrayList(); + Query q, firstQuery = null; + int modifier; q = Clause(field); addClause(clauses, CONJ_NONE, MOD_NONE, q); firstQuery = q; - label_2: - while (true) { - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case AND: - ; - break; - default: - jj_la1[6] = jj_gen; - break label_2; + label_2: while (true) { + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case AND: + ; + break; + default: + jj_la1[6] = jj_gen; + break label_2; } jj_consume_token(AND); modifier = Modifier(); q = Clause(field); addClause(clauses, CONJ_AND, modifier, q); } - if (clauses.size() == 1 && firstQuery != null) - {if (true) return firstQuery;} - else { - {if (true) return getBooleanQuery(clauses);} + if (clauses.size() == 1 && firstQuery != null) { + if (true) + return firstQuery; + } else { + { + if (true) + return getBooleanQuery(clauses); } + } throw new Error("Missing return statement in function"); } final public Query Clause(String field) throws ParseException { - Query q; - Token fieldToken=null, boost=null; + Query q; + Token fieldToken = null, boost = null; if (jj_2_1(2)) { fieldToken = jj_consume_token(TERM); jj_consume_token(COLON); - field=discardEscapeChar(fieldToken.image); + field = discardEscapeChar(fieldToken.image); } else { ; } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case QUOTED: - case TERM: - case PREFIXTERM: - case WILDTERM: - case RANGEIN_START: - case RANGEEX_START: - case NUMBER: - q = Term(field); - break; - case LPAREN: - jj_consume_token(LPAREN); - q = Query(field); - jj_consume_token(RPAREN); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case RANGEIN_START: + case RANGEEX_START: + case NUMBER: + q = Term(field); break; + case LPAREN: + jj_consume_token(LPAREN); + q = Query(field); + jj_consume_token(RPAREN); + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[7] = jj_gen; + ; + } + break; default: - jj_la1[7] = jj_gen; - ; + jj_la1[8] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + if (boost != null) { + float f = (float) 1.0; + try { + f = Float.valueOf(boost.image).floatValue(); + q.setBoost(f); + } catch (Exception ignored) { } - break; - default: - jj_la1[8] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); } - if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - q.setBoost(f); - } catch (Exception ignored) { } - } - {if (true) return q;} + { + if (true) + return q; + } throw new Error("Missing return statement in function"); } final public Query Term(String field) throws ParseException { - Token term, boost=null, fuzzySlop=null, goop1, goop2; - boolean prefix = false; - boolean wildcard = false; - boolean fuzzy = false; - Query q; - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case TERM: - case PREFIXTERM: - case WILDTERM: - case NUMBER: - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + Token term, boost = null, fuzzySlop = null, goop1, goop2; + boolean prefix = false; + boolean wildcard = false; + boolean fuzzy = false; + Query q; + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { case TERM: - term = jj_consume_token(TERM); - break; case PREFIXTERM: - term = jj_consume_token(PREFIXTERM); - prefix=true; - break; case WILDTERM: - term = jj_consume_token(WILDTERM); - wildcard=true; - break; case NUMBER: - term = jj_consume_token(NUMBER); - break; - default: - jj_la1[9] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy=true; - break; - default: - jj_la1[10] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy=true; - break; - default: - jj_la1[11] = jj_gen; - ; + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case TERM: + term = jj_consume_token(TERM); + break; + case PREFIXTERM: + term = jj_consume_token(PREFIXTERM); + prefix = true; + break; + case WILDTERM: + term = jj_consume_token(WILDTERM); + wildcard = true; + break; + case NUMBER: + term = jj_consume_token(NUMBER); + break; + default: + jj_la1[9] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); } - break; - default: - jj_la1[12] = jj_gen; - ; - } - String termImage=discardEscapeChar(term.image); - if (wildcard) { - q = getWildcardQuery(field, termImage); - } else if (prefix) { - q = getPrefixQuery(field, - discardEscapeChar(term.image.substring - (0, term.image.length()-1))); - } else if (fuzzy) { + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy = true; + break; + default: + jj_la1[10] = jj_gen; + ; + } + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy = true; + break; + default: + jj_la1[11] = jj_gen; + ; + } + break; + default: + jj_la1[12] = jj_gen; + ; + } + String termImage = discardEscapeChar(term.image); + if (wildcard) { + q = getWildcardQuery(field, termImage); + } else if (prefix) { + q = getPrefixQuery(field, discardEscapeChar(term.image.substring(0, + term.image.length() - 1))); + } else if (fuzzy) { float fms = fuzzyMinSim; try { fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); - } catch (Exception ignored) { } - if(fms < 0.0f || fms > 1.0f){ - {if (true) throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");} - } - q = getFuzzyQuery(field, termImage, fms); - } else { - q = getFieldQuery(field, termImage); - } - break; - case RANGEIN_START: - jj_consume_token(RANGEIN_START); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEIN_GOOP: - goop1 = jj_consume_token(RANGEIN_GOOP); + } catch (Exception ignored) { + } + if (fms < 0.0f || fms > 1.0f) { + { + if (true) + throw new ParseException( + "Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); + } + } + q = getFuzzyQuery(field, termImage, fms); + } else { + q = getFieldQuery(field, termImage); + } break; - case RANGEIN_QUOTED: - goop1 = jj_consume_token(RANGEIN_QUOTED); + case RANGEIN_START: + jj_consume_token(RANGEIN_START); + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case RANGEIN_GOOP: + goop1 = jj_consume_token(RANGEIN_GOOP); + break; + case RANGEIN_QUOTED: + goop1 = jj_consume_token(RANGEIN_QUOTED); + break; + default: + jj_la1[13] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case RANGEIN_TO: + jj_consume_token(RANGEIN_TO); + break; + default: + jj_la1[14] = jj_gen; + ; + } + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case RANGEIN_GOOP: + goop2 = jj_consume_token(RANGEIN_GOOP); + break; + case RANGEIN_QUOTED: + goop2 = jj_consume_token(RANGEIN_QUOTED); + break; + default: + jj_la1[15] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + jj_consume_token(RANGEIN_END); + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[16] = jj_gen; + ; + } + if (goop1.kind == RANGEIN_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length() - 1); + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEIN_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length() - 1); + } else { + goop2.image = discardEscapeChar(goop2.image); + } + q = getRangeQuery(field, goop1.image, goop2.image, true); break; - default: - jj_la1[13] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEIN_TO: - jj_consume_token(RANGEIN_TO); + case RANGEEX_START: + jj_consume_token(RANGEEX_START); + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case RANGEEX_GOOP: + goop1 = jj_consume_token(RANGEEX_GOOP); + break; + case RANGEEX_QUOTED: + goop1 = jj_consume_token(RANGEEX_QUOTED); + break; + default: + jj_la1[17] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case RANGEEX_TO: + jj_consume_token(RANGEEX_TO); + break; + default: + jj_la1[18] = jj_gen; + ; + } + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case RANGEEX_GOOP: + goop2 = jj_consume_token(RANGEEX_GOOP); + break; + case RANGEEX_QUOTED: + goop2 = jj_consume_token(RANGEEX_QUOTED); + break; + default: + jj_la1[19] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + jj_consume_token(RANGEEX_END); + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[20] = jj_gen; + ; + } + if (goop1.kind == RANGEEX_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length() - 1); + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEEX_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length() - 1); + } else { + goop2.image = discardEscapeChar(goop2.image); + } + + q = getRangeQuery(field, goop1.image, goop2.image, false); break; - default: - jj_la1[14] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEIN_GOOP: - goop2 = jj_consume_token(RANGEIN_GOOP); - break; - case RANGEIN_QUOTED: - goop2 = jj_consume_token(RANGEIN_QUOTED); - break; - default: - jj_la1[15] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - jj_consume_token(RANGEIN_END); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[16] = jj_gen; - ; - } - if (goop1.kind == RANGEIN_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); + case QUOTED: + term = jj_consume_token(QUOTED); + switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + break; + default: + jj_la1[21] = jj_gen; + ; + } + switch ((jj_ntk == -1) ? 
jj_ntk() : jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[22] = jj_gen; + ; + } + int s = phraseSlop; + + if (fuzzySlop != null) { + try { + s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); + } catch (Exception ignored) { } - if (goop2.kind == RANGEIN_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - q = getRangeQuery(field, goop1.image, goop2.image, true); - break; - case RANGEEX_START: - jj_consume_token(RANGEEX_START); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEEX_GOOP: - goop1 = jj_consume_token(RANGEEX_GOOP); + } + q = getFieldQuery(field, term.image.substring(1, + term.image.length() - 1), s); break; - case RANGEEX_QUOTED: - goop1 = jj_consume_token(RANGEEX_QUOTED); - break; default: - jj_la1[17] = jj_gen; + jj_la1[23] = jj_gen; jj_consume_token(-1); throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEEX_TO: - jj_consume_token(RANGEEX_TO); - break; - default: - jj_la1[18] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEEX_GOOP: - goop2 = jj_consume_token(RANGEEX_GOOP); - break; - case RANGEEX_QUOTED: - goop2 = jj_consume_token(RANGEEX_QUOTED); - break; - default: - jj_la1[19] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - jj_consume_token(RANGEEX_END); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[20] = jj_gen; - ; - } - if (goop1.kind == RANGEEX_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEEX_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - - q = getRangeQuery(field, goop1.image, goop2.image, 
false); - break; - case QUOTED: - term = jj_consume_token(QUOTED); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - break; - default: - jj_la1[21] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[22] = jj_gen; - ; - } - int s = phraseSlop; - - if (fuzzySlop != null) { - try { - s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); - } - catch (Exception ignored) { } - } - q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s); - break; - default: - jj_la1[23] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); } if (boost != null) { float f = (float) 1.0; try { f = Float.valueOf(boost.image).floatValue(); + } catch (Exception ignored) { + /* + * Should this be handled somehow? (defaults to "no boost", if boost + * number is invalid) + */ } - catch (Exception ignored) { - /* Should this be handled somehow? 
(defaults to "no boost", if - * boost number is invalid) - */ - } // avoid boosting null queries, such as those caused by stop words if (q != null) { q.setBoost(f); } } - {if (true) return q;} + { + if (true) + return q; + } throw new Error("Missing return statement in function"); } final private boolean jj_2_1(int xla) { - jj_la = xla; jj_lastpos = jj_scanpos = token; - try { return !jj_3_1(); } - catch(LookaheadSuccess ls) { return true; } - finally { jj_save(0, xla); } + jj_la = xla; + jj_lastpos = jj_scanpos = token; + try { + return !jj_3_1(); + } catch (LookaheadSuccess ls) { + return true; + } finally { + jj_save(0, xla); + } } final private boolean jj_3_1() { - if (jj_scan_token(TERM)) return true; - if (jj_scan_token(COLON)) return true; + if (jj_scan_token(TERM)) + return true; + if (jj_scan_token(COLON)) + return true; return false; } public PrecedenceQueryParserTokenManager token_source; + public Token token, jj_nt; + private int jj_ntk; + private Token jj_scanpos, jj_lastpos; + private int jj_la; + public boolean lookingAhead = false; + private boolean jj_semLA; + private int jj_gen; + final private int[] jj_la1 = new int[24]; + static private int[] jj_la1_0; static { - jj_la1_0(); - } - private static void jj_la1_0() { - jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f00,0x100,0x80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,}; - } + jj_la1_0(); + } + + private static void jj_la1_0() { + jj_la1_0 = new int[] { 0x180, 0x180, 0xe00, 0xe00, 0xfb1f00, 0x100, 0x80, + 0x8000, 0xfb1000, 0x9a0000, 0x40000, 0x40000, 0x8000, 0xc000000, + 0x1000000, 0xc000000, 0x8000, 0xc0000000, 0x10000000, 0xc0000000, + 0x8000, 0x40000, 0x8000, 0xfb0000, }; + } + final private JJCalls[] jj_2_rtns = new JJCalls[1]; + private boolean jj_rescan = false; + private int jj_gc = 0; public PrecedenceQueryParser(CharStream stream) { @@ -1131,8 +1259,10 @@ token = new Token(); 
jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + for (int i = 0; i < 24; i++) + jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) + jj_2_rtns[i] = new JJCalls(); } public void ReInit(CharStream stream) { @@ -1140,8 +1270,10 @@ token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + for (int i = 0; i < 24; i++) + jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) + jj_2_rtns[i] = new JJCalls(); } public PrecedenceQueryParser(PrecedenceQueryParserTokenManager tm) { @@ -1149,8 +1281,10 @@ token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + for (int i = 0; i < 24; i++) + jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) + jj_2_rtns[i] = new JJCalls(); } public void ReInit(PrecedenceQueryParserTokenManager tm) { @@ -1158,14 +1292,18 @@ token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + for (int i = 0; i < 24; i++) + jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) + jj_2_rtns[i] = new JJCalls(); } final private Token jj_consume_token(int kind) throws ParseException { Token oldToken; - if ((oldToken = token).next != null) token = token.next; - else token = token.next = token_source.getNextToken(); + if ((oldToken = token).next != null) + token = token.next; + else + token = token.next = token_source.getNextToken(); jj_ntk = -1; if (token.kind == kind) { jj_gen++; @@ -1174,7 +1312,8 @@ for (int i = 0; i < jj_2_rtns.length; i++) { JJCalls c = jj_2_rtns[i]; while (c != null) { - if (c.gen < jj_gen) c.first = null; + if (c.gen < jj_gen) + c.first = null; c = c.next; } } @@ -1186,8 +1325,11 
@@ throw generateParseException(); } - static private final class LookaheadSuccess extends java.lang.Error { } + static private final class LookaheadSuccess extends java.lang.Error { + } + final private LookaheadSuccess jj_ls = new LookaheadSuccess(); + final private boolean jj_scan_token(int kind) { if (jj_scanpos == jj_lastpos) { jj_la--; @@ -1200,18 +1342,27 @@ jj_scanpos = jj_scanpos.next; } if (jj_rescan) { - int i = 0; Token tok = token; - while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; } - if (tok != null) jj_add_error_token(kind, i); + int i = 0; + Token tok = token; + while (tok != null && tok != jj_scanpos) { + i++; + tok = tok.next; + } + if (tok != null) + jj_add_error_token(kind, i); } - if (jj_scanpos.kind != kind) return true; - if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; + if (jj_scanpos.kind != kind) + return true; + if (jj_la == 0 && jj_scanpos == jj_lastpos) + throw jj_ls; return false; } final public Token getNextToken() { - if (token.next != null) token = token.next; - else token = token.next = token_source.getNextToken(); + if (token.next != null) + token = token.next; + else + token = token.next = token_source.getNextToken(); jj_ntk = -1; jj_gen++; return token; @@ -1220,27 +1371,34 @@ final public Token getToken(int index) { Token t = lookingAhead ? 
jj_scanpos : token; for (int i = 0; i < index; i++) { - if (t.next != null) t = t.next; - else t = t.next = token_source.getNextToken(); + if (t.next != null) + t = t.next; + else + t = t.next = token_source.getNextToken(); } return t; } final private int jj_ntk() { - if ((jj_nt=token.next) == null) - return (jj_ntk = (token.next=token_source.getNextToken()).kind); + if ((jj_nt = token.next) == null) + return (jj_ntk = (token.next = token_source.getNextToken()).kind); else return (jj_ntk = jj_nt.kind); } private java.util.Vector jj_expentries = new java.util.Vector(); + private int[] jj_expentry; + private int jj_kind = -1; + private int[] jj_lasttokens = new int[100]; + private int jj_endpos; private void jj_add_error_token(int kind, int pos) { - if (pos >= 100) return; + if (pos >= 100) + return; if (pos == jj_endpos + 1) { jj_lasttokens[jj_endpos++] = kind; } else if (jj_endpos != 0) { @@ -1249,8 +1407,9 @@ jj_expentry[i] = jj_lasttokens[i]; } boolean exists = false; - for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) { - int[] oldentry = (int[])(e.nextElement()); + for (java.util.Enumeration e = jj_expentries.elements(); e + .hasMoreElements();) { + int[] oldentry = (int[]) (e.nextElement()); if (oldentry.length == jj_expentry.length) { exists = true; for (int i = 0; i < jj_expentry.length; i++) { @@ -1259,11 +1418,14 @@ break; } } - if (exists) break; + if (exists) + break; } } - if (!exists) jj_expentries.addElement(jj_expentry); - if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; + if (!exists) + jj_expentries.addElement(jj_expentry); + if (pos != 0) + jj_lasttokens[(jj_endpos = pos) - 1] = kind; } } @@ -1280,7 +1442,7 @@ for (int i = 0; i < 24; i++) { if (jj_la1[i] == jj_gen) { for (int j = 0; j < 32; j++) { - if ((jj_la1_0[i] & (1< jj_gen) { - jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; + jj_la = p.arg; + jj_lastpos = jj_scanpos = p.first; switch (i) { - case 0: jj_3_1(); break; + case 0: + jj_3_1(); + break; } } 
p = p.next; @@ -1329,16 +1494,24 @@ final private void jj_save(int index, int xla) { JJCalls p = jj_2_rtns[index]; while (p.gen > jj_gen) { - if (p.next == null) { p = p.next = new JJCalls(); break; } + if (p.next == null) { + p = p.next = new JJCalls(); + break; + } p = p.next; } - p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; + p.gen = jj_gen + xla - jj_la; + p.first = token; + p.arg = xla; } static final class JJCalls { int gen; + Token first; + int arg; + JJCalls next; } Index: contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java =================================================================== --- contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java (revision 820510) +++ contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java (working copy) @@ -1,28 +1,5 @@ /* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserTokenManager.java */ package org.apache.lucene.queryParser.precedence; -import java.io.IOException; -import java.io.StringReader; -import java.text.DateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Locale; -import java.util.Vector; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiPhraseQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.RangeQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.Parameter; public class 
PrecedenceQueryParserTokenManager implements PrecedenceQueryParserConstants { Index: contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj =================================================================== --- contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (revision 820510) +++ contrib/misc/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (working copy) @@ -337,80 +337,156 @@ * @exception ParseException throw in overridden method to disallow */ protected Query getFieldQuery(String field, String queryText) throws ParseException { - // Use the analyzer to get all the tokens, and then build a TermQuery, + // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); - List list = new ArrayList(); - final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token(); - org.apache.lucene.analysis.Token nextToken; + TokenStream source = analyzer.tokenStream(field, + new StringReader(queryText)); + try { + source = analyzer.reusableTokenStream(field, new StringReader(queryText)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new StringReader(queryText)); + } + + CachingTokenFilter buffer = new CachingTokenFilter(source); + TermAttribute termAtt = null; + PositionIncrementAttribute posIncrAtt = null; int positionCount = 0; boolean severalTokensAtSamePosition = false; + int numTokens = 0; - while (true) { - try { - nextToken = source.next(reusableToken); + boolean success = false; + try { + buffer.reset(); + success = true; + } catch (IOException e) { + // success==false if we hit an exception + } + if (success) { + if (buffer.hasAttribute(TermAttribute.class)) { + termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class); } - catch (IOException e) { - nextToken 
= null; + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = (PositionIncrementAttribute) buffer + .getAttribute(PositionIncrementAttribute.class); } - if (nextToken == null) - break; - list.add(nextToken.clone()); - if (nextToken.getPositionIncrement() == 1) - positionCount++; - else - severalTokensAtSamePosition = true; } + + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt + .getPositionIncrement() : 1; + + if (positionIncrement == 1) + positionCount++; + else + severalTokensAtSamePosition = true; + + hasMoreTokens = buffer.incrementToken(); + + } + } catch (IOException e) { + // ignore + } + } + try { + // rewind the buffer stream + buffer.reset(); + + // close original stream - all tokens buffered source.close(); - } - catch (IOException e) { + } catch (IOException e) { // ignore } - if (list.size() == 0) + if (numTokens == 0) return null; - else if (list.size() == 1) { - nextToken = (org.apache.lucene.analysis.Token) list.get(0); - return new TermQuery(new Term(field, nextToken.term())); + else if (numTokens == 1) { + + try { + buffer.incrementToken(); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + return new TermQuery(new Term(field, termAtt.term())); + } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = new BooleanQuery(); - for (int i = 0; i < list.size(); i++) { - nextToken = (org.apache.lucene.analysis.Token) list.get(i); - TermQuery currentQuery = new TermQuery( - new Term(field, nextToken.term())); - q.add(currentQuery, BooleanClause.Occur.SHOULD); + + try { + buffer.incrementToken(); + + do { + + TermQuery currentQuery = new TermQuery(new Term(field, termAtt + .term())); + q.add(currentQuery, BooleanClause.Occur.SHOULD); + + } while (buffer.incrementToken()); + + } catch 
(IOException e) { + // safe to ignore, because we know the number of tokens } + return q; - } - else { + + } else { // phrase query: MultiPhraseQuery mpq = new MultiPhraseQuery(); - List multiTerms = new ArrayList(); - for (int i = 0; i < list.size(); i++) { - nextToken = (org.apache.lucene.analysis.Token) list.get(i); - if (nextToken.getPositionIncrement() == 1 && multiTerms.size() > 0) { - mpq.add((Term[])multiTerms.toArray(new Term[0])); - multiTerms.clear(); - } - multiTerms.add(new Term(field, nextToken.term())); + LinkedList multiTerms = new LinkedList(); + + try { + buffer.incrementToken(); + + do { + + if (posIncrAtt.getPositionIncrement() == 1 + && multiTerms.size() > 0) { + mpq.add((Term[]) multiTerms.toArray(new Term[0])); + multiTerms.clear(); + + } + + multiTerms.add(new Term(field, termAtt.term())); + + } while (buffer.incrementToken()); + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens } - mpq.add((Term[])multiTerms.toArray(new Term[0])); + + mpq.add((Term[]) multiTerms.toArray(new Term[0])); + return mpq; + } - } - else { + } else { PhraseQuery q = new PhraseQuery(); q.setSlop(phraseSlop); - for (int i = 0; i < list.size(); i++) { - q.add(new Term(field, ((org.apache.lucene.analysis.Token) - list.get(i)).term())); + + try { + buffer.incrementToken(); + + do { + q.add(new Term(field, termAtt.term())); + } while (buffer.incrementToken()); + + } catch (IOException e) { + // safe to ignore, because we know the number of tokens } + return q; + } } }