Index: pom.xml =================================================================== --- pom.xml (revision 721490) +++ pom.xml (working copy) @@ -65,23 +65,6 @@ - org.codehaus.mojo - javacc-maven-plugin - 2.4.1 - - - fulltext - - ${basedir}/src/main/javacc/fulltext - org.apache.jackrabbit.core.query.lucene.fulltext - - - jjtree-javacc - - - - - maven-surefire-plugin Index: src/main/java/org/apache/jackrabbit/core/query/lucene/fulltext/FastCharStream.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/fulltext/FastCharStream.java (revision 721490) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/fulltext/FastCharStream.java (working copy) @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.jackrabbit.core.query.lucene.fulltext; - -import java.io.IOException; - -/** - * Modified version of org.apache.lucene.queryParser.FastCharStream - * based on a String input. - *

- * An efficient implementation of JavaCC's CharStream interface.

Note that - * this does not do line-number counting, but instead keeps track of the - * character position of the token in the input, as required by Lucene's {@link - * org.apache.lucene.analysis.Token} API. - */ -public final class FastCharStream implements CharStream { - - /** - * Next char to read. - */ - private int position; - - /** - * Offset in String for current token. - */ - private int tokenStart; - - /** - * The input String. - */ - private String input; - - /** - * Constructs from a String. - */ - public FastCharStream(String input) { - this.input = input; - } - - /** - * @inheritDoc - */ - public char readChar() throws IOException { - if (position >= input.length()) { - throw new IOException("read past eof"); - } - return input.charAt(position++); - } - - /** - * @inheritDoc - */ - public char BeginToken() throws IOException { - tokenStart = position; - return readChar(); - } - - /** - * @inheritDoc - */ - public void backup(int amount) { - position -= amount; - } - - /** - * @inheritDoc - */ - public String GetImage() { - return input.substring(tokenStart, position); - } - - /** - * @inheritDoc - */ - public char[] GetSuffix(int len) { - char[] value = new char[len]; - for (int i = 0; i < len; i++) { - value[i] = input.charAt(position - len + i); - } - return value; - } - - /** - * @inheritDoc - */ - public void Done() { - } - - /** - * @inheritDoc - */ - public int getColumn() { - return position; - } - - /** - * @inheritDoc - */ - public int getLine() { - return 1; - } - - /** - * @inheritDoc - */ - public int getEndColumn() { - return position; - } - - /** - * @inheritDoc - */ - public int getEndLine() { - return 1; - } - - /** - * @inheritDoc - */ - public int getBeginColumn() { - return tokenStart; - } - - /** - * @inheritDoc - */ - public int getBeginLine() { - return 1; - } -} Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (revision 0) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (revision 0) @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import java.util.Vector; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanClause; + +/** + * JackrabbitQueryParser extends the standard lucene query parser + * and adds JCR specific customizations. + */ +public class JackrabbitQueryParser extends QueryParser { + + /** + * The Jackrabbit synonym provider or null if there is none. + */ + private final SynonymProvider synonymProvider; + + /** + * Creates a new query parser instance. + * + * @param fieldName the field name. + * @param analyzer the analyzer. + * @param synonymProvider the synonym provider or null if none + * is available. + */ + public JackrabbitQueryParser(String fieldName, + Analyzer analyzer, + SynonymProvider synonymProvider) { + super(fieldName, analyzer); + this.synonymProvider = synonymProvider; + setAllowLeadingWildcard(true); + setDefaultOperator(Operator.AND); + } + + /** + * {@inheritDoc} + */ + public Query parse(String textsearch) throws ParseException { + // replace escaped ' with just ' + StringBuffer rewritten = new StringBuffer(); + // the default lucene query parser recognizes 'AND' and 'NOT' as + // keywords. + textsearch = textsearch.replaceAll("AND", "and"); + textsearch = textsearch.replaceAll("NOT", "not"); + boolean escaped = false; + for (int i = 0; i < textsearch.length(); i++) { + if (textsearch.charAt(i) == '\\') { + if (escaped) { + rewritten.append("\\\\"); + escaped = false; + } else { + escaped = true; + } + } else if (textsearch.charAt(i) == '\'') { + if (escaped) { + escaped = false; + } + rewritten.append(textsearch.charAt(i)); + } else if (textsearch.charAt(i) == '~') { + if (i == 0 || Character.isWhitespace(textsearch.charAt(i - 1))) { + // escape tilde so we can use it for similarity query + rewritten.append("\\"); + } + rewritten.append('~'); + } else { + if (escaped) { + rewritten.append('\\'); + escaped = false; + } + rewritten.append(textsearch.charAt(i)); + } + } + return super.parse(rewritten.toString()); + } + + /** + * Factory method for generating a synonym query. + * Called when parser parses an input term token that has the synonym + * prefix (~term) prepended. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getSynonymQuery(String field, String termStr) + throws ParseException { + Vector synonyms = new Vector(); + synonyms.add(new BooleanClause(getFieldQuery(field, termStr), + BooleanClause.Occur.SHOULD)); + if (synonymProvider != null) { + String[] terms = synonymProvider.getSynonyms(termStr); + for (int i = 0; i < terms.length; i++) { + synonyms.add(new BooleanClause(getFieldQuery(field, terms[i]), + BooleanClause.Occur.SHOULD)); + } + } + if (synonyms.size() == 1) { + return ((BooleanClause) synonyms.get(0)).getQuery(); + } else { + return getBooleanQuery(synonyms); + } + } + + + /** + * {@inheritDoc} + */ + protected Query getFieldQuery(String field, String queryText) + throws ParseException { + if (queryText.startsWith("~")) { + // synonym query + return getSynonymQuery(field, queryText.substring(1)); + } else { + return super.getFieldQuery(field, queryText); + } + } + + /** + * {@inheritDoc} + */ + protected Query getPrefixQuery(String field, String termStr) + throws ParseException { + return getWildcardQuery(field, termStr + "*"); + } + + /** + * {@inheritDoc} + */ + protected Query getWildcardQuery(String field, String termStr) + throws ParseException { + if (getLowercaseExpandedTerms()) { + termStr = termStr.toLowerCase(); + } + return new WildcardQuery(field, null, translateWildcards(termStr)); + } + + /** + * Translates unescaped wildcards '*' and '?' into '%' and '_'. + * + * @param input the input String. + * @return the translated String. + */ + private String translateWildcards(String input) { + StringBuffer translated = new StringBuffer(input.length()); + boolean escaped = false; + for (int i = 0; i < input.length(); i++) { + if (input.charAt(i) == '\\') { + if (escaped) { + translated.append("\\\\"); + escaped = false; + } else { + escaped = true; + } + } else if (input.charAt(i) == '*') { + if (escaped) { + translated.append('*'); + escaped = false; + } else { + translated.append('%'); + } + } else if (input.charAt(i) == '?') { + if (escaped) { + translated.append('?'); + escaped = false; + } else { + translated.append('_'); + } + } else if (input.charAt(i) == '%' || input.charAt(i) == '_') { + // escape every occurrence of '%' and '_' + escaped = false; + translated.append('\\').append(input.charAt(i)); + } else { + if (escaped) { + translated.append('\\'); + escaped = false; + } + translated.append(input.charAt(i)); + } + } + return translated.toString(); + } +} Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\JackrabbitQueryParser.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (revision 721490) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (working copy) @@ -17,7 +17,6 @@ package org.apache.jackrabbit.core.query.lucene; import org.apache.jackrabbit.core.query.PropertyTypeRegistry; -import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser; import org.apache.jackrabbit.core.SessionImpl; import org.apache.jackrabbit.core.HierarchyManager; import org.apache.jackrabbit.core.NodeImpl; @@ -66,6 +65,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -354,39 +354,9 @@ tmp.append(propName.getLocalName()); fieldname = tmp.toString(); } - QueryParser parser = new QueryParser( + QueryParser parser = new JackrabbitQueryParser( fieldname, analyzer, synonymProvider); - parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND); - // replace escaped ' with just ' - StringBuffer query = new StringBuffer(); - String textsearch = node.getFullTextSearchExpression(); - // the default lucene query parser recognizes 'AND' and 'NOT' as - // keywords. - textsearch = textsearch.replaceAll("AND", "and"); - textsearch = textsearch.replaceAll("NOT", "not"); - boolean escaped = false; - for (int i = 0; i < textsearch.length(); i++) { - if (textsearch.charAt(i) == '\\') { - if (escaped) { - query.append("\\\\"); - escaped = false; - } else { - escaped = true; - } - } else if (textsearch.charAt(i) == '\'') { - if (escaped) { - escaped = false; - } - query.append(textsearch.charAt(i)); - } else { - if (escaped) { - query.append('\\'); - escaped = false; - } - query.append(textsearch.charAt(i)); - } - } - return parser.parse(query.toString()); + return parser.parse(node.getFullTextSearchExpression()); } public Object visit(FullTextSearchScoreImpl node, Object data) { Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (revision 721490) +++ src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (working copy) @@ -37,8 +37,6 @@ import org.apache.jackrabbit.core.SearchManager; import org.apache.jackrabbit.core.SessionImpl; import org.apache.jackrabbit.core.query.PropertyTypeRegistry; -import org.apache.jackrabbit.core.query.lucene.fulltext.ParseException; -import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser; import org.apache.jackrabbit.core.state.ItemStateManager; import org.apache.jackrabbit.spi.Name; import org.apache.jackrabbit.spi.Path; @@ -73,6 +71,8 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -379,39 +379,9 @@ tmp.append(propName.getLocalName()); fieldname = tmp.toString(); } - QueryParser parser = new QueryParser( + QueryParser parser = new JackrabbitQueryParser( fieldname, analyzer, synonymProvider); - parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND); - // replace escaped ' with just ' - StringBuffer query = new StringBuffer(); - String textsearch = node.getQuery(); - // the default lucene query parser recognizes 'AND' and 'NOT' as - // keywords. - textsearch = textsearch.replaceAll("AND", "and"); - textsearch = textsearch.replaceAll("NOT", "not"); - boolean escaped = false; - for (int i = 0; i < textsearch.length(); i++) { - if (textsearch.charAt(i) == '\\') { - if (escaped) { - query.append("\\\\"); - escaped = false; - } else { - escaped = true; - } - } else if (textsearch.charAt(i) == '\'') { - if (escaped) { - escaped = false; - } - query.append(textsearch.charAt(i)); - } else { - if (escaped) { - query.append('\\'); - escaped = false; - } - query.append(textsearch.charAt(i)); - } - } - Query context = parser.parse(query.toString()); + Query context = parser.parse(node.getQuery()); if (relPath != null && (!node.getReferencesProperty() || relPath.getLength() > 1)) { // text search on some child axis Path.Element[] elements = relPath.getElements(); Index: src/main/javacc/fulltext/QueryParser.jjt =================================================================== --- src/main/javacc/fulltext/QueryParser.jjt (revision 721490) +++ src/main/javacc/fulltext/QueryParser.jjt (working copy) @@ -1,923 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -options { - STATIC=false; - JAVA_UNICODE_ESCAPE=true; - USER_CHAR_STREAM=true; -} - -PARSER_BEGIN(QueryParser) - -package org.apache.jackrabbit.core.query.lucene.fulltext; - -import java.util.Vector; -import java.io.*; -import java.text.*; -import java.util.*; -import org.apache.lucene.index.Term; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.*; -import org.apache.lucene.search.*; - -import org.apache.jackrabbit.core.query.lucene.WildcardQuery; -import org.apache.jackrabbit.core.query.lucene.SynonymProvider; - -/** - * This class is generated by JavaCC. The only method that clients should need - * to call is parse(). - * - * The syntax for query strings is as follows: - * A Query is a series of clauses. - * A clause may be prefixed by: - *

    - *
  • a plus (+) or a minus (-) sign, indicating - * that the clause is required or prohibited respectively; or - *
  • a term followed by a colon, indicating the field to be searched. - * This enables one to construct queries which search multiple fields. - *
- * - * A clause may be either: - *
    - *
  • a term, indicating all the documents that contain this term; or - *
  • a nested query, enclosed in parentheses. Note that this may be used - * with a +/- prefix to require any of a set of - * terms. - *
- * - * Thus, in BNF, the query grammar is: - *
- *   Query  ::= ( Clause )*
- *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
- * 
- * - *

- * Examples of appropriately formatted queries can be found in the test cases. - *

- * - * @author Brian Goetz - * @author Peter Halacsy - * @author Tatu Saloranta - */ - -public class QueryParser { - - private static final int CONJ_NONE = 0; - private static final int CONJ_AND = 1; - private static final int CONJ_OR = 2; - - private static final int MOD_NONE = 0; - private static final int MOD_NOT = 10; - private static final int MOD_REQ = 11; - - public static final int DEFAULT_OPERATOR_OR = 0; - public static final int DEFAULT_OPERATOR_AND = 1; - - /** The actual operator that parser uses to combine query terms */ - private int operator = DEFAULT_OPERATOR_OR; - - /** - * Whether terms of wildcard and prefix queries are to be automatically - * lower-cased or not. Default is true. - */ - boolean lowercaseWildcardTerms = true; - - Analyzer analyzer; - String field; - int phraseSlop = 0; - float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; - Locale locale = Locale.getDefault(); - SynonymProvider synonymProvider; - - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param query the query string to be parsed. - * @param field the default field for query terms. - * @param analyzer used to find terms in the query text. - * @throws ParseException if the parsing fails - */ - static public Query parse(String query, String field, Analyzer analyzer) - throws ParseException { - QueryParser parser = new QueryParser(field, analyzer); - return parser.parse(query); - } - - /** Constructs a query parser. - * @param f the default field for query terms. - * @param a used to find terms in the query text. - */ - public QueryParser(String f, Analyzer a) { - this(f, a, null); - } - - /** Constructs a query parser. - * @param f the default field for query terms. - * @param a used to find terms in the query text. - * @param sp the synonym provider - */ - public QueryParser(String f, Analyzer a, SynonymProvider sp) { - this(new FastCharStream("")); - analyzer = a; - field = f; - synonymProvider = sp; - } - - /** Parses a query string, returning a - * Query. - * @param query the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public Query parse(String query) throws ParseException { - ReInit(new FastCharStream(query)); - try { - return Query(field); - } - catch (TokenMgrError tme) { - throw new ParseException(tme.getMessage()); - } - catch (BooleanQuery.TooManyClauses tmc) { - throw new ParseException("Too many boolean clauses"); - } - } - - /** - * @return Returns the analyzer. - */ - public Analyzer getAnalyzer() { - return analyzer; - } - - /** - * @return Returns the field. - */ - public String getField() { - return field; - } - - /** - * Get the default minimal similarity for fuzzy queries. - */ - public float getFuzzyMinSim() { - return fuzzyMinSim; - } - /** - *Set the default minimum similarity for fuzzy queries. - */ - public void setFuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; - } - - /** - * Sets the default slop for phrases. If zero, then exact phrase matches - * are required. Default value is zero. - */ - public void setPhraseSlop(int phraseSlop) { - this.phraseSlop = phraseSlop; - } - - /** - * Gets the default slop for phrases. - */ - public int getPhraseSlop() { - return phraseSlop; - } - - /** - * Sets the boolean operator of the QueryParser. - * In classic mode (DEFAULT_OPERATOR_OR) terms without any modifiers - * are considered optional: for example capital of Hungary is equal to - * capital OR of OR Hungary.
- * In DEFAULT_OPERATOR_AND terms are considered to be in conjuction: the - * above mentioned query is parsed as capital AND of AND Hungary - */ - public void setOperator(int operator) { - this.operator = operator; - } - - /** - * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND - * or DEFAULT_OPERATOR_OR. - */ - public int getOperator() { - return operator; - } - - public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) { - this.lowercaseWildcardTerms = lowercaseWildcardTerms; - } - - public boolean getLowercaseWildcardTerms() { - return lowercaseWildcardTerms; - } - - /** - * Set locale used by date range parsing. - */ - public void setLocale(Locale locale) { - this.locale = locale; - } - - /** - * Returns current locale, allowing access by subclasses. - */ - public Locale getLocale() { - return locale; - } - - protected void addClause(Vector clauses, int conj, int mods, Query q) { - boolean required, prohibited; - - // If this term is introduced by AND, make the preceding term required, - // unless it's already prohibited - if (clauses.size() > 0 && conj == CONJ_AND) { - BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.MUST); - } - - if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) { - // If this term is introduced by OR, make the preceding term optional, - // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) - // notice if the input is a OR b, first term is parsed as required; without - // this modification a OR b would parsed as +a OR b - BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.SHOULD); - } - - // We might have been passed a null query; the term might have been - // filtered away by the analyzer. - if (q == null) - return; - - if (operator == DEFAULT_OPERATOR_OR) { - // We set REQUIRED if we're introduced by AND or +; PROHIBITED if - // introduced by NOT or -; make sure not to set both. - prohibited = (mods == MOD_NOT); - required = (mods == MOD_REQ); - if (conj == CONJ_AND && !prohibited) { - required = true; - } - } else { - // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED - // if not PROHIBITED and not introduced by OR - prohibited = (mods == MOD_NOT); - required = (!prohibited && conj != CONJ_OR); - } - BooleanClause.Occur occur = null; - if (required) { - occur = BooleanClause.Occur.MUST; - } else if (prohibited) { - occur = BooleanClause.Occur.MUST_NOT; - } else { - occur = BooleanClause.Occur.SHOULD; - } - clauses.addElement(new BooleanClause(q, occur)); - } - - /** - * Note that parameter analyzer is ignored. Calls inside the parser always - * use class member analyser. This method will be deprecated and substituted - * by {@link #getFieldQuery(String, String)} in future versions of Lucene. - * Currently overwriting either of these methods works. - * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, - Analyzer analyzer, - String queryText) throws ParseException { - return getFieldQuery(field, queryText); - } - - /** - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, String queryText) throws ParseException { - // Use the analyzer to get all the tokens, and then build a TermQuery, - // PhraseQuery, or nothing based on the term count - - TokenStream source = analyzer.tokenStream(field, - new StringReader(queryText)); - Vector v = new Vector(); - org.apache.lucene.analysis.Token t; - - while (true) { - try { - t = source.next(); - } - catch (IOException e) { - t = null; - } - if (t == null) - break; - v.addElement(t.termText()); - } - try { - source.close(); - } - catch (IOException e) { - // ignore - } - - if (v.size() == 0) - return null; - else if (v.size() == 1) - return new TermQuery(new Term(field, (String) v.elementAt(0))); - else { - PhraseQuery q = new PhraseQuery(); - q.setSlop(phraseSlop); - for (int i=0; i - * Depending on settings, prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * wildcard queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); - } - return new WildcardQuery(field, null, translateWildcards(termStr)); - } - - /** - * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. - *

- * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * wild card queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * (without trailing '*' character!) - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getPrefixQuery(String field, String termStr) throws ParseException - { - return getWildcardQuery(field, termStr + "*"); - } - - /** - * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFuzzyQuery(String field, String termStr) throws ParseException { - return getFuzzyQuery(field, termStr, fuzzyMinSim); - } - - /** - * Translates unescaped wildcards '*' and '?' into '%' and '_'. - * - * @param input the input String. - * @return the translated String. - */ - private String translateWildcards(String input) { - StringBuffer translated = new StringBuffer(input.length()); - boolean escaped = false; - for (int i = 0; i < input.length(); i++) { - if (input.charAt(i) == '\\') { - if (escaped) { - translated.append("\\\\"); - escaped = false; - } else { - escaped = true; - } - } else if (input.charAt(i) == '*') { - if (escaped) { - translated.append('*'); - escaped = false; - } else { - translated.append('%'); - } - } else if (input.charAt(i) == '?') { - if (escaped) { - translated.append('?'); - escaped = false; - } else { - translated.append('_'); - } - } else if (input.charAt(i) == '%' || input.charAt(i) == '_') { - // escape every occurrence of '%' and '_' - escaped = false; - translated.append('\\').append(input.charAt(i)); - } else { - if (escaped) { - translated.append('\\'); - escaped = false; - } - translated.append(input.charAt(i)); - } - } - return translated.toString(); - } - - /** - * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~floatNumber) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * @param minSimilarity the minimum similarity required for a fuzzy match - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException - { - Term t = new Term(field, termStr); - return new FuzzyQuery(t, minSimilarity); - } - - /** - * Factory method for generating a synonym query. - * Called when parser parses an input term token that has the synonym - * prefix (~term) prepended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getSynonymQuery(String field, String termStr) throws ParseException - { - Vector synonyms = new Vector(); - synonyms.add(new BooleanClause(getFieldQuery(field, termStr), BooleanClause.Occur.SHOULD)); - if (synonymProvider != null) { - String[] terms = synonymProvider.getSynonyms(termStr); - for (int i = 0; i < terms.length; i++) { - synonyms.add(new BooleanClause(getFieldQuery(field, terms[i]), BooleanClause.Occur.SHOULD)); - } - } - if (synonyms.size() == 1) { - return ((BooleanClause) synonyms.get(0)).getQuery(); - } else { - return getBooleanQuery(synonyms); - } - } - - /** - * Returns a String where the escape char has been - * removed, or kept only once if there was a double escape. - */ - private String discardEscapeChar(String input) { - char[] caSource = input.toCharArray(); - char[] caDest = new char[caSource.length]; - int j = 0; - for (int i = 0; i < caSource.length; i++) { - if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { - caDest[j++]=caSource[i]; - } - } - return new String(caDest, 0, j); - } - - /** - * Returns a String where those characters that QueryParser - * expects to be escaped are escaped, i.e. preceded by a \. - */ - public static String escape(String s) { - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < s.length(); i++) { - char c = s.charAt(i); - // NOTE: keep this in sync with _ESCAPED_CHAR below! - if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' - || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' - || c == '*' || c == '?') { - sb.append('\\'); - } - sb.append(c); - } - return sb.toString(); - } - - public static void main(String[] args) throws Exception { - QueryParser qp = new QueryParser("field", - new org.apache.lucene.analysis.SimpleAnalyzer()); - Query q = qp.parse(args[0]); - System.out.println(q.toString("field")); - } -} - -PARSER_END(QueryParser) - -/* ***************** */ -/* Token Definitions */ -/* ***************** */ - -<*> TOKEN : { - <#_NUM_CHAR: ["0"-"9"] > -// NOTE: keep this in sync with escape(String) above! -| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] > -| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] - | <_ESCAPED_CHAR> ) > -| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > -| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") > -} - - SKIP : { - < <_WHITESPACE>> -} - -// OG: to support prefix queries: -// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137 -// Change from: -// | -// (<_TERM_CHAR> | ( [ "*", "?" ] ))* > -// To: -// -// | | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > - - TOKEN : { - -| -| -| -| -| -| -| -| : Boost -| -| (<_TERM_CHAR>)* > -| )+ ( "." (<_NUM_CHAR>)+ )? )? > -| > -| (<_TERM_CHAR>)* "*" > -// support for prefix queries enabled! -| | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > -| : RangeIn -| : RangeEx -} - - TOKEN : { -)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT -} - - TOKEN : { - -| : DEFAULT -| -| -} - - TOKEN : { - -| : DEFAULT -| -| -} - -// * Query ::= ( Clause )* -// * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) - -int Conjunction() : { - int ret = CONJ_NONE; -} -{ - [ - { ret = CONJ_AND; } - | { ret = CONJ_OR; } - ] - { return ret; } -} - -int Modifiers() : { - int ret = MOD_NONE; -} -{ - [ - { ret = MOD_REQ; } - | { ret = MOD_NOT; } - | { ret = MOD_NOT; } - ] - { return ret; } -} - -Query Query(String field) : -{ - Vector clauses = new Vector(); - Query q, firstQuery=null; - int conj, mods; -} -{ - mods=Modifiers() q=Clause(field) - { - addClause(clauses, CONJ_NONE, mods, q); - if (mods == MOD_NONE) - firstQuery=q; - } - ( - conj=Conjunction() mods=Modifiers() q=Clause(field) - { addClause(clauses, conj, mods, q); } - )* - { - if (clauses.size() == 1 && firstQuery != null) - return firstQuery; - else { - return getBooleanQuery(clauses); - } - } -} - -Query Clause(String field) : { - Query q; - Token fieldToken=null, boost=null; -} -{ - [ - LOOKAHEAD(2) - fieldToken= { - field=discardEscapeChar(fieldToken.image); - } - ] - - ( - q=Term(field) - | q=Query(field) ( boost=)? - - ) - { - if (boost != null) { - float f = (float)1.0; - try { - f = Float.parseFloat(boost.image); - q.setBoost(f); - } catch (Exception ignored) { } - } - return q; - } -} - - -Query Term(String field) : { - Token term, boost=null, fuzzySlop=null, goop1, goop2; - boolean prefix = false; - boolean wildcard = false; - boolean fuzzy = false; - boolean synonym = false; - boolean rangein = false; - Query q; -} -{ - ( - ( - term= - | term= { prefix=true; } - | term= { synonym=true; } - | term= { wildcard=true; } - | term= - ) - [ fuzzySlop= { fuzzy=true; } ] - [ boost= [ fuzzySlop= { fuzzy=true; } ] ] - { - String termImage=discardEscapeChar(term.image); - if (wildcard) { - q = getWildcardQuery(field, termImage); - } else if (prefix) { - q = getPrefixQuery(field, - discardEscapeChar(term.image.substring - (0, term.image.length()-1))); - } else if (synonym) { - q = getSynonymQuery(field, - discardEscapeChar(term.image.substring(1, term.image.length()))); - } else if (fuzzy) { - float fms = fuzzyMinSim; - try { - fms = Float.parseFloat(fuzzySlop.image.substring(1)); - } catch (Exception ignored) { } - if(fms < 0.0f || fms > 1.0f){ - throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); - } - if(fms == fuzzyMinSim) - q = getFuzzyQuery(field, termImage); - else - q = getFuzzyQuery(field, termImage, fms); - } else { - q = getFieldQuery(field, analyzer, termImage); - } - } - | ( ( goop1=|goop1= ) - [ ] ( goop2=|goop2= ) - ) - [ boost= ] - { - if (goop1.kind == RANGEIN_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEIN_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); - } - | ( ( goop1=|goop1= ) - [ ] ( goop2=|goop2= ) - ) - [ boost= ] - { - if (goop1.kind == RANGEEX_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEEX_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - - q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); - } - | term= - [ fuzzySlop= ] - [ boost= ] - { - int s = phraseSlop; - - if (fuzzySlop != null) { - try { - s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); - } - catch (Exception ignored) { } - } - q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s); - } - ) - { - if (boost != null) { - float f = (float) 1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - } - catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ - } - - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q.setBoost(f); - } - } - return q; - } -}