org.apache.lucene.queryParser.FastCharStream
- * based on a String input.
- *
- * An efficient implementation of JavaCC's CharStream interface. Note that
- * this does not do line-number counting, but instead keeps track of the
- * character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API.
- */
-public final class FastCharStream implements CharStream {
-
- /**
- * Next char to read.
- */
- private int position;
-
- /**
- * Offset in String for current token.
- */
- private int tokenStart;
-
- /**
- * The input String.
- */
- private String input;
-
- /**
- * Constructs from a String.
- */
- public FastCharStream(String input) {
- this.input = input;
- }
-
- /**
- * @inheritDoc
- */
- public char readChar() throws IOException {
- if (position >= input.length()) {
- throw new IOException("read past eof");
- }
- return input.charAt(position++);
- }
-
- /**
- * @inheritDoc
- */
- public char BeginToken() throws IOException {
- tokenStart = position;
- return readChar();
- }
-
- /**
- * @inheritDoc
- */
- public void backup(int amount) {
- position -= amount;
- }
-
- /**
- * @inheritDoc
- */
- public String GetImage() {
- return input.substring(tokenStart, position);
- }
-
- /**
- * @inheritDoc
- */
- public char[] GetSuffix(int len) {
- char[] value = new char[len];
- for (int i = 0; i < len; i++) {
- value[i] = input.charAt(position - len + i);
- }
- return value;
- }
-
- /**
- * @inheritDoc
- */
- public void Done() {
- }
-
- /**
- * @inheritDoc
- */
- public int getColumn() {
- return position;
- }
-
- /**
- * @inheritDoc
- */
- public int getLine() {
- return 1;
- }
-
- /**
- * @inheritDoc
- */
- public int getEndColumn() {
- return position;
- }
-
- /**
- * @inheritDoc
- */
- public int getEndLine() {
- return 1;
- }
-
- /**
- * @inheritDoc
- */
- public int getBeginColumn() {
- return tokenStart;
- }
-
- /**
- * @inheritDoc
- */
- public int getBeginLine() {
- return 1;
- }
-}
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (revision 0)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (revision 0)
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.util.Vector;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.BooleanClause;
+
+/**
+ * JackrabbitQueryParser extends the standard lucene query parser
+ * and adds JCR specific customizations.
+ */
+public class JackrabbitQueryParser extends QueryParser {
+
+ /**
+ * The Jackrabbit synonym provider or null if there is none.
+ */
+ private final SynonymProvider synonymProvider;
+
+    /**
+     * Creates a query parser for the given field. Terms are combined with
+     * <code>AND</code> by default and may start with a wildcard.
+     *
+     * @param fieldName       the name of the default field for query terms.
+     * @param analyzer        the analyzer passed on to the lucene parser.
+     * @param synonymProvider the provider used for synonym queries or
+     *                        <code>null</code> if there is none.
+     */
+    public JackrabbitQueryParser(
+            String fieldName, Analyzer analyzer, SynonymProvider synonymProvider) {
+        super(fieldName, analyzer);
+        this.synonymProvider = synonymProvider;
+        setDefaultOperator(Operator.AND);
+        setAllowLeadingWildcard(true);
+    }
+
+    /**
+     * Rewrites a JCR full text search expression into lucene syntax and
+     * parses it:
+     * <ul>
+     * <li>stand-alone <code>AND</code> and <code>NOT</code> are lower-cased
+     * because lucene would read them as operators; the same letters inside
+     * a longer word are left alone,</li>
+     * <li>an escaped single quote (<code>\'</code>) becomes a plain
+     * quote,</li>
+     * <li>a <code>~</code> at the start of a word is escaped, which keeps it
+     * part of the term so that {@link #getFieldQuery(String, String)} sees
+     * the synonym prefix,</li>
+     * <li>every other escape sequence, including <code>\~</code>, is passed
+     * on unchanged,</li>
+     * <li>a single backslash at the very end is dropped.</li>
+     * </ul>
+     *
+     * @param textsearch the full text search expression.
+     * @return the query parsed from the rewritten expression.
+     * @throws ParseException if parsing the rewritten expression fails.
+     */
+    public Query parse(String textsearch) throws ParseException {
+        // only whole words are keywords for lucene; a plain substring
+        // replacement would also change the case inside e.g. 'BRAND'
+        textsearch = textsearch.replaceAll("\\bAND\\b", "and");
+        textsearch = textsearch.replaceAll("\\bNOT\\b", "not");
+        StringBuffer rewritten = new StringBuffer(textsearch.length());
+        boolean escaped = false;
+        for (int i = 0; i < textsearch.length(); i++) {
+            char c = textsearch.charAt(i);
+            if (c == '\\') {
+                if (escaped) {
+                    // escaped backslash, keep both characters
+                    rewritten.append("\\\\");
+                }
+                escaped = !escaped;
+            } else {
+                // a pending escape is written in front of the character it
+                // belongs to, except for the single quote
+                boolean needsEscape = escaped && c != '\'';
+                if (c == '~') {
+                    // additionally escape a tilde that starts a word; the
+                    // pending escape must be consumed here as well, otherwise
+                    // the backslash ends up in front of the next character
+                    needsEscape = escaped || i == 0
+                            || Character.isWhitespace(textsearch.charAt(i - 1));
+                }
+                if (needsEscape) {
+                    rewritten.append('\\');
+                }
+                rewritten.append(c);
+                escaped = false;
+            }
+        }
+        return super.parse(rewritten.toString());
+    }
+
+    /**
+     * Factory method for generating a synonym query.
+     * Called when parser parses an input term token that has the synonym
+     * prefix (~term) prepended. The term itself and every synonym returned
+     * by the synonym provider, if there is one, are added as optional
+     * clauses. A term for which the field query is <code>null</code> is
+     * skipped instead of ending up as a clause without query.
+     *
+     * @param field Name of the field query will use.
+     * @param termStr Term token to use for building term for the query
+     *
+     * @return Resulting {@link Query} built for the term, or
+     *         <code>null</code> if neither the term nor one of its synonyms
+     *         results in a query
+     * @exception ParseException throw in overridden method to disallow
+     */
+    protected Query getSynonymQuery(String field, String termStr)
+            throws ParseException {
+        Vector synonyms = new Vector();
+        Query query = getFieldQuery(field, termStr);
+        if (query != null) {
+            synonyms.add(new BooleanClause(query, BooleanClause.Occur.SHOULD));
+        }
+        if (synonymProvider != null) {
+            String[] terms = synonymProvider.getSynonyms(termStr);
+            for (int i = 0; i < terms.length; i++) {
+                query = getFieldQuery(field, terms[i]);
+                if (query != null) {
+                    synonyms.add(new BooleanClause(query,
+                            BooleanClause.Occur.SHOULD));
+                }
+            }
+        }
+        if (synonyms.isEmpty()) {
+            // no query could be built for the term or any synonym
+            return null;
+        }
+        if (synonyms.size() == 1) {
+            // a single clause does not need a boolean query around it
+            return ((BooleanClause) synonyms.get(0)).getQuery();
+        }
+        return getBooleanQuery(synonyms);
+    }
+
+
+    /**
+     * Creates the query for a term or phrase of the parsed expression. A
+     * text that starts with the synonym prefix <code>~</code> results in a
+     * synonym query for the text after the prefix, any other text is handed
+     * to the lucene implementation.
+     *
+     * @param field     the name of the field the query will use.
+     * @param queryText the term or phrase, possibly with synonym prefix.
+     * @return the query built for the text.
+     * @throws ParseException if thrown while building the query.
+     */
+    protected Query getFieldQuery(String field, String queryText)
+            throws ParseException {
+        boolean synonymQuery = queryText.startsWith("~");
+        if (!synonymQuery) {
+            return super.getFieldQuery(field, queryText);
+        }
+        return getSynonymQuery(field, queryText.substring(1));
+    }
+
+    /**
+     * Creates the query for a prefix term by appending the <code>*</code>
+     * wildcard to it and building a wildcard query from the result.
+     *
+     * @param field   the name of the field the query will use.
+     * @param termStr the prefix term; the trailing <code>*</code> is added
+     *                by this method.
+     * @return the wildcard query for <code>termStr*</code>.
+     * @throws ParseException declared by the overridden method.
+     */
+    protected Query getPrefixQuery(String field, String termStr)
+            throws ParseException {
+        String wildcardTerm = termStr + "*";
+        return getWildcardQuery(field, wildcardTerm);
+    }
+
+    /**
+     * Creates a {@link WildcardQuery} for a term that contains wildcards.
+     * The term is lower-cased first if {@link #getLowercaseExpandedTerms()}
+     * is set, then its unescaped <code>*</code> and <code>?</code> are
+     * translated into <code>%</code> and <code>_</code>.
+     *
+     * @param field   the name of the field the query will use.
+     * @param termStr the term with wildcards.
+     * @return the wildcard query for the translated term.
+     * @throws ParseException declared by the overridden method.
+     */
+    protected Query getWildcardQuery(String field, String termStr)
+            throws ParseException {
+        if (getLowercaseExpandedTerms()) {
+            // lower-case with a fixed locale: String.toLowerCase() uses the
+            // default locale of the platform, which e.g. maps 'I' to a
+            // dotless i when the default locale is turkish
+            termStr = termStr.toLowerCase(java.util.Locale.ENGLISH);
+        }
+        return new WildcardQuery(field, null, translateWildcards(termStr));
+    }
+
+    /**
+     * Translates unescaped wildcards '*' and '?' into '%' and '_'. An
+     * escaped '*' or '?' is written without its backslash, every '%' and
+     * '_' is written with a backslash in front. A backslash before any other
+     * character is kept, a single backslash at the end is dropped.
+     *
+     * @param input the input String.
+     * @return the translated String.
+     */
+    private String translateWildcards(String input) {
+        StringBuffer translated = new StringBuffer(input.length());
+        boolean escaped = false;
+        for (int i = 0; i < input.length(); i++) {
+            char c = input.charAt(i);
+            switch (c) {
+                case '\\':
+                    if (escaped) {
+                        // escaped backslash, keep both characters
+                        translated.append("\\\\");
+                    }
+                    escaped = !escaped;
+                    break;
+                case '*':
+                case '?':
+                    if (escaped) {
+                        // literal character, the backslash is not needed
+                        translated.append(c);
+                    } else {
+                        translated.append(c == '*' ? '%' : '_');
+                    }
+                    escaped = false;
+                    break;
+                case '%':
+                case '_':
+                    // escape every occurrence of '%' and '_'
+                    translated.append('\\').append(c);
+                    escaped = false;
+                    break;
+                default:
+                    if (escaped) {
+                        translated.append('\\');
+                    }
+                    translated.append(c);
+                    escaped = false;
+            }
+        }
+        return translated.toString();
+    }
+}
Property changes on: src\main\java\org\apache\jackrabbit\core\query\lucene\JackrabbitQueryParser.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (revision 721490)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (working copy)
@@ -17,7 +17,6 @@
package org.apache.jackrabbit.core.query.lucene;
import org.apache.jackrabbit.core.query.PropertyTypeRegistry;
-import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser;
import org.apache.jackrabbit.core.SessionImpl;
import org.apache.jackrabbit.core.HierarchyManager;
import org.apache.jackrabbit.core.NodeImpl;
@@ -66,6 +65,7 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -354,39 +354,9 @@
tmp.append(propName.getLocalName());
fieldname = tmp.toString();
}
- QueryParser parser = new QueryParser(
+ QueryParser parser = new JackrabbitQueryParser(
fieldname, analyzer, synonymProvider);
- parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
- // replace escaped ' with just '
- StringBuffer query = new StringBuffer();
- String textsearch = node.getFullTextSearchExpression();
- // the default lucene query parser recognizes 'AND' and 'NOT' as
- // keywords.
- textsearch = textsearch.replaceAll("AND", "and");
- textsearch = textsearch.replaceAll("NOT", "not");
- boolean escaped = false;
- for (int i = 0; i < textsearch.length(); i++) {
- if (textsearch.charAt(i) == '\\') {
- if (escaped) {
- query.append("\\\\");
- escaped = false;
- } else {
- escaped = true;
- }
- } else if (textsearch.charAt(i) == '\'') {
- if (escaped) {
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- } else {
- if (escaped) {
- query.append('\\');
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- }
- }
- return parser.parse(query.toString());
+ return parser.parse(node.getFullTextSearchExpression());
}
public Object visit(FullTextSearchScoreImpl node, Object data) {
Index: src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
===================================================================
--- src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (revision 721490)
+++ src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (working copy)
@@ -37,8 +37,6 @@
import org.apache.jackrabbit.core.SearchManager;
import org.apache.jackrabbit.core.SessionImpl;
import org.apache.jackrabbit.core.query.PropertyTypeRegistry;
-import org.apache.jackrabbit.core.query.lucene.fulltext.ParseException;
-import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.spi.Name;
import org.apache.jackrabbit.spi.Path;
@@ -73,6 +71,8 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -379,39 +379,9 @@
tmp.append(propName.getLocalName());
fieldname = tmp.toString();
}
- QueryParser parser = new QueryParser(
+ QueryParser parser = new JackrabbitQueryParser(
fieldname, analyzer, synonymProvider);
- parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
- // replace escaped ' with just '
- StringBuffer query = new StringBuffer();
- String textsearch = node.getQuery();
- // the default lucene query parser recognizes 'AND' and 'NOT' as
- // keywords.
- textsearch = textsearch.replaceAll("AND", "and");
- textsearch = textsearch.replaceAll("NOT", "not");
- boolean escaped = false;
- for (int i = 0; i < textsearch.length(); i++) {
- if (textsearch.charAt(i) == '\\') {
- if (escaped) {
- query.append("\\\\");
- escaped = false;
- } else {
- escaped = true;
- }
- } else if (textsearch.charAt(i) == '\'') {
- if (escaped) {
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- } else {
- if (escaped) {
- query.append('\\');
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- }
- }
- Query context = parser.parse(query.toString());
+ Query context = parser.parse(node.getQuery());
if (relPath != null && (!node.getReferencesProperty() || relPath.getLength() > 1)) {
// text search on some child axis
Path.Element[] elements = relPath.getElements();
Index: src/main/javacc/fulltext/QueryParser.jjt
===================================================================
--- src/main/javacc/fulltext/QueryParser.jjt (revision 721490)
+++ src/main/javacc/fulltext/QueryParser.jjt (working copy)
@@ -1,923 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-options {
- STATIC=false;
- JAVA_UNICODE_ESCAPE=true;
- USER_CHAR_STREAM=true;
-}
-
-PARSER_BEGIN(QueryParser)
-
-package org.apache.jackrabbit.core.query.lucene.fulltext;
-
-import java.util.Vector;
-import java.io.*;
-import java.text.*;
-import java.util.*;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.search.*;
-
-import org.apache.jackrabbit.core.query.lucene.WildcardQuery;
-import org.apache.jackrabbit.core.query.lucene.SynonymProvider;
-
-/**
- * This class is generated by JavaCC. The only method that clients should need
- * to call is parse().
- *
- * The syntax for query strings is as follows:
- * A Query is a series of clauses.
- * A clause may be prefixed by:
- *
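- * a plus (+) or a minus (-) sign, indicating
- * that the clause is required or prohibited respectively; or
- * a term followed by a colon, indicating the field to be searched.
- *
- * A clause may be either a term or a nested query enclosed in
- * parentheses. A nested query may be used with a +/- prefix to require
- * any of a set of terms.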
- *
- * Query ::= ( Clause )*
- * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
- *
- *
- *
- * Examples of appropriately formatted queries can be found in the test cases.
- *
- *
- * @author Brian Goetz
- * @author Peter Halacsy
- * @author Tatu Saloranta
- */
-
-public class QueryParser {
-
- private static final int CONJ_NONE = 0;
- private static final int CONJ_AND = 1;
- private static final int CONJ_OR = 2;
-
- private static final int MOD_NONE = 0;
- private static final int MOD_NOT = 10;
- private static final int MOD_REQ = 11;
-
- public static final int DEFAULT_OPERATOR_OR = 0;
- public static final int DEFAULT_OPERATOR_AND = 1;
-
- /** The actual operator that parser uses to combine query terms */
- private int operator = DEFAULT_OPERATOR_OR;
-
- /**
- * Whether terms of wildcard and prefix queries are to be automatically
- * lower-cased or not. Default is true.
- */
- boolean lowercaseWildcardTerms = true;
-
- Analyzer analyzer;
- String field;
- int phraseSlop = 0;
- float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
- Locale locale = Locale.getDefault();
- SynonymProvider synonymProvider;
-
- /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
- * @param query the query string to be parsed.
- * @param field the default field for query terms.
- * @param analyzer used to find terms in the query text.
- * @throws ParseException if the parsing fails
- */
- static public Query parse(String query, String field, Analyzer analyzer)
- throws ParseException {
- QueryParser parser = new QueryParser(field, analyzer);
- return parser.parse(query);
- }
-
- /** Constructs a query parser.
- * @param f the default field for query terms.
- * @param a used to find terms in the query text.
- */
- public QueryParser(String f, Analyzer a) {
- this(f, a, null);
- }
-
- /** Constructs a query parser.
- * @param f the default field for query terms.
- * @param a used to find terms in the query text.
- * @param sp the synonym provider
- */
- public QueryParser(String f, Analyzer a, SynonymProvider sp) {
- this(new FastCharStream(""));
- analyzer = a;
- field = f;
- synonymProvider = sp;
- }
-
- /** Parses a query string, returning a
- * Query.
- * @param query the query string to be parsed.
- * @throws ParseException if the parsing fails
- */
- public Query parse(String query) throws ParseException {
- ReInit(new FastCharStream(query));
- try {
- return Query(field);
- }
- catch (TokenMgrError tme) {
- throw new ParseException(tme.getMessage());
- }
- catch (BooleanQuery.TooManyClauses tmc) {
- throw new ParseException("Too many boolean clauses");
- }
- }
-
- /**
- * @return Returns the analyzer.
- */
- public Analyzer getAnalyzer() {
- return analyzer;
- }
-
- /**
- * @return Returns the field.
- */
- public String getField() {
- return field;
- }
-
- /**
- * Get the default minimal similarity for fuzzy queries.
- */
- public float getFuzzyMinSim() {
- return fuzzyMinSim;
- }
- /**
- *Set the default minimum similarity for fuzzy queries.
- */
- public void setFuzzyMinSim(float fuzzyMinSim) {
- this.fuzzyMinSim = fuzzyMinSim;
- }
-
- /**
- * Sets the default slop for phrases. If zero, then exact phrase matches
- * are required. Default value is zero.
- */
- public void setPhraseSlop(int phraseSlop) {
- this.phraseSlop = phraseSlop;
- }
-
- /**
- * Gets the default slop for phrases.
- */
- public int getPhraseSlop() {
- return phraseSlop;
- }
-
- /**
- * Sets the boolean operator of the QueryParser.
- * In classic mode (DEFAULT_OPERATOR_OR) terms without any modifiers
- * are considered optional: for example capital of Hungary is equal to
- * capital OR of OR Hungary. With DEFAULT_OPERATOR_AND terms are considered to be in conjunction: the
- * above mentioned query is parsed as capital AND of AND Hungary
- */
- public void setOperator(int operator) {
- this.operator = operator;
- }
-
- /**
- * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND
- * or DEFAULT_OPERATOR_OR.
- */
- public int getOperator() {
- return operator;
- }
-
- public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
- this.lowercaseWildcardTerms = lowercaseWildcardTerms;
- }
-
- public boolean getLowercaseWildcardTerms() {
- return lowercaseWildcardTerms;
- }
-
- /**
- * Set locale used by date range parsing.
- */
- public void setLocale(Locale locale) {
- this.locale = locale;
- }
-
- /**
- * Returns current locale, allowing access by subclasses.
- */
- public Locale getLocale() {
- return locale;
- }
-
- protected void addClause(Vector clauses, int conj, int mods, Query q) {
- boolean required, prohibited;
-
- // If this term is introduced by AND, make the preceding term required,
- // unless it's already prohibited
- if (clauses.size() > 0 && conj == CONJ_AND) {
- BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
- if (!c.isProhibited())
- c.setOccur(BooleanClause.Occur.MUST);
- }
-
- if (clauses.size() > 0 && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
- // If this term is introduced by OR, make the preceding term optional,
- // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
- // notice if the input is a OR b, first term is parsed as required; without
- // this modification a OR b would parsed as +a OR b
- BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
- if (!c.isProhibited())
- c.setOccur(BooleanClause.Occur.SHOULD);
- }
-
- // We might have been passed a null query; the term might have been
- // filtered away by the analyzer.
- if (q == null)
- return;
-
- if (operator == DEFAULT_OPERATOR_OR) {
- // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
- // introduced by NOT or -; make sure not to set both.
- prohibited = (mods == MOD_NOT);
- required = (mods == MOD_REQ);
- if (conj == CONJ_AND && !prohibited) {
- required = true;
- }
- } else {
- // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
- // if not PROHIBITED and not introduced by OR
- prohibited = (mods == MOD_NOT);
- required = (!prohibited && conj != CONJ_OR);
- }
- BooleanClause.Occur occur = null;
- if (required) {
- occur = BooleanClause.Occur.MUST;
- } else if (prohibited) {
- occur = BooleanClause.Occur.MUST_NOT;
- } else {
- occur = BooleanClause.Occur.SHOULD;
- }
- clauses.addElement(new BooleanClause(q, occur));
- }
-
- /**
- * Note that parameter analyzer is ignored. Calls inside the parser always
- * use class member analyser. This method will be deprecated and substituted
- * by {@link #getFieldQuery(String, String)} in future versions of Lucene.
- * Currently overwriting either of these methods works.
- *
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFieldQuery(String field,
- Analyzer analyzer,
- String queryText) throws ParseException {
- return getFieldQuery(field, queryText);
- }
-
- /**
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFieldQuery(String field, String queryText) throws ParseException {
- // Use the analyzer to get all the tokens, and then build a TermQuery,
- // PhraseQuery, or nothing based on the term count
-
- TokenStream source = analyzer.tokenStream(field,
- new StringReader(queryText));
- Vector v = new Vector();
- org.apache.lucene.analysis.Token t;
-
- while (true) {
- try {
- t = source.next();
- }
- catch (IOException e) {
- t = null;
- }
- if (t == null)
- break;
- v.addElement(t.termText());
- }
- try {
- source.close();
- }
- catch (IOException e) {
- // ignore
- }
-
- if (v.size() == 0)
- return null;
- else if (v.size() == 1)
- return new TermQuery(new Term(field, (String) v.elementAt(0)));
- else {
- PhraseQuery q = new PhraseQuery();
- q.setSlop(phraseSlop);
- for (int i=0; i- * Can be overridden by extending classes, to provide custom handling for - * wildcard queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { - if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); - } - return new WildcardQuery(field, null, translateWildcards(termStr)); - } - - /** - * Factory method for generating a query (similar to - * ({@link #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. - *
- * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *
- * Can be overridden by extending classes, to provide custom handling for
- * wild card queries, which may be necessary due to missing analyzer calls.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- * (without trailing '*' character!)
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getPrefixQuery(String field, String termStr) throws ParseException
- {
- return getWildcardQuery(field, termStr + "*");
- }
-
- /**
- * Factory method for generating a query (similar to
- * ({@link #getWildcardQuery}). Called when parser parses
- * an input term token that has the fuzzy suffix (~) appended.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
- return getFuzzyQuery(field, termStr, fuzzyMinSim);
- }
-
- /**
- * Translates unescaped wildcards '*' and '?' into '%' and '_'.
- *
- * @param input the input String.
- * @return the translated String.
- */
- private String translateWildcards(String input) {
- StringBuffer translated = new StringBuffer(input.length());
- boolean escaped = false;
- for (int i = 0; i < input.length(); i++) {
- if (input.charAt(i) == '\\') {
- if (escaped) {
- translated.append("\\\\");
- escaped = false;
- } else {
- escaped = true;
- }
- } else if (input.charAt(i) == '*') {
- if (escaped) {
- translated.append('*');
- escaped = false;
- } else {
- translated.append('%');
- }
- } else if (input.charAt(i) == '?') {
- if (escaped) {
- translated.append('?');
- escaped = false;
- } else {
- translated.append('_');
- }
- } else if (input.charAt(i) == '%' || input.charAt(i) == '_') {
- // escape every occurrence of '%' and '_'
- escaped = false;
- translated.append('\\').append(input.charAt(i));
- } else {
- if (escaped) {
- translated.append('\\');
- escaped = false;
- }
- translated.append(input.charAt(i));
- }
- }
- return translated.toString();
- }
-
- /**
- * Factory method for generating a query (similar to
- * ({@link #getWildcardQuery}). Called when parser parses
- * an input term token that has the fuzzy suffix (~floatNumber) appended.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- * @param minSimilarity the minimum similarity required for a fuzzy match
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
- {
- Term t = new Term(field, termStr);
- return new FuzzyQuery(t, minSimilarity);
- }
-
- /**
- * Factory method for generating a synonym query.
- * Called when parser parses an input term token that has the synonym
- * prefix (~term) prepended.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getSynonymQuery(String field, String termStr) throws ParseException
- {
- Vector synonyms = new Vector();
- synonyms.add(new BooleanClause(getFieldQuery(field, termStr), BooleanClause.Occur.SHOULD));
- if (synonymProvider != null) {
- String[] terms = synonymProvider.getSynonyms(termStr);
- for (int i = 0; i < terms.length; i++) {
- synonyms.add(new BooleanClause(getFieldQuery(field, terms[i]), BooleanClause.Occur.SHOULD));
- }
- }
- if (synonyms.size() == 1) {
- return ((BooleanClause) synonyms.get(0)).getQuery();
- } else {
- return getBooleanQuery(synonyms);
- }
- }
-
- /**
- * Returns a String where the escape char has been
- * removed, or kept only once if there was a double escape.
- */
- private String discardEscapeChar(String input) {
- char[] caSource = input.toCharArray();
- char[] caDest = new char[caSource.length];
- int j = 0;
- for (int i = 0; i < caSource.length; i++) {
- if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
- caDest[j++]=caSource[i];
- }
- }
- return new String(caDest, 0, j);
- }
-
- /**
- * Returns a String where those characters that QueryParser
- * expects to be escaped are escaped, i.e. preceded by a \.
- */
- public static String escape(String s) {
- StringBuffer sb = new StringBuffer();
- for (int i = 0; i < s.length(); i++) {
- char c = s.charAt(i);
- // NOTE: keep this in sync with _ESCAPED_CHAR below!
- if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
- || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
- || c == '*' || c == '?') {
- sb.append('\\');
- }
- sb.append(c);
- }
- return sb.toString();
- }
-
- public static void main(String[] args) throws Exception {
- QueryParser qp = new QueryParser("field",
- new org.apache.lucene.analysis.SimpleAnalyzer());
- Query q = qp.parse(args[0]);
- System.out.println(q.toString("field"));
- }
-}
-
-PARSER_END(QueryParser)
-
-/* ***************** */
-/* Token Definitions */
-/* ***************** */
-
-<*> TOKEN : {
- <#_NUM_CHAR: ["0"-"9"] >
-// NOTE: keep this in sync with escape(String) above!
-| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?" ] >
-| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?" ]
- | <_ESCAPED_CHAR> ) >
-| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
-}
-
-