Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
===================================================================
--- lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (revision 1467002)
+++ lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (working copy)
@@ -52,9 +52,9 @@
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    wildcardInput = new String[] { "übersetzung über*ung",
+    wildcardInput = new String[] { "*bersetzung über*ung",
         "Mötley Cr\u00fce Mötl?* Crü?", "Renée Zellweger Ren?? Zellw?ger" };
-    wildcardExpected = new String[] { "ubersetzung uber*ung", "motley crue motl?* cru?",
+    wildcardExpected = new String[] { "*bersetzung uber*ung", "motley crue motl?* cru?",
         "renee zellweger ren?? zellw?ger" };
 
     prefixInput = new String[] { "übersetzung übersetz*",
@@ -74,36 +74,48 @@
     a = new ASCIIAnalyzer();
   }
 
+  public void testWildCardQueryNoLeadingAllowed() {
+    boolean ex = false;
+    try {
+      parseWithAnalyzingQueryParser(wildcardInput[0], a, false);
+    } catch (ParseException e) {
+      ex = true;
+    }
+    assertEquals("Testing wildcard query with leading wildcard not allowed",
+        true, ex);
+  }
+
   public void testWildCardQuery() throws ParseException {
     for (int i = 0; i < wildcardInput.length; i++) {
       assertEquals("Testing wildcards with analyzer " + a.getClass() + ", input string: "
-          + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a));
+          + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a, true));
     }
   }
 
   public void testPrefixQuery() throws ParseException {
     for (int i = 0; i < prefixInput.length; i++) {
       assertEquals("Testing prefixes with analyzer " + a.getClass() + ", input string: "
-          + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a));
+          + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a, false));
     }
   }
 
   public void testRangeQuery() throws ParseException {
     for (int i = 0; i < rangeInput.length; i++) {
       assertEquals("Testing ranges with analyzer " + a.getClass() + ", input string: "
-          + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a));
+          + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a, false));
     }
   }
 
   public void testFuzzyQuery() throws ParseException {
     for (int i = 0; i < fuzzyInput.length; i++) {
       assertEquals("Testing fuzzys with analyzer " + a.getClass() + ", input string: "
-          + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a));
+          + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a, false));
     }
   }
 
-  private String parseWithAnalyzingQueryParser(String s, Analyzer a) throws ParseException {
+  private String parseWithAnalyzingQueryParser(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
     AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "field", a);
+    qp.setAllowLeadingWildcard(allowLeadingWildcard);
     org.apache.lucene.search.Query q = qp.parse(s);
     return q.toString("field");
   }
@@ -171,4 +183,4 @@
     ir.close();
     ramDir.close();
   }
-}
\ No newline at end of file
+}
Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
===================================================================
--- lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (revision 1467002)
+++ lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (working copy)
@@ -21,6 +21,8 @@
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -43,11 +45,16 @@
  */
 public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.QueryParser {
+  // matches maximal runs of characters that contain no wildcard ('?' or '*')
+  private final Pattern nonWildcardPattern = Pattern.compile("(?s)([^\\?\\*]+)");
+
   /**
    * Constructs a query parser.
    * @param field the default field for query terms.
    * @param analyzer used to find terms in the query text.
    */
+
+
   public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
     super(matchVersion, field, analyzer);
     setAnalyzeRangeTerms(true);
   }
@@ -75,101 +82,56 @@
    */
   @Override
   protected Query getWildcardQuery(String field, String termStr) throws ParseException {
-    List tlist = new ArrayList();
-    List wlist = new ArrayList();
-    /* somewhat a hack: find/store wildcard chars
-     * in order to put them back after analyzing */
-    boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
-    StringBuilder tmpBuffer = new StringBuilder();
-    char[] chars = termStr.toCharArray();
-    for (int i = 0; i < termStr.length(); i++) {
-      if (chars[i] == '?' || chars[i] == '*') {
-        if (isWithinToken) {
-          tlist.add(tmpBuffer.toString());
-          tmpBuffer.setLength(0);
-        }
-        isWithinToken = false;
-      } else {
-        if (!isWithinToken) {
-          wlist.add(tmpBuffer.toString());
-          tmpBuffer.setLength(0);
-        }
-        isWithinToken = true;
-      }
-      tmpBuffer.append(chars[i]);
-    }
-    if (isWithinToken) {
-      tlist.add(tmpBuffer.toString());
-    } else {
-      wlist.add(tmpBuffer.toString());
-    }
+    String normalized = normalizeMustBeSingleTerm(field, termStr);
+    return super.getWildcardQuery(field, normalized);
 
-    // get Analyzer from superclass and tokenize the term
-    TokenStream source;
-
-    int countTokens = 0;
-    try {
-      source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-      source.reset();
-    } catch (IOException e1) {
-      throw new RuntimeException(e1);
-    }
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-    while (true) {
-      try {
-        if (!source.incrementToken()) break;
-      } catch (IOException e) {
-        break;
-      }
-      String term = termAtt.toString();
-      if (!"".equals(term)) {
-        try {
-          tlist.set(countTokens++, term);
-        } catch (IndexOutOfBoundsException ioobe) {
-          countTokens = -1;
+  }
+
+  private String normalizeMustBeSingleTerm(String field, String termStr) throws ParseException {
+    Matcher nonWildcardMatcher = nonWildcardPattern.matcher(termStr);
+    StringBuilder sb = new StringBuilder();
+    nonWildcardMatcher.reset(termStr);
+    int last = 0;
+    List<String> list = new ArrayList<String>();
+    while (nonWildcardMatcher.find()) {
+      String bit = nonWildcardMatcher.group(1);
+      sb.append(termStr.substring(last, nonWildcardMatcher.start()));
+      list.clear();
+      TokenStream stream = null;
+      try {
+        stream = getAnalyzer().tokenStream(field, new StringReader(bit));
+        stream.reset();
+        CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+        while (stream.incrementToken()) {
+          list.add(termAtt.toString());
         }
-      }
-    }
-    try {
-      source.end();
-      source.close();
-    } catch (IOException e) {
-      // ignore
-    }
+        stream.end();
 
-    if (countTokens != tlist.size()) {
-      /* this means that the analyzer used either added or consumed
-       * (common for a stemmer) tokens, and we can't build a WildcardQuery */
-      throw new ParseException("Cannot build WildcardQuery with analyzer "
-          + getAnalyzer().getClass() + " - tokens added or lost");
-    }
-
-    if (tlist.size() == 0) {
-      return null;
-    } else if (tlist.size() == 1) {
-      if (wlist != null && wlist.size() == 1) {
-        /* if wlist contains one wildcard, it must be at the end, because:
-         * 1) wildcards are not allowed in 1st position of a term by QueryParser
-         * 2) if wildcard was *not* in end, there would be *two* or more tokens */
-        return super.getWildcardQuery(field, tlist.get(0)
-            + wlist.get(0).toString());
-      } else {
-        /* we should never get here! if so, this method was called
-         * with a termStr containing no wildcard ... */
-        throw new IllegalArgumentException("getWildcardQuery called without wildcard");
-      }
-    } else {
-      /* the term was tokenized, let's rebuild to one token
-       * with wildcards put back in postion */
-      StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < tlist.size(); i++) {
-        sb.append( tlist.get(i));
-        if (wlist != null && wlist.size() > i) {
-          sb.append(wlist.get(i));
+      } catch (IOException e) {
+        throw new ParseException(
+            String.format("IO error while trying to analyze/normalize single term: %s",
+                termStr));
+      } finally {
+        if (stream != null) {
+          try {
+            stream.close();
+          } catch (IOException e) {
+            // ignore exception on close
+          }
         }
       }
-      return super.getWildcardQuery(field, sb.toString());
+      if (list.isEmpty()) {
+        throw new ParseException("Analyzer returned no terms for \"" + bit + "\" - it may be removing stop words");
+      } else if (list.size() > 1) {
+        throw new ParseException(
+            String.format("There is a term-breaking character between %s and %s",
+                list.get(0), list.get(1)));
+      }
+      last = nonWildcardMatcher.end();
+      sb.append(list.get(0));
     }
+    sb.append(termStr.substring(last));
+    return sb.toString();
   }
 
   /**