Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java (working copy) @@ -113,7 +113,7 @@ } public void testPosIncrementAnalyzer() throws ParseException { - QueryParser qp = new QueryParser(Version.LUCENE_40, "", new PosIncrementAnalyzer()); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "", new PosIncrementAnalyzer()); assertEquals("quick brown", qp.parse("the quick brown").toString()); assertEquals("quick brown fox", qp.parse("the quick brown fox").toString()); } @@ -122,8 +122,11 @@ * Expands "multi" to "multi" and "multi2", both at the same position, * and expands "triplemulti" to "triplemulti", "multi3", and "multi2". */ - private class MultiAnalyzer extends Analyzer { + protected class MultiAnalyzer extends Analyzer { + public MultiAnalyzer() { + } + @Override public TokenStreamComponents createComponents(String fieldName) { Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); @@ -193,8 +196,12 @@ * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). * Does not work correctly for input other than "the quick brown ...". 
*/ - private class PosIncrementAnalyzer extends Analyzer { + protected class PosIncrementAnalyzer extends Analyzer { + public PosIncrementAnalyzer() { + // TODO Auto-generated constructor stub + } + @Override public TokenStreamComponents createComponents(String fieldName) { Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestMultiAnalyzer.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestMultiAnalyzer.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestMultiAnalyzer.java (working copy) @@ -1,213 +0,0 @@ -package org.apache.lucene.queryparser.spans; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.spans.SpanQueryParser; - -/** - * Test SpanQueryParser's ability to deal with Analyzers that return more - * than one token per position or that return tokens with a position - * increment > 1. - * - * Copied nearly verbatim from TestMultiAnalyzer for classic QueryParser!!! - * - */ -public class TestMultiAnalyzer extends BaseTokenStreamTestCase { - private static int multiToken = 0; - - public void testMultiAnalyzer() throws ParseException { - - SpanQueryParser qp = new SpanQueryParser(TEST_VERSION_CURRENT, "", new MultiAnalyzer()); - - // trivial, no multiple tokens: - assertEquals("foo", qp.parse("foo").toString()); - assertEquals("foo", qp.parse("\"foo\"").toString()); - assertEquals("foo foobar", qp.parse("foo foobar").toString()); - assertEquals("spanNear([foo, foobar], 0, true)", qp.parse("\"foo foobar\"").toString()); - assertEquals("spanNear([foo, foobar, blah], 0, true)", qp.parse("\"foo foobar blah\"").toString()); - - // two tokens at the same position: - assertEquals("spanOr([multi, multi2]) foo", qp.parse("multi foo").toString()); - assertEquals("foo spanOr([multi, multi2])", qp.parse("foo multi").toString()); - assertEquals("spanOr([multi, multi2]) spanOr([multi, multi2])", qp.parse("multi multi").toString()); - assertEquals("+(foo spanOr([multi, 
multi2])) +(bar spanOr([multi, multi2]))", - qp.parse("+(foo multi) +(bar multi)").toString()); - assertEquals("+(foo spanOr([multi, multi2])) spanNear([field:bar, spanOr([field:multi, field:multi2])], 0, true)", - qp.parse("+(foo multi) field:\"bar multi\"").toString()); - - // phrases: - assertEquals("spanNear([spanOr([multi, multi2]), foo], 0, true)", qp.parse("\"multi foo\"").toString()); - assertEquals("spanNear([foo, spanOr([multi, multi2])], 0, true)", qp.parse("\"foo multi\"").toString()); - assertEquals("spanNear([foo, spanOr([multi, multi2]), foobar, spanOr([multi, multi2])], 0, true)", - qp.parse("\"foo multi foobar multi\"").toString()); - - // fields: - assertEquals("spanOr([field:multi, field:multi2]) field:foo", qp.parse("field:multi field:foo").toString()); - assertEquals("spanNear([spanOr([field:multi, field:multi2]), field:foo], 0, true)", qp.parse("field:\"multi foo\"").toString()); - - // three tokens at one position: - assertEquals("spanOr([triplemulti, multi3, multi2])", qp.parse("triplemulti").toString()); - assertEquals("foo spanOr([triplemulti, multi3, multi2]) foobar", - qp.parse("foo triplemulti foobar").toString()); - - // phrase with non-default slop: - assertEquals("spanNear([spanOr([multi, multi2]), foo], 10, false)", qp.parse("\"multi foo\"~10").toString()); - - // phrase with non-default boost: - assertEquals("spanNear([spanOr([multi, multi2]), foo], 0, true)^2.0", qp.parse("\"multi foo\"^2").toString()); - - // phrase after changing default slop - qp.setPhraseSlop(99); - assertEquals("spanNear([spanOr([multi, multi2]), foo], 99, false) bar", - qp.parse("\"multi foo\" bar").toString()); - assertEquals("spanNear([spanOr([multi, multi2]), foo], 99, false) spanNear([foo, bar], 2, false)", - qp.parse("\"multi foo\" \"foo bar\"~2").toString()); - qp.setPhraseSlop(0); - } - - public void testPosIncrementAnalyzer() throws ParseException { - SpanQueryParser qp = new SpanQueryParser(TEST_VERSION_CURRENT,"", new PosIncrementAnalyzer()); - 
assertEquals("quick brown", qp.parse("the quick brown").toString()); - assertEquals("quick brown fox", qp.parse("the quick brown fox").toString()); - } - - /** - * Expands "multi" to "multi" and "multi2", both at the same position, - * and expands "triplemulti" to "triplemulti", "multi3", and "multi2". - */ - private class MultiAnalyzer extends Analyzer { - - @Override - public TokenStreamComponents createComponents(String fieldName) { - Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); - return new TokenStreamComponents(result, new TestFilter(result)); - } - } - - private final class TestFilter extends TokenFilter { - - private String prevType; - private int prevStartOffset; - private int prevEndOffset; - - private final CharTermAttribute termAtt; - private final PositionIncrementAttribute posIncrAtt; - private final OffsetAttribute offsetAtt; - private final TypeAttribute typeAtt; - - public TestFilter(TokenStream in) { - super(in); - termAtt = addAttribute(CharTermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - } - - @Override - public final boolean incrementToken() throws IOException { - if (multiToken > 0) { - termAtt.setEmpty().append("multi"+(multiToken+1)); - offsetAtt.setOffset(prevStartOffset, prevEndOffset); - typeAtt.setType(prevType); - posIncrAtt.setPositionIncrement(0); - multiToken--; - return true; - } else { - boolean next = input.incrementToken(); - if (!next) { - return false; - } - prevType = typeAtt.type(); - prevStartOffset = offsetAtt.startOffset(); - prevEndOffset = offsetAtt.endOffset(); - String text = termAtt.toString(); - if (text.equals("triplemulti")) { - multiToken = 2; - return true; - } else if (text.equals("multi")) { - multiToken = 1; - return true; - } else { - return true; - } - } - } - - @Override - public void reset() throws IOException { - super.reset(); - this.prevType = 
null; - this.prevStartOffset = 0; - this.prevEndOffset = 0; - } - } - - /** - * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). - * Does not work correctly for input other than "the quick brown ...". - */ - private class PosIncrementAnalyzer extends Analyzer { - - @Override - public TokenStreamComponents createComponents(String fieldName) { - Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); - return new TokenStreamComponents(result, new TestPosIncrementFilter(result)); - } - } - - private final class TestPosIncrementFilter extends TokenFilter { - - CharTermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; - - public TestPosIncrementFilter(TokenStream in) { - super(in); - termAtt = addAttribute(CharTermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - } - - @Override - public final boolean incrementToken() throws IOException { - while(input.incrementToken()) { - if (termAtt.toString().equals("the")) { - // stopword, do nothing - } else if (termAtt.toString().equals("quick")) { - posIncrAtt.setPositionIncrement(2); - return true; - } else { - posIncrAtt.setPositionIncrement(1); - return true; - } - } - return false; - } - } -} Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestComplexPhraseSpanQuery.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestComplexPhraseSpanQuery.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestComplexPhraseSpanQuery.java (working copy) @@ -17,151 +17,45 @@ * limitations under the License. 
*/ -import java.util.HashSet; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.StoredDocument; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.queryparser.complexPhrase.TestComplexPhraseQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; /** - * Copied and pasted from TestComplexPhraseSpanQuery r1569314. + * Copied and pasted from TestComplexPhraseQuery r1569314. * Had to make small changes in syntax. */ -public class TestComplexPhraseSpanQuery extends LuceneTestCase { - Directory rd; - Analyzer analyzer; - - DocData docsContent[] = { new DocData("john smith", "1"), - new DocData("johathon smith", "2"), - new DocData("john percival smith", "3"), - new DocData("jackson waits tom", "4") }; +public class TestComplexPhraseSpanQuery extends TestComplexPhraseQuery { - private IndexSearcher searcher; - private IndexReader reader; - - String defaultFieldName = "name"; - - public void testComplexPhrases() throws Exception { - checkMatches("\"john smith\"", "1"); // Simple multi-term still works - checkMatches("\"j* smyth~\"", "1,2"); // wildcards and fuzzies are OK in - // phrases - //WAS: -// checkMatches("\"(jo* -john) smith\"", "2"); // boolean logic works - checkMatches("\"[jo* john]!~ smith\"", "2"); // boolean logic works - checkMatches("\"jo* smith\"~2", "1,2,3"); // position logic works. 
- checkMatches("\"jo* [sma TO smZ]\" ", "1,2"); // range queries supported - checkMatches("\"john\"", "1,3"); // Simple single-term still works + @Override + public Query getQuery(String qString) throws Exception { + SpanQueryParser p = new SpanQueryParser(TEST_VERSION_CURRENT, + defaultFieldName, analyzer); + return p.parse(qString); + } + + @Override + public void testParserSpecificSyntax() throws Exception { + //can't have boolean operators within a SpanNear + //must rewrite as SpanNot !~ or ( OR ) clauses without the "OR" + checkMatches("\"[jo* john]!~ smith\"", "2"); + checkMatches("\"(john johathon) smith\"", "1,2"); + checkMatches("\"[jo* john]!~ smyth~\"", "2"); + checkMatches("\"john percival\"!~2,2", "1"); + //check multiterms with no hits + checkMatches("\"john nosuchword*\"", ""); + checkMatches("\"john nosuchw?rd\"!~2,3", "1,3"); + checkMatches("\"nosuchw?rd john\"!~2,3", ""); + checkMatches("\"nosuchw?rd john\"", ""); + //WAS: -// checkMatches("\"(john OR johathon) smith\"", "1,2"); // boolean logic with - checkMatches("\"(john johathon) smith\"", "1,2"); // boolean logic with - // brackets works. - //WAS: -// checkMatches("\"(jo* -john) smyth~\"", "2"); // boolean logic with - checkMatches("\"[jo* john]!~ smyth~\"", "2"); // boolean logic with - // brackets works. 
- //WAS: - // checkMatches("\"john -percival\"", "1"); // not logic doesn't work - checkMatches("\"john percival\"!~2,2", "1"); // not logic doesn't work - - checkMatches("\"john nosuchword*\"", ""); // phrases with clauses producing - checkMatches("\"john nosuchw?rd\"!~2,3", "1,3"); // phrases with clauses producing - checkMatches("\"nosuchw?rd john\"!~2,3", ""); // phrases with clauses producing - checkMatches("\"nosuchw?rd john\"", ""); // phrases with clauses producing - - // empty sets - checkBadQuery("\"jo* id:1 smith\""); // mixing fields in a phrase is bad - //WAS: - //checkBadQuery("\"jo* \"smith\" \""); // phrases inside phrases is bad + //checkBadQuery("\"jo* \"smith\" \""); //IS: ignore test. SpanQueryParser will parse this as //1) "jo* " //2) smith //3) " " checkBadQuery("\"(jo* -john) smith\""); // can't have boolean operators in phrase - } - private void checkBadQuery(String qString) { - SpanQueryParser qp = new SpanQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer); - Throwable expected = null; - try { - qp.parse(qString); - } catch (Throwable e) { - expected = e; - } - assertNotNull("Expected parse error in " + qString, expected); } - private void checkMatches(String qString, String expectedVals) - throws Exception { - SpanQueryParser qp = new SpanQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer); - qp.setFuzzyPrefixLength(1); // usually a good idea - - Query q = qp.parse(qString); - System.out.println("QUERY: " + q.toString()); - HashSet expecteds = new HashSet(); - String[] vals = expectedVals.split(","); - for (int i = 0; i < vals.length; i++) { - if (vals[i].length() > 0) - expecteds.add(vals[i]); - } - - TopDocs td = searcher.search(q, 10); - ScoreDoc[] sd = td.scoreDocs; - for (int i = 0; i < sd.length; i++) { - StoredDocument doc = searcher.doc(sd[i].doc); - String id = doc.get("id"); - assertTrue(qString + "matched doc#" + id + " not expected", expecteds - .contains(id)); - expecteds.remove(id); - } - 
assertEquals(qString + " missing some matches ", 0, expecteds.size()); - } - - @Override - public void setUp() throws Exception { - super.setUp(); - - analyzer = new MockAnalyzer(random()); - rd = newDirectory(); - IndexWriter w = new IndexWriter(rd, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); - for (int i = 0; i < docsContent.length; i++) { - Document doc = new Document(); - doc.add(newTextField("name", docsContent[i].name, Field.Store.YES)); - doc.add(newTextField("id", docsContent[i].id, Field.Store.YES)); - w.addDocument(doc); - } - w.close(); - reader = DirectoryReader.open(rd); - searcher = newSearcher(reader); - } - - @Override - public void tearDown() throws Exception { - reader.close(); - rd.close(); - super.tearDown(); - } - - static class DocData { - String name; - - String id; - - public DocData(String name, String id) { - super(); - this.name = name; - this.id = id; - } - } } Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestQPTestBaseSpanQuery.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestQPTestBaseSpanQuery.java (revision 0) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestQPTestBaseSpanQuery.java (revision 0) @@ -0,0 +1,489 @@ +package org.apache.lucene.queryparser.spans; + + +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.DateTools.Resolution; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser.Operator; +import org.apache.lucene.queryparser.classic.QueryParserBase; +import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration; 
+import org.apache.lucene.queryparser.util.QueryParserTestBase; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.junit.Ignore; + +public class TestQPTestBaseSpanQuery extends QueryParserTestBase { + + + @Override + public void testDefaultOperator() throws Exception { + SpanQueryParser qp = getParser(new MockAnalyzer(random())); + // make sure OR is the default: + assertEquals(QueryParserBase.OR_OPERATOR, qp.getDefaultOperator()); + setDefaultOperatorAND(qp); + assertEquals(QueryParserBase.AND_OPERATOR, qp.getDefaultOperator()); + setDefaultOperatorOR(qp); + assertEquals(QueryParserBase.OR_OPERATOR, qp.getDefaultOperator()); + + } + + @Override + public void testStarParsing() throws Exception { + // TODO Auto-generated method stub + + } + + @Override + public void testNewFieldQuery() throws Exception { + // TODO Auto-generated method stub + + } + + @Override + public CommonQueryParserConfiguration getParserConfig(Analyzer a) + throws Exception { + return getParser(a); + } + + public SpanQueryParser getParser(Analyzer a) { + if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); + SpanQueryParser qp = new SpanQueryParser(TEST_VERSION_CURRENT, getDefaultField(), a); + qp.setDefaultOperator(QueryParserBase.OR_OPERATOR); + return qp; + } + + @Override + public void setDefaultOperatorOR(CommonQueryParserConfiguration cqpC) { + assert (cqpC instanceof 
SpanQueryParser); + ((SpanQueryParser) cqpC).setDefaultOperator(Operator.OR); + } + + @Override + public void setDefaultOperatorAND(CommonQueryParserConfiguration cqpC) { + assert (cqpC instanceof SpanQueryParser); + ((SpanQueryParser) cqpC).setDefaultOperator(Operator.AND); + } + + @Override + public void setAnalyzeRangeTerms(CommonQueryParserConfiguration cqpC, + boolean value) { + assert (cqpC instanceof SpanQueryParser); + ((SpanQueryParser)cqpC).setAnalyzeRangeTerms(value); + } + + @Override + public void setAutoGeneratePhraseQueries(CommonQueryParserConfiguration cqpC, + boolean value) { + assert (cqpC instanceof SpanQueryParser); + ((SpanQueryParser)cqpC).setAutoGeneratePhraseQueries(value); + } + + @Override + public void setDateResolution(CommonQueryParserConfiguration cqpC, + CharSequence field, Resolution value) { + assert (cqpC instanceof SpanQueryParser); + ((SpanQueryParser)cqpC).setDateResolution(field.toString(), value); + } + + @Override + public Query getQuery(String query, CommonQueryParserConfiguration cqpC) + throws Exception { + assert cqpC != null : "Parameter must not be null"; + assert (cqpC instanceof SpanQueryParser) : "Parameter must be instance of QueryParser"; + SpanQueryParser qp = (SpanQueryParser) cqpC; + return qp.parse(query); + } + + @Override + public Query getQuery(String query, Analyzer a) throws Exception { + SpanQueryParser p = getParser(a); + return p.parse(query); + } + + @Override + public boolean isQueryParserException(Exception exception) { + if (exception instanceof ParseException) { + return true; + } + return false; + } + + @Override + public void assertQueryEquals(CommonQueryParserConfiguration cqpC, String field, String query, String result) throws Exception { + Query q = getQuery(query, cqpC); + if (q instanceof SpanMultiTermQueryWrapper) { + @SuppressWarnings("rawtypes") + Query tmp = ((SpanMultiTermQueryWrapper)q).getWrappedQuery(); + tmp.setBoost(q.getBoost()); + q = tmp; + } + assertEquals(result, 
q.toString(field)); + } + + @Override + public void assertQueryEquals(String query, Analyzer a, String result) throws Exception { + Query q = getQuery(query, a); + if (q instanceof SpanMultiTermQueryWrapper) { + @SuppressWarnings("rawtypes") + Query tmp = ((SpanMultiTermQueryWrapper)q).getWrappedQuery(); + tmp.setBoost(q.getBoost()); + q = tmp; + } else if (q instanceof SpanOrQuery){ + if (((SpanOrQuery)q).getClauses().length == 0){ + q = new BooleanQuery(); + } + } + assertEquals(result, q.toString("field")); + } + + public void assertQueryEqualsCMP(String query, Analyzer a, String result) throws Exception { + Query q = getQuery(query, a); + if (q instanceof SpanMultiTermQueryWrapper){ + @SuppressWarnings("rawtypes") + Query tmp = ((SpanMultiTermQueryWrapper)q).getWrappedQuery(); + tmp.setBoost(q.getBoost()); + q = tmp; + } else if (q instanceof SpanOrQuery){ + if (((SpanOrQuery)q).getClauses().length == 0){ + q = new BooleanQuery(); + } + } + assertEquals(result, q.toString("field")); + } + + @Override + public void assertQueryEquals(Query expected, Query test) { + assertEquals("boost", expected.getBoost(), test.getBoost(), 0.0001f); + if (test instanceof SpanMultiTermQueryWrapper){ + @SuppressWarnings("rawtypes") + Query tmp = ((SpanMultiTermQueryWrapper)test).getWrappedQuery(); + tmp.setBoost(test.getBoost()); + test = tmp; + } else if (test instanceof SpanOrQuery){ + if (((SpanOrQuery)test).getClauses().length == 0){ + test = new BooleanQuery(); + } + } else if (test instanceof BooleanQuery && expected instanceof BooleanQuery){ + //lots of reasons why this simple equivalence won't work + //but it works well enough for current tests + BooleanClause[] exClause = ((BooleanQuery)expected).getClauses(); + BooleanClause[] testClause = ((BooleanQuery)test).getClauses(); + assertEquals("boolean clause length =", exClause.length, testClause.length); + for (int i = 0; i < exClause.length; i++){ + assertTrue(exClause[i].getOccur().equals(testClause[i].getOccur())); + 
//recur + assertQueryEquals(exClause[i].getQuery(), testClause[i].getQuery()); + } + return; + } else if (test instanceof SpanNearQuery && expected instanceof PhraseQuery){ + //lots of reasons why this simple equivalence won't work + //but it works well enough for current tests + Term[] exTerms = ((PhraseQuery)expected).getTerms(); + SpanQuery[] testClauses = ((SpanNearQuery)test).getClauses(); + assertEquals("phrase clause length =", exTerms.length, testClauses.length); + for (int i = 0; i < exTerms.length; i++){ + assertEquals(exTerms[i].field()+":"+exTerms[i].text(), + testClauses[i].toString()); + } + assertEquals("slop", ((SpanNearQuery)test).getSlop(), ((PhraseQuery)expected).getSlop()); + return; + + } + super.assertQueryEquals(expected, test); + } + + @Override + public void assertFuzzyQueryEquals(String field, String term, int maxEdits, int prefixLen, Query query) { + assert(query instanceof SpanMultiTermQueryWrapper); + @SuppressWarnings("rawtypes") + Query wrapped = ((SpanMultiTermQueryWrapper)query).getWrappedQuery(); + super.assertFuzzyQueryEquals(field, term, maxEdits, prefixLen, wrapped); + } + + @Override + public void assertWildcardQueryEquals(String query, boolean lowercase, String result, boolean allowLeadingWildcard) throws Exception { + CommonQueryParserConfiguration cqpC = getParserConfig(null); + cqpC.setLowercaseExpandedTerms(lowercase); + cqpC.setAllowLeadingWildcard(allowLeadingWildcard); + assertQueryEquals(cqpC, "field", query, result); + } + + @Override + public void assertWildcardQueryEquals(String query, String result) throws Exception { + assertQueryEquals(query, null, result); + } + + @Override + public void assertEmpty(Query q){ + boolean e = false; + if (q instanceof SpanOrQuery && ((SpanOrQuery)q).getClauses().length == 0) { + e = true; + } + assertTrue("Empty: "+q.toString(), e); + } + + @SuppressWarnings("rawtypes") + @Override + public void assertInstanceOf(Query q, Class other) { + if (q instanceof SpanMultiTermQueryWrapper) 
{ + q = ((SpanMultiTermQueryWrapper)q).getWrappedQuery(); + } else if (q instanceof SpanTermQuery && other.equals(TermQuery.class)) { + assertTrue("termquery", true); + return; + } else if (q instanceof SpanNearQuery && other.equals(PhraseQuery.class)) { + assertTrue("spannear/phrase", true); + return; + } else if (q instanceof SpanOrQuery && other.equals(BooleanQuery.class)) { + assertTrue("spanor/boolean", true); + return; + } + assertTrue(q.getClass().isAssignableFrom(other)); + } + + @Override + protected String escapeDateString(String s) { + if (s.indexOf(" ") > -1 || s.indexOf("/") > -1 || s.indexOf("-") > -1) { + return "\'" + s + "\'"; + } else { + return s; + } + } + + /** + * Overridden tests follow + */ + + @Override + public void testCJKTerm() throws Exception { + // individual CJK chars as terms + SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); + + SpanOrQuery expected = new SpanOrQuery( + new SpanQuery[]{ + new SpanTermQuery(new Term("field", "中")), + new SpanTermQuery(new Term("field", "国")) + } + ); + assertEquals(expected, getQuery("中国", analyzer)); + } + + @Override + public void testCJKBoostedTerm() throws Exception { + // individual CJK chars as terms + SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); + + SpanOrQuery expected = new SpanOrQuery( + new SpanTermQuery[] { + new SpanTermQuery(new Term("field", "中")), + new SpanTermQuery(new Term("field", "国")) + + } + ); + expected.setBoost(0.5f); + assertEquals(expected, getQuery("中国^0.5", analyzer)); + } + + @Override + public void testCJKPhrase() throws Exception { + // individual CJK chars as terms + SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); + + SpanNearQuery expected = new SpanNearQuery( + new SpanTermQuery[] { + new SpanTermQuery(new Term("field", "中")), + new SpanTermQuery(new Term("field", "国")) + }, 0, true + ); + + assertEquals(expected, getQuery("\"中国\"", analyzer)); + } + + @Override + public void testNestedAndClausesFoo() throws Exception { + String query = 
"(field1:[1 TO *] AND field1:[* TO 2]) AND field2:(z)"; + Query q = getQuery(query); + assertEquals("nestedAndClausesFoo", + "+(+SpanMultiTermQueryWrapper(field1:[1 TO *]) +SpanMultiTermQueryWrapper(field1:[* TO 2])) +field2:z", + q.toString()); + } + + @Override + public void testPhraseQueryToString() throws Exception { + //no current equivalence in SpanNearQuery with stop words + } + + @Override + public void testPositionIncrement() throws Exception { + //For SQP, this only tests whether stop words have been dropped. + //PositionIncrements are not available in SpanQueries yet. + CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)); + //qp.setEnablePositionIncrements(true); + String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\""; + // 0 2 5 7 8 + SpanNearQuery pq = (SpanNearQuery) getQuery(qtxt,qp); + SpanQuery[] clauses = pq.getClauses(); + assertEquals(clauses.length, 5); + Set expected = new HashSet(); + expected.add(new Term("field", "words")); + expected.add(new Term("field", "poisitions")); + expected.add(new Term("field", "pos")); + expected.add(new Term("field", "stopped")); + expected.add(new Term("field", "phrasequery")); + + Set terms = new HashSet(); + for (int i = 0; i < clauses.length; i++) { + SpanQuery q = clauses[i]; + q.extractTerms(terms); + } + assertEquals(expected, terms); + } + + @Override + public void testPositionIncrements() throws Exception { + //doesn't apply/known issue with SpanQueries and stop words + } + + @Override + public void testPhraseQueryPositionIncrements() throws Exception { + //doesn't apply + } + + //string query equality tests that have to be rewritten + //if parser is generating a SpanQuery + @SuppressWarnings("rawtypes") + @Override + public void testParserSpecificQuery() throws Exception { + + /* SpanQueryParser doesn't handle ! 
|| && syntax yet + + */ + + assertQueryEquals("term AND \"phrase phrase\"", null, + "+term +spanNear([phrase, phrase], 0, true)"); + assertQueryEquals("\"hello there\"", null, "spanNear([hello, there], 0, true)"); + + assertQueryEquals("\"germ term\"^2.0", null, "spanNear([germ, term], 0, true)^2.0"); + assertQueryEquals("\"term germ\"^2", null, "spanNear([term, germ], 0, true)^2.0"); + + assertQueryEquals("(foo bar) AND (baz boo)", null, + "+(foo bar) +(baz boo)"); + assertQueryEquals("((a b) AND NOT c) d", null, + "(+(a b) -c) d"); + assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, + "+(apple spanNear([steve, jobs], 0, true)) -(foo bar baz)"); + assertQueryEquals("+title:(dog cat) -author:\"bob dole\"", null, + "+(title:dog title:cat) -spanNear([author:bob, author:dole], 0, true)"); + + + //testRegexps + CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); + //SpanQueryParser does not re-escape the backslash + Query escaped = new RegexpQuery(new Term("field", "[a-z]/[123]")); + + assertQueryEquals(escaped, getQuery("/[a-z]\\/[123]/",qp)); + + BooleanQuery complex = new BooleanQuery(); + complex.add( + new RegexpQuery(new Term("field", "[a-z]/[123]")), Occur.MUST); + complex.add(new SpanTermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); + complex.add(new SpanTermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); + + assertQueryEquals(complex, getQuery( + "/[a-z]\\/[123]/ AND path:\\/etc\\/init.d\\/ OR \\/etc\\/init\\[.\\]d/lucene\\/ ",qp)); + qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); + Query q = getQuery("/[A-Z][123]/^0.5",qp); + assertTrue(q instanceof SpanMultiTermQueryWrapper); + + SpanMultiTermQueryWrapper smtqw = (SpanMultiTermQueryWrapper)q; + //not: MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, + assertEquals(SpanMultiTermQueryWrapper.SCORING_SPAN_QUERY_REWRITE, + smtqw.getRewriteMethod()); + + //testWildcard + 
//SpanQueryParser cannot parse boost before fuzzy + assertParseException("term^3~"); + + + //testRange + + assertEquals(SpanMultiTermQueryWrapper.SCORING_SPAN_QUERY_REWRITE, smtqw.getRewriteMethod()); + qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + + q = getQuery("[ a TO z]", qp); + assertTrue(q instanceof SpanMultiTermQueryWrapper); + smtqw = (SpanMultiTermQueryWrapper)q; assertEquals(SpanMultiTermQueryWrapper.SCORING_SPAN_QUERY_REWRITE,smtqw.getRewriteMethod()); + + assertQueryEquals("[ a TO z] OR bar", null, "SpanMultiTermQueryWrapper([a TO z]) bar"); + assertQueryEquals("[ a TO z] AND bar", null, "+SpanMultiTermQueryWrapper([a TO z]) +bar"); + assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar SpanMultiTermQueryWrapper({a TO z})"); + assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar SpanMultiTermQueryWrapper({a TO z}))"); + + //testSlop + assertQueryEquals("\"term germ\"~2 flork", null, "spanNear([term, germ], 2, false) flork"); + + //testEscaped + Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); + //change " to ' for spanquery parser + assertQueryEquals("['c\\:\\\\temp\\\\\\~foo0.txt' TO 'c\\:\\\\temp\\\\\\~foo9.txt']", a, + "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]"); + assertQueryEquals("\"a \\\"b c\\\" d\"", a, "spanNear([a, \"b, c\", d], 0, true)"); + assertQueryEquals("\"a \\+b c d\"", a, "spanNear([a, +b, c, d], 0, true)"); + assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "spanNear([a, \\(b\", c], 0, true)"); + + //testQPA + assertQueryEquals("term phrase term", qpAnalyzer, + "term spanOr([phrase1, phrase2]) term"); + assertQueryEquals("term AND NOT phrase term", qpAnalyzer, + "+term -spanOr([phrase1, phrase2]) term"); + + + } + + @Ignore + public void testSpanQueryParserFail() throws Exception { + //these are tests that SQP cannot pass + + //testQPA + /** + * Currently, the handling 
of synonyms occurs in the lower level Span parsing + * component, not the higher level Boolean component. + * The lower level can't return a BooleanQuery, only a SpanQuery. + * This could probably be fixed. + */ + CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer); + setDefaultOperatorAND(cqpc); + + assertQueryEquals(cqpc, "field", "term phrase term", + "+term +(+phrase1 +phrase2) +term"); + + assertQueryEquals(cqpc, "field", "phrase", + "+phrase1 +phrase2"); + + //testSimple + //no plans to add this syntax unless there is interest + assertQueryEquals("a AND !b", null, "+a -b"); + assertQueryEquals("a && b", null, "+a +b"); + assertQueryEquals("a || b", null, "a b"); + assertQueryEquals("a OR !b", null, "a -b"); + + } +} Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanQueryParserLexer.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanQueryParserLexer.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanQueryParserLexer.java (working copy) @@ -755,7 +755,7 @@ new SQPTerm(s, false) ); } - + private void executeSingleTokenTest(String q, int targetOffset, SQPToken truth) throws ParseException { List tokens = lexer.getTokens(q); Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanOnlyQueryParser.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanOnlyQueryParser.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanOnlyQueryParser.java (working copy) @@ -129,7 +129,7 @@ "\u666E \u6797 \u65AF \u987F \u5927 \u5B66", "reg/exp", "/regex/", - "fuzzy~0.6", + "fuzzy~2", "wil*card", "wil?card", "prefi*", @@ -341,27 +341,13 @@ countSpansDocs(p, "crown~3,1", 0, 0); countSpansDocs(p, "brwn~1,1", 3, 2); - 
p.setFuzzyMinSim(0.6f); - countSpansDocs(p, "brwon~0.80", 3, 2); + p.setFuzzyMinSim(0.79f); - p.setFuzzyMinSim(0.85f); - countSpansDocs(p, "brwon~0.80", 0, 0); - - p.setFuzzyMinSim(0.80f); - countSpansDocs(p, "brwon~2", 3, 2); - p.setFuzzyMinSim(0.60f); - //this requires edit = 3 - testOffsetForSingleSpanMatch(p, "abcdefgh~0.60", 3, 0, 1); - - p.setFuzzyMinSim(0.65f); - //this requires edit = 3, 63% - countSpansDocs(p, "abcdefgh~0.60", 0, 0); - //fuzzy val of 0 should yield straight SpanTermQuery - Query q = p.parse("brown~0.0"); - assertTrue("fuzzy val = 0.0", q instanceof SpanTermQuery); + Query q = p.parse("brown~0"); + assertTrue("fuzzy val = 0", q instanceof SpanTermQuery); q = p.parse("brown~0"); assertTrue("fuzzy val = 0", q instanceof SpanTermQuery); } @@ -495,15 +481,16 @@ assertEquals(true, exc); } - public void testQuotedSingleTerm() throws Exception{ + public void testQuotedSingleTerm() throws Exception { SpanOnlyParser p = new SpanOnlyParser(TEST_VERSION_CURRENT, FIELD, noStopAnalyzer); - String[] quoteds = new String[]{ + String[] quoteds = new String[] { "/regex/", - "fuzzy~0.6", + "fuzzy~2", "wil*card", "wil?card", - "prefi*"}; + "prefi*" + }; for (String q : quoteds) { countSpansDocs(p, "\""+q+"\"", 1, 1); Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestMultiAnalyzerSpanQuery.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestMultiAnalyzerSpanQuery.java (revision 0) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestMultiAnalyzerSpanQuery.java (revision 0) @@ -0,0 +1,97 @@ +package org.apache.lucene.queryparser.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.TestMultiAnalyzer; +import org.apache.lucene.queryparser.spans.SpanQueryParser; + +/** + * Test SpanQueryParser's ability to deal with Analyzers that return more + * than one token per position or that return tokens with a position + * increment {@code > 1}. + * Overrides the tests inherited from {@link TestMultiAnalyzer} so that the expected + * strings are the spanOr/spanNear forms produced by {@link SpanQueryParser}. + */ +public class TestMultiAnalyzerSpanQuery extends TestMultiAnalyzer { + + @Override + public void testMultiAnalyzer() throws ParseException { + + SpanQueryParser qp = new SpanQueryParser(TEST_VERSION_CURRENT, "", new MultiAnalyzer()); + + // trivial, no multiple tokens: + assertEquals("foo", qp.parse("foo").toString()); + assertEquals("foo", qp.parse("\"foo\"").toString()); + assertEquals("foo foobar", qp.parse("foo foobar").toString()); + assertEquals("spanNear([foo, foobar], 0, true)", qp.parse("\"foo foobar\"").toString()); + assertEquals("spanNear([foo, foobar, blah], 0, true)", qp.parse("\"foo foobar blah\"").toString()); + + // two tokens at the same position: + assertEquals("spanOr([multi, multi2]) foo", qp.parse("multi foo").toString()); + assertEquals("foo spanOr([multi, multi2])", qp.parse("foo multi").toString()); + assertEquals("spanOr([multi, multi2]) spanOr([multi, multi2])", qp.parse("multi multi").toString()); + assertEquals("+(foo spanOr([multi, multi2])) +(bar spanOr([multi, multi2]))", + 
qp.parse("+(foo multi) +(bar multi)").toString()); + assertEquals("+(foo spanOr([multi, multi2])) spanNear([field:bar, spanOr([field:multi, field:multi2])], 0, true)", + qp.parse("+(foo multi) field:\"bar multi\"").toString()); + + // phrases: + assertEquals("spanNear([spanOr([multi, multi2]), foo], 0, true)", qp.parse("\"multi foo\"").toString()); + assertEquals("spanNear([foo, spanOr([multi, multi2])], 0, true)", qp.parse("\"foo multi\"").toString()); + assertEquals("spanNear([foo, spanOr([multi, multi2]), foobar, spanOr([multi, multi2])], 0, true)", + qp.parse("\"foo multi foobar multi\"").toString()); + + // fields: + assertEquals("spanOr([field:multi, field:multi2]) field:foo", qp.parse("field:multi field:foo").toString()); + assertEquals("spanNear([spanOr([field:multi, field:multi2]), field:foo], 0, true)", qp.parse("field:\"multi foo\"").toString()); + + // three tokens at one position: + assertEquals("spanOr([triplemulti, multi3, multi2])", qp.parse("triplemulti").toString()); + assertEquals("foo spanOr([triplemulti, multi3, multi2]) foobar", + qp.parse("foo triplemulti foobar").toString()); + + // phrase with non-default slop: + assertEquals("spanNear([spanOr([multi, multi2]), foo], 10, false)", qp.parse("\"multi foo\"~10").toString()); + + // phrase with non-default boost: + assertEquals("spanNear([spanOr([multi, multi2]), foo], 0, true)^2.0", qp.parse("\"multi foo\"^2").toString()); + + // phrase after changing default slop + qp.setPhraseSlop(99); + assertEquals("spanNear([spanOr([multi, multi2]), foo], 99, false) bar", + qp.parse("\"multi foo\" bar").toString()); + assertEquals("spanNear([spanOr([multi, multi2]), foo], 99, false) spanNear([foo, bar], 2, false)", + qp.parse("\"multi foo\" \"foo bar\"~2").toString()); + qp.setPhraseSlop(0); + } + + @Override + public void testMultiAnalyzerWithSubclassOfQueryParser() throws ParseException { + //not testing this for SpanQueryParser + } + + @Override + public void testPosIncrementAnalyzer() throws 
ParseException { + SpanQueryParser qp = new SpanQueryParser(TEST_VERSION_CURRENT,"", new PosIncrementAnalyzer()); + assertEquals("quick brown", qp.parse("the quick brown").toString()); + assertEquals("quick brown fox", qp.parse("the quick brown fox").toString()); + } + + +} Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanQPBasedOnQPTestBase.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanQPBasedOnQPTestBase.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/spans/TestSpanQPBasedOnQPTestBase.java (working copy) @@ -1,924 +0,0 @@ -package org.apache.lucene.queryparser.spans; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.util.Calendar; -import java.util.GregorianCalendar; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; -import java.util.TimeZone; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenFilter; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.classic.QueryParser.Operator; -import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration; -import org.apache.lucene.queryparser.spans.SpanQueryParser; -import org.apache.lucene.queryparser.util.QueryParserTestCase; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.RegexpQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.automaton.BasicAutomata; -import org.apache.lucene.util.automaton.CharacterRunAutomaton; -import org.apache.lucene.util.automaton.RegExp; -import org.junit.Ignore; - -public class TestSpanQPBasedOnQPTestBase extends QueryParserTestCase { - - @Override - public CommonQueryParserConfiguration getParserConfig(Analyzer a) throws Exception{ - CommonQueryParserConfiguration cqpc = new SpanQueryParser(TEST_VERSION_CURRENT, "field", a); - return cqpc; - } - - 
@Override - public Query getQuery(String query, Analyzer analyzer) throws Exception { - Analyzer a = (analyzer == null) ? qpAnalyzer : analyzer; - SpanQueryParser p = new SpanQueryParser(TEST_VERSION_CURRENT, "field", a); - return p.parse(query); - } - - @Override - public Query getQuery(String query, CommonQueryParserConfiguration cqpC) throws Exception { - SpanQueryParser p = (SpanQueryParser)cqpC; - return p.parse(query); - } - - @Override - public void setDateResolution(CommonQueryParserConfiguration cqpC, CharSequence field, DateTools.Resolution value) { - assert (cqpC instanceof SpanQueryParser); - ((SpanQueryParser)cqpC).setDateResolution(field.toString(), value); - } - - @Override - public void setAutoGeneratePhraseQueries(CommonQueryParserConfiguration qp, boolean b) { - assert (qp instanceof SpanQueryParser); - ((SpanQueryParser)qp).setAutoGeneratePhraseQueries(b); - } - - @Override - public void setDefaultOperatorAND(CommonQueryParserConfiguration qp) { - ((SpanQueryParser)qp).setDefaultOperator(Operator.AND); - } - - @Override - public void setDefaultOperatorOR(CommonQueryParserConfiguration qp) { - ((SpanQueryParser)qp).setDefaultOperator(Operator.OR); - } - - @Override - public void setAnalyzeRangeTerms(CommonQueryParserConfiguration qp, boolean value) { - ((SpanQueryParser)qp).setAnalyzeRangeTerms(value); - } - - @Override - public boolean isQueryParserException(Exception exception) { - return exception instanceof ParseException; - } - - @Override - protected String escapeDateString(String s) { - if (s.indexOf(" ") > -1 || s.indexOf("/") > -1 || s.indexOf("-") > -1) { - return "\'" + s + "\'"; - } else { - return s; - } - } - - @Override - public void assertQueryEquals(CommonQueryParserConfiguration cqpC, String field, String query, String result) throws Exception { - Query q = getQuery(query, cqpC); - if (q instanceof SpanMultiTermQueryWrapper){ - q = ((SpanMultiTermQueryWrapper)q).getWrappedQuery(); - } - assertEquals(result, q.toString(field)); - 
} - - public void assertBoostEquals(String query, float b) throws Exception { - Query q = getQuery(query); - assertEquals(b, q.getBoost(), 0.00001); - } - - private void assertEqualsWrappedRegexp(RegexpQuery q, Query query) { - assertTrue(query instanceof SpanMultiTermQueryWrapper); - - SpanMultiTermQueryWrapper wrapped = new SpanMultiTermQueryWrapper(q); - - assertEquals(wrapped, query); - } - - private void assertMultitermEquals(Query query, String expected) throws Exception { - assertMultitermEquals("field", query, expected); - } - - private void assertMultitermEquals(String field, Query query, String expected) throws Exception { - expected = "SpanMultiTermQueryWrapper("+field+":"+ expected+")"; - - //need to trim final .0 for fuzzy queries because - //sometimes they appear in the string and sometimes they don't - expected = expected.replace(".0)", ")"); - String qString = query.toString(); - //strip off the boost...wasn't appearing in toString in 4.6, but is in trunk - qString = qString.replaceAll("\\)\\^\\d+\\.\\d+$", ")"); - qString = qString.replace(".0)", ")"); - assertEquals(expected, qString); - } - - private void assertMultitermEquals(String s, String expected) throws Exception { - assertMultitermEquals(s, qpAnalyzer, expected); - } - - private void assertMultitermEquals(String s, String expected, float boost) throws Exception { - Analyzer a = qpAnalyzer; - SpanQueryParser p = new SpanQueryParser(TEST_VERSION_CURRENT, "field", a); - Query q = p.parse(s); - assertMultitermEquals(q, expected); - assertEquals(q.getBoost(), boost, 0.000001f); - } - - private void assertMultitermEquals(String query, boolean b, String expected) throws Exception { - Analyzer a = qpAnalyzer; - SpanQueryParser p = new SpanQueryParser(TEST_VERSION_CURRENT, "field", a); - p.setLowercaseExpandedTerms(b); - Query q = p.parse(query); - assertMultitermEquals(q, expected); - } - - private void assertMultitermEquals(String field, String query, Analyzer a, String expected) throws 
Exception { - SpanQueryParser p = new SpanQueryParser(TEST_VERSION_CURRENT, "field", a); - Query q = p.parse(query); - assertMultitermEquals(field, q, expected); - } - - private void assertMultitermEquals(String query, Analyzer a, String expected) throws Exception { - assertMultitermEquals("field", query, a, expected); - } - - private void assertMultitermEquals(String query, boolean lowercase, - String expected, boolean allowLeadingWildcard) throws Exception { - Analyzer a = qpAnalyzer; - SpanQueryParser p = new SpanQueryParser(TEST_VERSION_CURRENT, "field", a); - p.setLowercaseExpandedTerms(lowercase); - p.setAllowLeadingWildcard(allowLeadingWildcard); - Query q = p.parse(query); - assertMultitermEquals(q, expected); - } - - public void testCJK() throws Exception { - // Test Ideographic Space - As wide as a CJK character cell (fullwidth) - // used google to translate the word "term" to japanese -> 用語 - assertQueryEquals("term\u3000term\u3000term", null, "term\u0020term\u0020term"); - assertQueryEquals("用語\u3000用語\u3000用語", null, "用語\u0020用語\u0020用語"); - } - - public void testCJKTerm() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - SpanOrQuery expected = new SpanOrQuery( - new SpanQuery[]{ - new SpanTermQuery(new Term("field", "中")), - new SpanTermQuery(new Term("field", "国")) - }); - - assertEquals(expected, getQuery("中国", analyzer)); - } - - public void testCJKBoostedTerm() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - SpanOrQuery expected = new SpanOrQuery( - new SpanQuery[]{ - new SpanTermQuery(new Term("field", "中")), - new SpanTermQuery(new Term("field", "国")) - }); - expected.setBoost(0.5f); - - assertEquals(expected, getQuery("中国^0.5", analyzer)); - } - - public void testCJKPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - SpanNearQuery expected = new 
SpanNearQuery( - new SpanQuery[]{ - new SpanTermQuery(new Term("field", "中")), - new SpanTermQuery(new Term("field", "国")) - }, 0, true); - - assertEquals(expected, getQuery("\"中国\"", analyzer)); - } - - public void testCJKBoostedPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - SpanNearQuery expected = new SpanNearQuery( - new SpanQuery[]{ - new SpanTermQuery(new Term("field", "中")), - new SpanTermQuery(new Term("field", "国")) - }, 0, true); - expected.setBoost(0.5f); - assertEquals(expected, getQuery("\"中国\"^0.5", analyzer)); - } - - public void testCJKSloppyPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - SpanNearQuery expected = new SpanNearQuery( - new SpanQuery[]{ - new SpanTermQuery(new Term("field", "中")), - new SpanTermQuery(new Term("field", "国")) - }, 3, false); - - assertEquals(expected, getQuery("\"中国\"~3", analyzer)); - } - - public void testAutoGeneratePhraseQueriesOn() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - SpanNearQuery expected = new SpanNearQuery( - new SpanTermQuery[]{ - new SpanTermQuery(new Term("field", "中")), - new SpanTermQuery(new Term("field", "国")) - }, 0, true); - CommonQueryParserConfiguration qp = getParserConfig(analyzer); - setAutoGeneratePhraseQueries(qp, true); - assertEquals(expected, getQuery("中国",qp)); - } - - public void testSimple() throws Exception { - assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", new MockAnalyzer(random()), "türm term term"); - assertQueryEquals("ümlaut", new MockAnalyzer(random()), "ümlaut"); - - assertQueryEquals("a AND b", null, "+a +b"); - assertQueryEquals("(a AND b)", null, "+a +b"); - assertQueryEquals("c (a AND b)", null, "c (+a +b)"); - assertQueryEquals("a AND NOT b", null, "+a -b"); - assertQueryEquals("a AND -b", null, "+a 
-b"); - - assertQueryEquals("a b", null, "a b"); - assertQueryEquals("a -b", null, "a -b"); - - assertQueryEquals("+term -term term", null, "+term -term term"); - assertQueryEquals("foo:term AND field:anotherTerm", null, - "+foo:term +anotherterm"); - assertQueryEquals("term AND \"phrase phrase\"", null, - "+term +spanNear([spanOr([phrase1, phrase2]), "+ - "spanOr([phrase1, phrase2])], 0, true)"); - assertQueryEquals("\"hello there\"", null, "spanNear([hello, there], 0, true)"); - assertTrue(getQuery("a AND b") instanceof BooleanQuery); - assertTrue(getQuery("hello") instanceof SpanTermQuery); - assertTrue(getQuery("\"hello there\"") instanceof SpanNearQuery); - - assertQueryEquals("germ term^2.0", null, "germ term^2.0"); - assertQueryEquals("(term)^2.0", null, "term^2.0"); - assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0"); - assertQueryEquals("term^2.0", null, "term^2.0"); - assertQueryEquals("term^2", null, "term^2.0"); - assertQueryEquals("\"germ term\"^2.0", null, "spanNear([germ, term], 0, true)^2.0"); - assertQueryEquals("\"term germ\"^2", null, "spanNear([term, germ], 0, true)^2.0"); - - assertQueryEquals("(foo bar) AND (baz boo)", null, - "+(foo bar) +(baz boo)"); - assertQueryEquals("((a b) AND NOT c) d", null, - "(+(a b) -c) d"); - assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, - "+(apple spanNear([steve, jobs], 0, true)) -(foo bar baz)"); - assertQueryEquals("+title:(dog cat) -author:\"bob dole\"", null, - "+(title:dog title:cat) -spanNear([author:bob, author:dole], 0, true)"); - } - - public void testOperatorVsWhitespace() throws Exception { //LUCENE-2566 - // +,-,! should be directly adjacent to operand (i.e. 
not separated by whitespace) to be treated as an operator - Analyzer a = new Analyzer() { - @Override - public TokenStreamComponents createComponents(String fieldName) { - return new TokenStreamComponents(new MockTokenizer(MockTokenizer.WHITESPACE, false)); - } - }; - assertQueryEquals("a - b", a, "a - b"); - assertQueryEquals("a + b", a, "a + b"); - assertQueryEquals("a ! b", a, "a ! b"); - } - - public void testPunct() throws Exception { - Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); - assertQueryEquals("a&b", a, "a&b"); - assertQueryEquals("a&&b", a, "a&&b"); - assertQueryEquals(".NET", a, ".NET"); - } - - public void testSlop() throws Exception { - assertQueryEquals("\"term germ\"~2", null, "spanNear([term, germ], 2, false)"); - assertQueryEquals("\"term germ\"~2 flork", null, "spanNear([term, germ], 2, false) flork"); - assertQueryEquals("\"term\"~2", null, "term"); - assertQueryEquals("\" \"~2 germ", null, "germ"); - assertQueryEquals("\"term germ\"~2^2", null, "spanNear([term, germ], 2, false)^2.0"); - } - - public void testNumber() throws Exception { - // The numbers go away because SimpleAnalzyer ignores them - assertQueryEquals("3", null, "spanOr([])"); - assertQueryEquals("term 1.0 1 2", null, "term"); - assertQueryEquals("term term1 term2", null, "term term term"); - - Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true); - assertQueryEquals("3", a, "3"); - assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); - assertQueryEquals("term term1 term2", a, "term term1 term2"); - } - - public void testWildcard() throws Exception { - assertMultitermEquals("term*", "term*"); - - assertMultitermEquals("term*^2.0","term*", 2.0f); - assertMultitermEquals("term~", "term~2.0"); - assertMultitermEquals("term~1", "term~1.0"); - assertMultitermEquals("term~0.7","term~1.0"); - assertMultitermEquals("term~^3", "term~2.0", 3.0f); - // not currently supported in SpanQueryParser - // assertWildcardQueryEquals("term^3~", 
"term~2.0", 3.0f); - assertMultitermEquals("term*germ", "term*germ"); - assertMultitermEquals("term*germ^3", "term*germ", 3.0f); - - - PrefixQuery p = new PrefixQuery(new Term("field", "term")); - SpanQuery wrapped = new SpanMultiTermQueryWrapper(p); - assertEquals(getQuery("term*"), wrapped); - - Query parsed = getQuery("term*^2"); - assertMultitermEquals("term*^2", "term*"); - assertEquals(2.0f, parsed.getBoost(), 0.00001f); - - FuzzyQuery f = new FuzzyQuery(new Term("field", "term"), (int)2.0f); - wrapped = new SpanMultiTermQueryWrapper(f); - - //not great test; better if we could retrieve wrapped query for testing. - //don't want to move these tests to SMTQW package. - assertTrue(getQuery("term~") instanceof SpanMultiTermQueryWrapper); - assertTrue(getQuery("term~0.7") instanceof SpanMultiTermQueryWrapper); - /*can't easily test this; - //FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7"); - //assertEquals(1, fq.getMaxEdits()); - - - assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); - fq = (FuzzyQuery)getQuery("term~"); - assertEquals(2, fq.getMaxEdits()); - assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); - */ - //not true of SpanQueryParser...rounds value > 1 - //assertParseException("term~1.1"); // value > 1, throws exception - - assertTrue(getQuery("term*germ") instanceof SpanMultiTermQueryWrapper); - - /* Tests to see that wild card terms are (or are not) properly - * lower-cased with propery parser configuration - */ - // First prefix queries: - // by default, convert to lowercase: - - assertMultitermEquals("Term*", true, "term*"); - // explicitly set lowercase: - assertMultitermEquals("term*", true, "term*"); - assertMultitermEquals("Term*", true, "term*"); - assertMultitermEquals("TERM*", true, "term*"); - // explicitly disable lowercase conversion: - assertMultitermEquals("term*", false, "term*"); - assertMultitermEquals("Term*", false, "Term*"); - assertMultitermEquals("TERM*", false, "TERM*"); - // Then 'full' 
wildcard queries: - // by default, convert to lowercase: - assertMultitermEquals("Te?m", "te?m"); - // explicitly set lowercase: - assertMultitermEquals("te?m", true, "te?m"); - assertMultitermEquals("Te?m", true, "te?m"); - assertMultitermEquals("TE?M", true, "te?m"); - assertMultitermEquals("Te?m*gerM", true, "te?m*germ"); - // explicitly disable lowercase conversion: - assertMultitermEquals("te?m", false, "te?m"); - assertMultitermEquals("Te?m", false, "Te?m"); - assertMultitermEquals("TE?M", false, "TE?M"); - assertMultitermEquals("Te?m*gerM", false, "Te?m*gerM"); - // Fuzzy queries: - assertMultitermEquals("Term~", "term~2.0"); - assertMultitermEquals("Term~", true, "term~2.0"); - assertMultitermEquals("Term~", false, "Term~2.0"); - // Range queries: - assertMultitermEquals("[A TO C]", "[a TO c]"); - assertMultitermEquals("[A TO C]", true, "[a TO c]"); - assertMultitermEquals("[A TO C]", false, "[A TO C]"); - - // Test suffix queries: first disallow - try { - assertMultitermEquals("*Term", true, "*term"); - fail("didn't get expected exception"); - } catch (Exception pe) { - assertTrue(isQueryParserException(pe)); - } - - try { - assertMultitermEquals("?Term", true, "?term"); - fail("didn't get expected exception"); - } catch (Exception pe) { - assertTrue(isQueryParserException(pe)); - } - - // Test suffix queries: then allow - assertMultitermEquals("*Term", true, "*term", true); - assertMultitermEquals("?Term", true, "?term", true); - } - - public void testLeadingWildcardType() throws Exception { - CommonQueryParserConfiguration cqpC = getParserConfig(qpAnalyzer); - cqpC.setAllowLeadingWildcard(true); - assertEquals(SpanMultiTermQueryWrapper.class, getQuery("t*erm*",cqpC).getClass()); - assertEquals(SpanMultiTermQueryWrapper.class, getQuery("?term*",cqpC).getClass()); - assertEquals(SpanMultiTermQueryWrapper.class, getQuery("*term*",cqpC).getClass()); - } - - public void testQPA() throws Exception { - assertQueryEquals("term term^3.0 term", qpAnalyzer, "term 
term^3.0 term"); - assertQueryEquals("term stop^3.0 term", qpAnalyzer, "term term"); - - assertQueryEquals("term term term", qpAnalyzer, "term term term"); - assertQueryEquals("term +stop term", qpAnalyzer, "term term"); - assertQueryEquals("term -stop term", qpAnalyzer, "term term"); - - assertQueryEquals("drop AND (stop) AND roll", qpAnalyzer, "+drop +roll"); - assertQueryEquals("term +(stop) term", qpAnalyzer, "term term"); - assertQueryEquals("term -(stop) term", qpAnalyzer, "term term"); - - assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll"); - assertQueryEquals("term phrase term", qpAnalyzer, - "term spanOr([phrase1, phrase2]) term"); - assertQueryEquals("term AND NOT phrase term", qpAnalyzer, - "+term -spanOr([phrase1, phrase2]) term"); - assertQueryEquals("stop^3", qpAnalyzer, "spanOr([])"); - assertQueryEquals("stop", qpAnalyzer, "spanOr([])"); - assertQueryEquals("(stop)^3", qpAnalyzer, "spanOr([])"); - assertQueryEquals("((stop))^3", qpAnalyzer, "spanOr([])"); - assertQueryEquals("(stop^3)", qpAnalyzer, "spanOr([])"); - assertQueryEquals("((stop)^3)", qpAnalyzer, "spanOr([])"); - assertQueryEquals("(stop)", qpAnalyzer, "spanOr([])"); - assertQueryEquals("((stop))", qpAnalyzer, "spanOr([])"); - assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery); - assertTrue(getQuery("term +stop", qpAnalyzer) instanceof SpanTermQuery); - } - - public void testRange() throws Exception { - assertQueryEquals("[ a TO z]", null, "SpanMultiTermQueryWrapper([a TO z])"); - assertQueryEquals("[ a TO z}", null, "SpanMultiTermQueryWrapper([a TO z})"); - assertQueryEquals("{ a TO z]", null, "SpanMultiTermQueryWrapper({a TO z])"); - assertQueryEquals("{ a TO z}", null, "SpanMultiTermQueryWrapper({a TO z})"); - - //SQP:not sure what this should be - // assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, - // ((SpanMultiTermQueryWrapper)getQuery("[ a TO z]")).getRewriteMethod()); - //TODO: turn back on - /* - 
CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); - - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)getQuery("[ a TO z]", qp)).getRewriteMethod()); - - // test open ranges - assertQueryEquals("[ a TO * ]", null, "[a TO *]"); - assertQueryEquals("[ * TO z ]", null, "[* TO z]"); - assertQueryEquals("[ * TO * ]", null, "[* TO *]"); - */ - // mixing exclude and include bounds - assertQueryEquals("{ a TO z ]", null, "SpanMultiTermQueryWrapper({a TO z])"); - assertQueryEquals("[ a TO z }", null, "SpanMultiTermQueryWrapper([a TO z})"); - assertQueryEquals("{ a TO * ]", null, "SpanMultiTermQueryWrapper({a TO *])"); - assertQueryEquals("[ * TO z }", null, "SpanMultiTermQueryWrapper([* TO z})"); - - assertQueryEquals("[ a TO z ]", null, "SpanMultiTermQueryWrapper([a TO z])"); - assertQueryEquals("{ a TO z}", null, "SpanMultiTermQueryWrapper({a TO z})"); - assertQueryEquals("{ a TO z }", null, "SpanMultiTermQueryWrapper({a TO z})"); - assertQueryEquals("{ a TO z }^2.0", null, "SpanMultiTermQueryWrapper({a TO z})^2.0"); - assertBoostEquals("{ a TO z }^2.0", 2.0f); - assertQueryEquals("[ a TO z] OR bar", null, "SpanMultiTermQueryWrapper([a TO z]) bar"); - assertQueryEquals("[ a TO z] AND bar", null, "+SpanMultiTermQueryWrapper([a TO z]) +bar"); - assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar SpanMultiTermQueryWrapper({a TO z})"); - assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar SpanMultiTermQueryWrapper({a TO z}))"); - - assertQueryEquals("[* TO Z]",null,"SpanMultiTermQueryWrapper([* TO z])"); - assertQueryEquals("[A TO *]",null,"SpanMultiTermQueryWrapper([a TO *])"); - assertQueryEquals("[* TO *]",null,"SpanMultiTermQueryWrapper([* TO *])"); - } - - public void testRangeWithPhrase() throws Exception { - assertQueryEquals("[\\* TO \"*\"]",null,"SpanMultiTermQueryWrapper([\\* 
TO \\*])"); - assertQueryEquals("[\"*\" TO *]",null,"SpanMultiTermQueryWrapper([\\* TO *])"); - } - - public void testDateRange() throws Exception { - String startDate = getLocalizedDate(2002, 1, 1); - String endDate = getLocalizedDate(2002, 1, 4); - // we use the default Locale/TZ since LuceneTestCase randomizes it - Calendar endDateExpected = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault()); - endDateExpected.clear(); - endDateExpected.set(2002, 1, 4, 23, 59, 59); - endDateExpected.set(Calendar.MILLISECOND, 999); - final String defaultField = "default"; - final String monthField = "month"; - final String hourField = "hour"; - Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); - CommonQueryParserConfiguration qp = getParserConfig(a); - - // set a field specific date resolution - setDateResolution(qp, monthField, DateTools.Resolution.MONTH); - - // set default date resolution to MILLISECOND - qp.setDateResolution(DateTools.Resolution.MILLISECOND); - - // set second field specific date resolution - setDateResolution(qp, hourField, DateTools.Resolution.HOUR); - - // for this field no field specific date resolution has been set, - // so verify if the default resolution is used - assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, - endDateExpected.getTime(), DateTools.Resolution.MILLISECOND); - - // verify if field specific date resolutions are used for these two fields - assertDateRangeQueryEquals(qp, monthField, startDate, endDate, - endDateExpected.getTime(), DateTools.Resolution.MONTH); - - assertDateRangeQueryEquals(qp, hourField, startDate, endDate, - endDateExpected.getTime(), DateTools.Resolution.HOUR); - } - - public void testEscaped() throws Exception { - Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); - - assertQueryEquals("\\[brackets", a, "[brackets"); - assertQueryEquals("\\[brackets", null, "brackets"); - assertQueryEquals("\\\\", a, "\\"); - assertQueryEquals("\\+blah", a, 
"+blah"); - assertQueryEquals("\\(blah", a, "(blah"); - - assertQueryEquals("\\-blah", a, "-blah"); - assertQueryEquals("\\!blah", a, "!blah"); - assertQueryEquals("\\{blah", a, "{blah"); - assertQueryEquals("\\}blah", a, "}blah"); - assertQueryEquals("\\:blah", a, ":blah"); - assertQueryEquals("\\^blah", a, "^blah"); - assertQueryEquals("\\[blah", a, "[blah"); - assertQueryEquals("\\]blah", a, "]blah"); - assertQueryEquals("\\\"blah", a, "\"blah"); - assertQueryEquals("\\(blah", a, "(blah"); - assertQueryEquals("\\)blah", a, ")blah"); - assertQueryEquals("\\~blah", a, "~blah"); - assertQueryEquals("\\*blah", a, "*blah"); - assertQueryEquals("\\?blah", a, "?blah"); - assertQueryEquals("foo \\&\\& bar", a, "foo && bar"); - assertQueryEquals("foo \\|| bar", a, "foo || bar"); - assertQueryEquals("foo \\AND bar", a, "foo AND bar"); - - - assertQueryEquals("\\a", a, "a"); - - assertQueryEquals("a\\-b:c", a, "a-b:c"); - assertQueryEquals("a\\+b:c", a, "a+b:c"); - assertQueryEquals("a\\:b:c", a, "a:b:c"); - assertQueryEquals("a\\\\b:c", a, "a\\b:c"); - - assertQueryEquals("a:b\\-c", a, "a:b-c"); - assertQueryEquals("a:b\\+c", a, "a:b+c"); - assertQueryEquals("a:b\\:c", a, "a:b:c"); - assertQueryEquals("a:b\\\\c", a, "a:b\\c"); - - assertMultitermEquals("a", "a:b\\-c*", a, "b-c*"); - assertMultitermEquals("a", "a:b\\+c*", a, "b+c*"); - assertMultitermEquals("a", "a:b\\:c*", a, "b:c*"); - - assertMultitermEquals("a", "a:b\\\\c*", a, "b\\c*"); - - assertMultitermEquals("a", "a:b\\-c~", a, "b-c~2.0"); - assertMultitermEquals("a", "a:b\\+c~", a, "b+c~2.0"); - assertMultitermEquals("a", "a:b\\:c~", a, "b:c~2.0"); - assertMultitermEquals("a", "a:b\\\\c~", a, "b\\c~2.0"); - - assertMultitermEquals("[ a\\- TO a\\+ ]", "[a- TO a+]"); - assertMultitermEquals("[ a\\: TO a\\~ ]", "[a: TO a~]"); - assertMultitermEquals("[ a\\\\ TO a\\* ]", "[a\\ TO a*]"); - - //change " to ' for spanquery parser - assertMultitermEquals("['c\\:\\\\temp\\\\\\~foo0.txt' TO 'c\\:\\\\temp\\\\\\~foo9.txt']", 
a, - "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]"); - - assertQueryEquals("a\\\\\\+b", a, "a\\+b"); - - assertQueryEquals("a \\\"b c\\\" d", a, "a \"b c\" d"); - assertQueryEquals("\"a \\\"b c\\\" d\"", a, "spanNear([a, \"b, c\", d], 0, true)"); - assertQueryEquals("\"a \\+b c d\"", a, "spanNear([a, +b, c, d], 0, true)"); - - assertQueryEquals("c\\:\\\\temp\\\\\\~foo.txt", a, "c:\\temp\\~foo.txt"); - - assertParseException("XY\\"); // there must be a character after the escape char - - // test unicode escaping - assertQueryEquals("a\\u0062c", a, "abc"); - assertQueryEquals("XY\\u005a", a, "XYZ"); - assertQueryEquals("XY\\u005A", a, "XYZ"); - assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "spanNear([a, \\(b\", c], 0, true)"); - - assertParseException("XY\\u005G"); // test non-hex character in escaped unicode sequence - assertParseException("XY\\u005"); // test incomplete escaped unicode sequence - - // Tests bug LUCENE-800 - assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\"); - assertParseException("(item:\\\\ item:ABCD\\\\))"); // unmatched closing paranthesis - assertQueryEquals("\\*", a, "*"); - assertQueryEquals("\\\\", a, "\\"); // escaped backslash - - assertParseException("\\"); // a backslash must always be escaped - - // LUCENE-1189 - assertQueryEquals("(\"a\\\\\") or (\"b\")", a ,"a\\ or b"); - - //now passes actual LUCENE-1189 test with single quotes. 
- assertQueryEquals("(name:'///mike\\\\\\\') or (name:\"alphonse\")", a, - "name:///mike\\\\\\ or name:alphonse"); - } - - public void testEscapedVsQuestionMarkAsWildcard() throws Exception { - Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); - //SpanMultiTermQueryWrapper(a:b-?c) - assertMultitermEquals("a", "a:b\\-?c", a, "b\\-?c"); - assertMultitermEquals("a", "a:b\\+?c", a, "b\\+?c"); - assertMultitermEquals("a", "a:b\\:?c", a, "b\\:?c"); - - assertMultitermEquals("a", "a:b\\\\?c", a, "b\\\\?c"); - } - - public void testQueryStringEscaping() throws Exception { - Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); - - assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); - assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); - assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c"); - assertEscapedQueryEquals("a\\b:c", a, "a\\\\b\\:c"); - - assertEscapedQueryEquals("a:b-c", a, "a\\:b\\-c"); - assertEscapedQueryEquals("a:b+c", a, "a\\:b\\+c"); - assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c"); - assertEscapedQueryEquals("a:b\\c", a, "a\\:b\\\\c"); - - assertEscapedQueryEquals("a:b-c*", a, "a\\:b\\-c\\*"); - assertEscapedQueryEquals("a:b+c*", a, "a\\:b\\+c\\*"); - assertEscapedQueryEquals("a:b:c*", a, "a\\:b\\:c\\*"); - - assertEscapedQueryEquals("a:b\\\\c*", a, "a\\:b\\\\\\\\c\\*"); - - assertEscapedQueryEquals("a:b-?c", a, "a\\:b\\-\\?c"); - assertEscapedQueryEquals("a:b+?c", a, "a\\:b\\+\\?c"); - assertEscapedQueryEquals("a:b:?c", a, "a\\:b\\:\\?c"); - - assertEscapedQueryEquals("a:b?c", a, "a\\:b\\?c"); - - assertEscapedQueryEquals("a:b-c~", a, "a\\:b\\-c\\~"); - assertEscapedQueryEquals("a:b+c~", a, "a\\:b\\+c\\~"); - assertEscapedQueryEquals("a:b:c~", a, "a\\:b\\:c\\~"); - assertEscapedQueryEquals("a:b\\c~", a, "a\\:b\\\\c\\~"); - - assertEscapedQueryEquals("[ a - TO a+ ]", null, "\\[ a \\- TO a\\+ \\]"); - assertEscapedQueryEquals("[ a : TO a~ ]", null, "\\[ a \\: TO a\\~ \\]"); - assertEscapedQueryEquals("[ a\\ TO a* 
]", null, "\\[ a\\\\ TO a\\* \\]"); - - // LUCENE-881 - assertEscapedQueryEquals("|| abc ||", a, "\\|\\| abc \\|\\|"); - assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&"); - } - - public void testTabNewlineCarriageReturn() throws Exception { - assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\n+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \n+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \n +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\r+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\r\n+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r\n+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r\n +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r \n +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\t+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \t+worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \t +worlbank", null, "+weltbank +worlbank"); - } - - public void testSimpleDAO() throws Exception { - assertQueryEqualsDOA("term term term", null, "+term +term +term"); - assertQueryEqualsDOA("term +term term", null, "+term +term +term"); - assertQueryEqualsDOA("term term +term", null, "+term +term +term"); - assertQueryEqualsDOA("term +term +term", null, "+term +term +term"); - assertQueryEqualsDOA("-term term term", null, "-term +term +term"); - } - - public void testBoost() throws Exception { - CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on")); - Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords); - 
CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer); - Query q = getQuery("on^1.0",qp); - assertNotNull(q); - q = getQuery("\"hello\"^2.0",qp); - assertNotNull(q); - assertEquals(q.getBoost(), (float) 2.0, (float) 0.01); - q = getQuery("hello^2.0",qp); - assertNotNull(q); - assertEquals(q.getBoost(), (float) 2.0, (float) 0.01); - q = getQuery("\"on\"^1.0",qp); - assertNotNull(q); - - Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); - CommonQueryParserConfiguration qp2 = getParserConfig(a2); - q = getQuery("the^3", qp2); - // "the" is a stop word so the result is an empty query: - assertNotNull(q); - assertEquals("spanOr([])", q.toString()); - assertEquals(1.0f, q.getBoost(), 0.01f); - } - - public void testException() throws Exception { - assertParseException("\"some phrase"); - assertParseException("(foo bar"); - assertParseException("foo bar))"); - assertParseException("field:term:with:colon some more terms"); - assertParseException("(sub query)^5.0^2.0 plus more"); - assertParseException("secret AND illegal) AND access:confidential"); - } - - public void testBooleanQuery() throws Exception { - BooleanQuery.setMaxClauseCount(2); - Analyzer purWhitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); - assertParseException("one two three", purWhitespaceAnalyzer); - } - - /** - * This test differs from TestPrecedenceQueryParser - */ - public void testPrecedence() throws Exception { - CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); - Query query1 = getQuery("A AND B OR C AND D", qp); - Query query2 = getQuery("+A +B +C +D", qp); - assertEquals(query1, query2); - } - - public void testEscapedWildcard() throws Exception { - CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); - WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r")); - 
SpanMultiTermQueryWrapper wq = new SpanMultiTermQueryWrapper(q); - assertEquals(wq, getQuery("foo\\?ba?r", qp)); - } - - public void testRegexps() throws Exception { - CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); - RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]")); - assertEqualsWrappedRegexp(q, getQuery("/[a-z][123]/",qp)); - - //regexes can't be lowercased with SpanQueryParser - //qp.setLowercaseExpandedTerms(true); - assertEqualsWrappedRegexp(q, getQuery("/[a-z][123]/",qp)); - q.setBoost(0.5f); - assertBoostEquals("/[a-z][123]/^0.5", 0.5f); - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - assertTrue(getQuery("/[a-z][123]/^0.5",qp) instanceof SpanMultiTermQueryWrapper); - // assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, - // ((SpanMultiTermQueryWrapper)getQuery("/[A-Z][123]/^0.5",qp)).getRewriteMethod()); - // assertEqualsWrappedRegexp(q, getQuery("/[A-Z][123]/^0.5",qp)); - assertBoostEquals("/[a-z][123]/^0.5", 0.5f); - - qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); - - SpanMultiTermQueryWrapper escaped = - //SQP changed [a-z]\\/[123] to [a-z]/[123] - new SpanMultiTermQueryWrapper(new RegexpQuery(new Term("field", "[a-z]/[123]"))); - - assertEquals(escaped, getQuery("/[a-z]\\/[123]/",qp)); - SpanMultiTermQueryWrapper escaped2 = - new SpanMultiTermQueryWrapper(new RegexpQuery(new Term("field", "[a-z]\\*[123]"))); - assertEquals(escaped2, getQuery("/[a-z]\\*[123]/",qp)); - - BooleanQuery complex = new BooleanQuery(); - complex.add(new SpanMultiTermQueryWrapper( - new RegexpQuery(new Term("field", "[a-z]/[123]"))), Occur.MUST); - complex.add(new SpanTermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); - complex.add(new SpanTermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); - // assertEquals(complex, 
getQuery("/[a-z]\\/[123]/ AND path:\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ",qp)); - assertEquals(complex, getQuery("/[a-z]\\/[123]/ AND path:\\/etc\\/init.d\\/ OR \\/etc\\/init\\[.\\]d/lucene\\/ ",qp)); - - Query re = new SpanMultiTermQueryWrapper(new RegexpQuery(new Term("field", "http.*"))); - assertEquals(re, getQuery("field:/http.*/",qp)); - assertEquals(re, getQuery("/http.*/",qp)); - - re = new SpanMultiTermQueryWrapper(new RegexpQuery(new Term("field", "http~0.5"))); - assertEquals(re, getQuery("field:/http~0.5/",qp)); - assertEquals(re, getQuery("/http~0.5/",qp)); - - re = new SpanMultiTermQueryWrapper(new RegexpQuery(new Term("field", "boo"))); - assertEquals(re, getQuery("field:/boo/",qp)); - assertEquals(re, getQuery("/boo/",qp)); - - // assertEquals(new SpanTermQuery(new Term(FIELD, "/boo/")), getQuery("\"/boo/\"",qp)); - assertEquals(new SpanTermQuery(new Term("field", "/boo/")), getQuery("\\/boo\\/",qp)); - - BooleanQuery two = new BooleanQuery(); - two.add(new SpanMultiTermQueryWrapper(new RegexpQuery(new Term("field", "foo"))), Occur.SHOULD); - two.add(new SpanMultiTermQueryWrapper(new RegexpQuery(new Term("field", "bar"))), Occur.SHOULD); - assertEquals(two, getQuery("field:/foo/ field:/bar/",qp)); - assertEquals(two, getQuery("/foo/ /bar/",qp)); - } - - public void testStopwords() throws Exception { - CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton()); - CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet)); - Query result = getQuery("field:the OR field:foo",qp); - assertNotNull("result is null and it shouldn't be", result); - assertTrue("result is not a BooleanQuery", result instanceof SpanOrQuery); - assertEquals(0, ((SpanOrQuery)result).getClauses().length); - result = getQuery("field:woo OR field:the",qp); - assertNotNull("result is null and it shouldn't be", result); - assertTrue("result is not a TermQuery", result 
instanceof SpanTermQuery); - result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",qp); - assertNotNull("result is null and it shouldn't be", result); - assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery); - if (VERBOSE) System.out.println("Result: " + result); - assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2); - } - - public void testPositionIncrement() throws Exception { - //For SQP, this only tests whether stop words have been dropped. - //PositionIncrements are not available in SpanQueries yet. - CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)); - //qp.setEnablePositionIncrements(true); - String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\""; - // 0 2 5 7 8 - SpanNearQuery pq = (SpanNearQuery) getQuery(qtxt,qp); - //System.out.println("Query text: "+qtxt); - //System.out.println("Result: "+pq); - SpanQuery[] clauses = pq.getClauses(); - assertEquals(clauses.length, 5); - Set expected = new HashSet(); - expected.add(new Term("field", "words")); - expected.add(new Term("field", "poisitions")); - expected.add(new Term("field", "pos")); - expected.add(new Term("field", "stopped")); - expected.add(new Term("field", "phrasequery")); - - Set terms = new HashSet(); - for (int i = 0; i < clauses.length; i++) { - SpanQuery q = clauses[i]; - q.extractTerms(terms); - } - assertEquals(expected, terms); - } - - public void testMatchAllDocs() throws Exception { - CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); - assertEquals(new MatchAllDocsQuery(), getQuery("*:*",qp)); - assertEquals(new MatchAllDocsQuery(), getQuery("(*:*)",qp)); - BooleanQuery bq = (BooleanQuery)getQuery("+*:* -*:*",qp); - assertTrue(bq.getClauses()[0].getQuery() instanceof 
MatchAllDocsQuery); - assertTrue(bq.getClauses()[1].getQuery() instanceof MatchAllDocsQuery); - } -} Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java (working copy) @@ -37,7 +37,7 @@ public class TestComplexPhraseQuery extends LuceneTestCase { Directory rd; - Analyzer analyzer; + protected Analyzer analyzer; DocData docsContent[] = { new DocData("john smith", "1"), new DocData("johathon smith", "2"), @@ -47,36 +47,45 @@ private IndexSearcher searcher; private IndexReader reader; - String defaultFieldName = "name"; + protected String defaultFieldName = "name"; public void testComplexPhrases() throws Exception { checkMatches("\"john smith\"", "1"); // Simple multi-term still works checkMatches("\"j* smyth~\"", "1,2"); // wildcards and fuzzies are OK in // phrases - checkMatches("\"(jo* -john) smith\"", "2"); // boolean logic works checkMatches("\"jo* smith\"~2", "1,2,3"); // position logic works. checkMatches("\"jo* [sma TO smZ]\" ", "1,2"); // range queries supported checkMatches("\"john\"", "1,3"); // Simple single-term still works - checkMatches("\"(john OR johathon) smith\"", "1,2"); // boolean logic with - // brackets works. + + checkMatches("\"john nosuchword*\"", ""); // phrases with clauses producing + // empty sets + + checkBadQuery("\"jo* id:1 smith\""); // mixing fields in a phrase is bad + } + + public void testParserSpecificSyntax() throws Exception { + checkMatches("\"(jo* -john) smith\"", "2"); // boolean logic works checkMatches("\"(jo* -john) smyth~\"", "2"); // boolean logic with // brackets works. // checkMatches("\"john -percival\"", "1"); // not logic doesn't work // currently :(. 
+ checkMatches("\"(john OR johathon) smith\"", "1,2"); // boolean logic with + // brackets works. + checkBadQuery("\"jo* \"smith\" \""); // phrases inside phrases is bad - checkMatches("\"john nosuchword*\"", ""); // phrases with clauses producing - // empty sets - checkBadQuery("\"jo* id:1 smith\""); // mixing fields in a phrase is bad - checkBadQuery("\"jo* \"smith\" \""); // phrases inside phrases is bad } - private void checkBadQuery(String qString) { + public Query getQuery(String qString) throws Exception { QueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer); + return qp.parse(qString); + + } + protected void checkBadQuery(String qString) { Throwable expected = null; try { - qp.parse(qString); + getQuery(qString); } catch (Throwable e) { expected = e; } @@ -84,13 +93,10 @@ } - private void checkMatches(String qString, String expectedVals) + protected void checkMatches(String qString, String expectedVals) throws Exception { - QueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer); - qp.setFuzzyPrefixLength(1); // usually a good idea + Query q = getQuery(qString); - Query q = qp.parse(qString); - HashSet expecteds = new HashSet(); String[] vals = expectedVals.split(","); for (int i = 0; i < vals.length; i++) { Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (working copy) @@ -109,7 +109,7 @@ expected.add(new Term("field", "中")); expected.add(new Term("field", "国")); - assertEquals(expected, getQuery("\"中国\"^0.5", analyzer)); + assertQueryEquals(expected, getQuery("\"中国\"^0.5", analyzer)); } public void testCJKSloppyPhrase() throws Exception { @@ -121,7 +121,7 @@ 
expected.add(new Term("field", "中")); expected.add(new Term("field", "国")); - assertEquals(expected, getQuery("\"中国\"~3", analyzer)); + assertQueryEquals(expected, getQuery("\"中国\"~3", analyzer)); } public void testAutoGeneratePhraseQueriesOn() throws Exception { @@ -133,7 +133,7 @@ expected.add(new Term("field", "国")); CommonQueryParserConfiguration qp = getParserConfig(analyzer); setAutoGeneratePhraseQueries(qp, true); - assertEquals(expected, getQuery("中国",qp)); + assertQueryEquals(expected, getQuery("中国",qp)); } public void testSimple() throws Exception { @@ -146,40 +146,22 @@ assertQueryEquals("c OR (a AND b)", null, "c (+a +b)"); assertQueryEquals("a AND NOT b", null, "+a -b"); assertQueryEquals("a AND -b", null, "+a -b"); - assertQueryEquals("a AND !b", null, "+a -b"); - assertQueryEquals("a && b", null, "+a +b"); assertQueryEquals("a OR b", null, "a b"); - assertQueryEquals("a || b", null, "a b"); - assertQueryEquals("a OR !b", null, "a -b"); assertQueryEquals("a OR -b", null, "a -b"); assertQueryEquals("+term -term term", null, "+term -term term"); assertQueryEquals("foo:term AND field:anotherTerm", null, "+foo:term +anotherterm"); - assertQueryEquals("term AND \"phrase phrase\"", null, - "+term +\"phrase phrase\""); - assertQueryEquals("\"hello there\"", null, "\"hello there\""); - assertTrue(getQuery("a AND b") instanceof BooleanQuery); - assertTrue(getQuery("hello") instanceof TermQuery); - assertTrue(getQuery("\"hello there\"") instanceof PhraseQuery); + assertInstanceOf(getQuery("a AND b"), BooleanQuery.class); + assertInstanceOf(getQuery("hello"), TermQuery.class); + assertInstanceOf(getQuery("\"hello there\""), PhraseQuery.class); assertQueryEquals("germ term^2.0", null, "germ term^2.0"); assertQueryEquals("(term)^2.0", null, "term^2.0"); assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0"); assertQueryEquals("term^2.0", null, "term^2.0"); assertQueryEquals("term^2", null, "term^2.0"); - assertQueryEquals("\"germ term\"^2.0", null, "\"germ 
term\"^2.0"); - assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0"); - - assertQueryEquals("(foo OR bar) AND (baz OR boo)", null, - "+(foo bar) +(baz boo)"); - assertQueryEquals("((a OR b) AND NOT c) OR d", null, - "(+(a b) -c) d"); - assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, - "+(apple \"steve jobs\") -(foo bar baz)"); - assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, - "+(title:dog title:cat) -author:\"bob dole\""); } // FIXME: enhance MockAnalyzer to be able to support testing the empty string. @@ -208,11 +190,17 @@ } public void testSlop() throws Exception { - assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2"); - assertQueryEquals("\"term germ\"~2 flork", null, "\"term germ\"~2 flork"); + PhraseQuery q = new PhraseQuery(); + q.add(new Term("field", "term")); + q.add(new Term("field", "germ")); + q.setSlop(2); + assertQueryEquals(q, getQuery("\"term germ\"~2")); + + q.setBoost(2.0f); + assertQueryEquals(q, getQuery("\"term germ\"~2^2")); + assertQueryEquals("\"term\"~2", null, "term"); assertQueryEquals("\" \"~2 germ", null, "germ"); - assertQueryEquals("\"term germ\"~2^2", null, "\"term germ\"~2^2.0"); } public void testNumber() throws Exception { @@ -234,24 +222,20 @@ assertQueryEquals("term~1", null, "term~1"); assertQueryEquals("term~0.7", null, "term~1"); assertQueryEquals("term~^3", null, "term~2^3.0"); - assertQueryEquals("term^3~", null, "term~2^3.0"); + assertQueryEquals("term*germ", null, "term*germ"); assertQueryEquals("term*germ^3", null, "term*germ^3.0"); - assertTrue(getQuery("term*") instanceof PrefixQuery); - assertTrue(getQuery("term*^2") instanceof PrefixQuery); - assertTrue(getQuery("term~") instanceof FuzzyQuery); - assertTrue(getQuery("term~0.7") instanceof FuzzyQuery); - FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7"); - assertEquals(1, fq.getMaxEdits()); - assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); - fq = (FuzzyQuery)getQuery("term~"); - 
assertEquals(2, fq.getMaxEdits()); - assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); + assertInstanceOf(getQuery("term*"), PrefixQuery.class); + assertInstanceOf(getQuery("term*^2"), PrefixQuery.class); + assertInstanceOf(getQuery("term~"), FuzzyQuery.class); + assertInstanceOf(getQuery("term~0.7"),FuzzyQuery.class); + assertFuzzyQueryEquals("field", "term", 1, FuzzyQuery.defaultPrefixLength, getQuery("term~0.7")); + assertFuzzyQueryEquals("field", "term", 2, FuzzyQuery.defaultPrefixLength, getQuery("term~")); assertParseException("term~1.1"); // value > 1, throws exception - assertTrue(getQuery("term*germ") instanceof WildcardQuery); + assertInstanceOf(getQuery("term*germ"), WildcardQuery.class); // Tests to see that wild card terms are (or are not) properly // lower-cased with propery parser configuration @@ -311,9 +295,9 @@ public void testLeadingWildcardType() throws Exception { CommonQueryParserConfiguration cqpC = getParserConfig(null); cqpC.setAllowLeadingWildcard(true); - assertEquals(WildcardQuery.class, getQuery("t*erm*",cqpC).getClass()); - assertEquals(WildcardQuery.class, getQuery("?term*",cqpC).getClass()); - assertEquals(WildcardQuery.class, getQuery("*term*",cqpC).getClass()); + assertInstanceOf(getQuery("t*erm*",cqpC), WildcardQuery.class); + assertInstanceOf(getQuery("?term*",cqpC), WildcardQuery.class); + assertInstanceOf(getQuery("*term*",cqpC), WildcardQuery.class); } public void testQPA() throws Exception { @@ -329,10 +313,7 @@ assertQueryEquals("term -(stop) term", qpAnalyzer, "term term"); assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll"); - assertQueryEquals("term phrase term", qpAnalyzer, - "term (phrase1 phrase2) term"); - assertQueryEquals("term AND NOT phrase term", qpAnalyzer, - "+term -(phrase1 phrase2) term"); + assertQueryEquals("stop^3", qpAnalyzer, ""); assertQueryEquals("stop", qpAnalyzer, ""); assertQueryEquals("(stop)^3", qpAnalyzer, ""); @@ -341,15 +322,9 @@ 
assertQueryEquals("((stop)^3)", qpAnalyzer, ""); assertQueryEquals("(stop)", qpAnalyzer, ""); assertQueryEquals("((stop))", qpAnalyzer, ""); - assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery); - assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery); + assertInstanceOf(getQuery("term term term", qpAnalyzer), BooleanQuery.class); + assertInstanceOf(getQuery("term +stop", qpAnalyzer), TermQuery.class); - CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer); - setDefaultOperatorAND(cqpc); - assertQueryEquals(cqpc, "field", "term phrase term", - "+term +(+phrase1 +phrase2) +term"); - assertQueryEquals(cqpc, "field", "phrase", - "+phrase1 +phrase2"); } public void testRange() throws Exception { @@ -357,13 +332,7 @@ assertQueryEquals("[ a TO z}", null, "[a TO z}"); assertQueryEquals("{ a TO z]", null, "{a TO z]"); - assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]")).getRewriteMethod()); - - CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); - qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)getQuery("[ a TO z]", qp)).getRewriteMethod()); - // test open ranges assertQueryEquals("[ a TO * ]", null, "[a TO *]"); assertQueryEquals("[ * TO z ]", null, "[* TO z]"); @@ -379,10 +348,6 @@ assertQueryEquals("{ a TO z}", null, "{a TO z}"); assertQueryEquals("{ a TO z }", null, "{a TO z}"); assertQueryEquals("{ a TO z }^2.0", null, "{a TO z}^2.0"); - assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar"); - assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar"); - assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}"); - assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); assertQueryEquals("[* TO Z]",null,"[* TO z]"); assertQueryEquals("[A TO *]",null,"[a TO 
*]"); @@ -484,14 +449,10 @@ assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]"); assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]"); - assertQueryEquals("[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO \"c\\:\\\\temp\\\\\\~foo9.txt\"]", a, - "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]"); assertQueryEquals("a\\\\\\+b", a, "a\\+b"); assertQueryEquals("a \\\"b c\\\" d", a, "a \"b c\" d"); - assertQueryEquals("\"a \\\"b c\\\" d\"", a, "\"a \"b c\" d\""); - assertQueryEquals("\"a \\+b c d\"", a, "\"a +b c d\""); assertQueryEquals("c\\:\\\\temp\\\\\\~foo.txt", a, "c:\\temp\\~foo.txt"); @@ -501,7 +462,6 @@ assertQueryEquals("a\\u0062c", a, "abc"); assertQueryEquals("XY\\u005a", a, "XYZ"); assertQueryEquals("XY\\u005A", a, "XYZ"); - assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "\"a \\(b\" c\""); assertParseException("XY\\u005G"); // test non-hex character in escaped unicode sequence assertParseException("XY\\u005"); // test incomplete escaped unicode sequence @@ -611,7 +571,7 @@ q = getQuery("the^3", qp2); // "the" is a stop word so the result is an empty query: assertNotNull(q); - assertEquals("", q.toString()); + assertEmpty(q); assertEquals(1.0f, q.getBoost(), 0.01f); } @@ -645,55 +605,47 @@ public void testEscapedWildcard() throws Exception { CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r")); - assertEquals(q, getQuery("foo\\?ba?r", qp)); + assertQueryEquals(q, getQuery("foo\\?ba?r", qp)); } public void testRegexps() throws Exception { CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]")); - assertEquals(q, getQuery("/[a-z][123]/",qp)); + assertQueryEquals(q, getQuery("/[a-z][123]/",qp)); qp.setLowercaseExpandedTerms(true); - assertEquals(q, getQuery("/[A-Z][123]/",qp)); + assertQueryEquals(q, 
getQuery("/[A-Z][123]/",qp)); q.setBoost(0.5f); - assertEquals(q, getQuery("/[A-Z][123]/^0.5",qp)); + assertQueryEquals(q, getQuery("/[A-Z][123]/^0.5",qp)); qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - assertTrue(getQuery("/[A-Z][123]/^0.5",qp) instanceof RegexpQuery); - assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)getQuery("/[A-Z][123]/^0.5",qp)).getRewriteMethod()); - assertEquals(q, getQuery("/[A-Z][123]/^0.5",qp)); + assertInstanceOf(getQuery("/[A-Z][123]/^0.5",qp), RegexpQuery.class); + assertQueryEquals(q, getQuery("/[A-Z][123]/^0.5",qp)); qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); - Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]")); - assertEquals(escaped, getQuery("/[a-z]\\/[123]/",qp)); Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]")); - assertEquals(escaped2, getQuery("/[a-z]\\*[123]/",qp)); + assertQueryEquals(escaped2, getQuery("/[a-z]\\*[123]/",qp)); - BooleanQuery complex = new BooleanQuery(); - complex.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST); - complex.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); - complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); - assertEquals(complex, getQuery("/[a-z]\\/[123]/ AND path:\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ",qp)); Query re = new RegexpQuery(new Term("field", "http.*")); - assertEquals(re, getQuery("field:/http.*/",qp)); - assertEquals(re, getQuery("/http.*/",qp)); + assertQueryEquals(re, getQuery("field:/http.*/",qp)); + assertQueryEquals(re, getQuery("/http.*/",qp)); re = new RegexpQuery(new Term("field", "http~0.5")); - assertEquals(re, getQuery("field:/http~0.5/",qp)); - assertEquals(re, getQuery("/http~0.5/",qp)); + assertQueryEquals(re, getQuery("field:/http~0.5/",qp)); + assertQueryEquals(re, 
getQuery("/http~0.5/",qp)); re = new RegexpQuery(new Term("field", "boo")); - assertEquals(re, getQuery("field:/boo/",qp)); - assertEquals(re, getQuery("/boo/",qp)); + assertQueryEquals(re, getQuery("field:/boo/",qp)); + assertQueryEquals(re, getQuery("/boo/",qp)); - assertEquals(new TermQuery(new Term("field", "/boo/")), getQuery("\"/boo/\"",qp)); - assertEquals(new TermQuery(new Term("field", "/boo/")), getQuery("\\/boo\\/",qp)); + assertQueryEquals(new TermQuery(new Term("field", "/boo/")), getQuery("\"/boo/\"",qp)); + assertQueryEquals(new TermQuery(new Term("field", "/boo/")), getQuery("\\/boo\\/",qp)); BooleanQuery two = new BooleanQuery(); two.add(new RegexpQuery(new Term("field", "foo")), Occur.SHOULD); two.add(new RegexpQuery(new Term("field", "bar")), Occur.SHOULD); - assertEquals(two, getQuery("field:/foo/ field:/bar/",qp)); - assertEquals(two, getQuery("/foo/ /bar/",qp)); + assertQueryEquals(two, getQuery("field:/foo/ field:/bar/",qp)); + assertQueryEquals(two, getQuery("/foo/ /bar/",qp)); } public void testStopwords() throws Exception { @@ -701,14 +653,14 @@ CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet)); Query result = getQuery("field:the OR field:foo",qp); assertNotNull("result is null and it shouldn't be", result); - assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery); - assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0); + assertInstanceOf(result, BooleanQuery.class); + assertEmpty(result); result = getQuery("field:woo OR field:the",qp); assertNotNull("result is null and it shouldn't be", result); - assertTrue("result is not a TermQuery", result instanceof TermQuery); + assertInstanceOf(result, TermQuery.class); result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",qp); assertNotNull("result is null and it shouldn't be", result); - assertTrue("result 
is not a BooleanQuery", result instanceof BooleanQuery); + assertInstanceOf(result, BooleanQuery.class); if (VERBOSE) System.out.println("Result: " + result); assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2); } @@ -720,12 +672,9 @@ // 0 2 5 7 8 int expectedPositions[] = {1,3,4,6,9}; PhraseQuery pq = (PhraseQuery) getQuery(qtxt,qp); - //System.out.println("Query text: "+qtxt); - //System.out.println("Result: "+pq); Term t[] = pq.getTerms(); int pos[] = pq.getPositions(); for (int i = 0; i < t.length; i++) { - //System.out.println(i+". "+t[i]+" pos: "+pos[i]); assertEquals("term "+i+" = "+t[i]+" has wrong term-position!",expectedPositions[i],pos[i]); } } @@ -735,8 +684,8 @@ assertEquals(new MatchAllDocsQuery(), getQuery("*:*",qp)); assertEquals(new MatchAllDocsQuery(), getQuery("(*:*)",qp)); BooleanQuery bq = (BooleanQuery)getQuery("+*:* -*:*",qp); - assertTrue(bq.getClauses()[0].getQuery() instanceof MatchAllDocsQuery); - assertTrue(bq.getClauses()[1].getQuery() instanceof MatchAllDocsQuery); + assertInstanceOf(bq.getClauses()[0].getQuery(), MatchAllDocsQuery.class); + assertInstanceOf(bq.getClauses()[1].getQuery(), MatchAllDocsQuery.class); } // LUCENE-2002: make sure defaults for StandardAnalyzer's @@ -766,12 +715,12 @@ setAnalyzeRangeTerms(qp, true); Query expected = TermRangeQuery.newStringRange(getDefaultField(), "collatedabc", "collateddef", true, true); Query actual = getQuery("[abc TO def]", qp); - assertEquals(expected, actual); + assertQueryEquals(expected, actual); } public void testDistanceAsEditsParsing() throws Exception { - FuzzyQuery q = (FuzzyQuery) getQuery("foobar~2",new MockAnalyzer(random())); - assertEquals(2, q.getMaxEdits()); + FuzzyQuery expected = new FuzzyQuery(new Term("field", "foobar"), 2); + assertQueryEquals(expected, getQuery("foobar~2",new MockAnalyzer(random()))); } public void testPhraseQueryToString() throws Exception { @@ -806,7 +755,7 @@ for (int j = 
0; j < prefixQueries[i].length; j++) { String queryString = prefixQueries[i][j]; Query q = getQuery(queryString,qp); - assertEquals(PrefixQuery.class, q.getClass()); + assertInstanceOf(q, PrefixQuery.class); } } @@ -815,7 +764,7 @@ for (int j = 0; j < wildcardQueries[i].length; j++) { String qtxt = wildcardQueries[i][j]; Query q = getQuery(qtxt,qp); - assertEquals(WildcardQuery.class, q.getClass()); + assertInstanceOf(q, WildcardQuery.class); } } setDefaultField(oldDefaultField); @@ -845,7 +794,7 @@ // test parsing with non-default boost MatchAllDocsQuery query = new MatchAllDocsQuery(); query.setBoost(2.3f); - assertEquals(query, getQuery(query.toString(),qp)); + assertQueryEquals(query, getQuery(query.toString(),qp)); setDefaultField(oldDefaultField); } @@ -859,4 +808,86 @@ q.add(new TermQuery(new Term("field2", "z")), Occur.MUST); assertEquals(q, getQuery(query, new MockAnalyzer(random()))); } + + //string query equality tests that have to be rewritten + //if parser is generating a SpanQuery + public void testParserSpecificQuery() throws Exception { + + //testSimple + assertQueryEquals("a AND !b", null, "+a -b"); + assertQueryEquals("a && b", null, "+a +b"); + assertQueryEquals("a || b", null, "a b"); + assertQueryEquals("a OR !b", null, "a -b"); + + assertQueryEquals("term AND \"phrase phrase\"", null, + "+term +\"phrase phrase\""); + assertQueryEquals("\"hello there\"", null, "\"hello there\""); + assertQueryEquals("\"germ term\"^2.0", null, "\"germ term\"^2.0"); + assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0"); + + assertQueryEquals("(foo OR bar) AND (baz OR boo)", null, + "+(foo bar) +(baz boo)"); + assertQueryEquals("((a OR b) AND NOT c) OR d", null, + "(+(a b) -c) d"); + assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, + "+(apple \"steve jobs\") -(foo bar baz)"); + assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, + "+(title:dog title:cat) -author:\"bob dole\""); + + + //testRegexps + 
CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); + Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]")); + assertQueryEquals(escaped, getQuery("/[a-z]\\/[123]/",qp)); + + BooleanQuery complex = new BooleanQuery(); + complex.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST); + complex.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); + complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); + assertQueryEquals(complex, getQuery("/[a-z]\\/[123]/ AND path:\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ",qp)); + qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)getQuery("/[A-Z][123]/^0.5",qp)).getRewriteMethod()); + + //testWildcard + assertQueryEquals("term^3~", null, "term~2^3.0"); + + + //testRange + assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]")).getRewriteMethod()); + qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)getQuery("[ a TO z]", qp)).getRewriteMethod()); + assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar"); + assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar"); + assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}"); + assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); + + //testSlop + assertQueryEquals("\"term germ\"~2 flork", null, "\"term germ\"~2 flork"); + + //testEscaped + Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); + assertQueryEquals("[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO 
\"c\\:\\\\temp\\\\\\~foo9.txt\"]", a, + "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]"); + assertQueryEquals("\"a \\\"b c\\\" d\"", a, "\"a \"b c\" d\""); + assertQueryEquals("\"a \\+b c d\"", a, "\"a +b c d\""); + assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "\"a \\(b\" c\""); + + // LUCENE-1189 + assertQueryEquals("(\"a\\\\\") or (\"b\")", a ,"a\\ or b"); + + //testQPA + assertQueryEquals("term phrase term", qpAnalyzer, + "term (phrase1 phrase2) term"); + assertQueryEquals("term AND NOT phrase term", qpAnalyzer, + "+term -(phrase1 phrase2) term"); + + CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer); + setDefaultOperatorAND(cqpc); + assertQueryEquals(cqpc, "field", "term phrase term", + "+term +(+phrase1 +phrase2) +term"); + assertQueryEquals(cqpc, "field", "phrase", + "+phrase1 +phrase2"); + } } Index: lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestCase.java =================================================================== --- lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestCase.java (revision 1573017) +++ lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestCase.java (working copy) @@ -38,6 +38,7 @@ import org.apache.lucene.queryparser.classic.QueryParserBase; import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; @@ -309,6 +310,11 @@ Query q = getQuery(query, cqpC); assertEquals(result, q.toString(field)); } + + public void assertQueryEquals(Query expected, Query test) { + assertEquals(expected.toString(), test.toString()); + } + public void assertEscapedQueryEquals(String query, Analyzer a, String result) throws Exception { assertEquals(result, QueryParserBase.escape(query)); @@ -331,6 +337,27 @@ 
assertEquals(result, q.toString("field")); } + public void assertFuzzyQueryEquals(String field, String term, int maxEdits, int prefixLen, Query query) { + assert(query instanceof FuzzyQuery); + FuzzyQuery fq = (FuzzyQuery)query; + assertEquals(field, fq.getField()); + assertEquals(term, fq.getTerm().text()); + assertEquals(maxEdits, fq.getMaxEdits()); + assertEquals(prefixLen, fq.getPrefixLength()); + } + + @SuppressWarnings("rawtypes") + public void assertInstanceOf(Query q, Class other) { + assertTrue(q.getClass().isAssignableFrom(other)); + } + + public void assertEmpty(Query q) { + boolean e = false; + if (q instanceof BooleanQuery && ((BooleanQuery)q).getClauses().length == 0) { + e = true; + } + assertTrue("Empty: "+q.toString(), e); + } public Query getQueryDOA(String query, Analyzer a) throws Exception { if (a == null) { a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanQueryParserBase.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanQueryParserBase.java (revision 1573017) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanQueryParserBase.java (working copy) @@ -99,7 +99,6 @@ //if a full term is analyzed and the analyzer returns nothing, //should a ParseException be thrown or should I just ignore the full token. private boolean throwExceptionForEmptyTerm = false; - private boolean lowercaseRegex = false; //////// //Unsupported operations @@ -173,7 +172,7 @@ @Override protected Query getRegexpQuery(String field, String termStr) throws ParseException { - if (getLowercaseRegex()) { + if (getLowercaseExpandedTerms()) { termStr = termStr.toLowerCase(getLocale()); } Term t = new Term(field, termStr); @@ -229,8 +228,6 @@ /** * Creates a new fuzzy term. - * If minimumSimilarity is >= 1.0f, this rounds to avoid - * exception for numEdits != whole number. 
* * @return fuzzy query */ @@ -412,28 +409,27 @@ } //if there was an exception during analysis, swallow it and //try for lowercase - if ((start == null && getAnalyzeRangeTerms()) || + if ((start == null && getAnalyzeRangeTerms()) && getNormMultiTerms() == NORM_MULTI_TERMS.LOWERCASE) { start = part1.toLowerCase(getLocale()); - } else { + } else if (start == null){ start = part1; } } - if (part2 == null) { end = null; } else { if (getAnalyzeRangeTerms()) { try { - end = analyzeMultitermTermParseEx(field, part1).utf8ToString(); + end = analyzeMultitermTermParseEx(field, part2).utf8ToString(); } catch (ParseException e) { //swallow..doh! } } - if ((end == null && getAnalyzeRangeTerms()) || + if ((end == null && getAnalyzeRangeTerms()) && getNormMultiTerms() == NORM_MULTI_TERMS.LOWERCASE) { end = part2.toLowerCase(getLocale()); - } else { + } else if (end == null) { end = part2; } } @@ -491,11 +487,9 @@ } } - // if the user enters 2.4 for example, round it so that there won't be - // an - // illegalparameter exception - if (minSimilarity >= 1.0f) { - minSimilarity = (float) Math.round(minSimilarity); + // if the user enters 2.4 for example, throw parse exception + if (minSimilarity >= 1.0f && minSimilarity != (int) minSimilarity) { + throw new ParseException("Fractional edit distances are not allowed!"); } int prefixLen = getFuzzyPrefixLength(); @@ -699,6 +693,7 @@ if (nonEmpties.size() == 1) { return nonEmpties.get(0); } + SpanQuery[] ret = nonEmpties .toArray(new SpanQuery[nonEmpties.size()]); if (quoted || getAutoGeneratePhraseQueries() == true) { @@ -755,10 +750,7 @@ termAtt.fillBytesRef(); //if start is the same, treat it as a synonym...ignore end because //of potential for shingles - if (lastStart > -1 && offAtt.startOffset() == lastStart) - //&& offAttr.endOffset() == lastEnd) - { - + if (lastStart > -1 && offAtt.startOffset() == lastStart) { handleSyn(queries, (SpanTermQuery)newTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)))); } else { 
queries.add((SpanTermQuery)newTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)))); @@ -1053,16 +1045,10 @@ } /** - * Copied nearly exactly from FuzzyQuery's floatToEdits. - *

- * There are two differences: - * <p> - * <ol> - * <li>FuzzyQuery's floatToEdits requires that the return value - * be <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE</li> - * <li>This adds a small amount so that nearly exact - * hits don't get floored: 0.80 for termLen 5 should = 1</li> - * </ol>
      + * Copied nearly exactly from FuzzyQuery's floatToEdits because + * FuzzyQuery's floatToEdits requires that the return value + * be <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + * * @return edits */ public static int unboundedFloatToEdits(float minimumSimilarity, int termLen) { @@ -1071,15 +1057,8 @@ } else if (minimumSimilarity == 0.0f) { return 0; // 0 means exact, not infinite # of edits! } else { - return (int)(0.00001f+(1f-minimumSimilarity) * termLen); + return (int)((1f-minimumSimilarity) * termLen); } } - public boolean getLowercaseRegex() { - return lowercaseRegex; - } - - public void setLowercaseRegex(boolean lowercaseRegex) { - this.lowercaseRegex = lowercaseRegex; - } } Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanQueryParser.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanQueryParser.java (revision 1573017) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanQueryParser.java (working copy) @@ -392,7 +392,12 @@ if (tmpField.equals("*") && tmpTerm instanceof SQPTerm && ((SQPTerm)tmpTerm).getString().equals("*")) { - return new MatchAllDocsQuery(); + Query q = new MatchAllDocsQuery(); + float boost = ((SQPBoostableToken)tmpTerm).getBoost(); + if (boost != SpanQueryParserBase.UNSPECIFIED_BOOST){ + q.setBoost(((SQPBoostableToken)tmpTerm).getBoost()); + } + return q; } return null; } Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanOnlyParser.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanOnlyParser.java (revision 1573017) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/spans/SpanOnlyParser.java (working copy) @@ -38,7 +38,7 @@ * @see SpanQueryParser */ -public class SpanOnlyParser extends AbstractSpanQueryParser{ +public class SpanOnlyParser extends 
AbstractSpanQueryParser { /** * Initializes the SpanOnlyParser. @@ -62,7 +62,7 @@ } @Override - public Query parse(String s) throws ParseException{ + public Query parse(String s) throws ParseException { Query q = _parsePureSpan(getField(), s); assert(q == null || q instanceof SpanQuery); return q; @@ -94,7 +94,7 @@ } - protected Query _parsePureSpan(String field, String queryString) throws ParseException{ + protected Query _parsePureSpan(String field, String queryString) throws ParseException { SpanQueryLexer lexer = new SpanQueryLexer(); List tokens = lexer.getTokens(queryString); SQPClause overallClause = new SQPOrClause(0, tokens.size());