Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk:r1035096,1035103 Property changes on: lucene ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene:r1035096,1035103 Property changes on: lucene\backwards\src ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/backwards/src:r1035096,1035103 Property changes on: lucene\backwards\src\test\org\apache\lucene\analysis\TestISOLatin1AccentFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:r1035096,1035103 Property changes on: lucene\backwards\src\test\org\apache\lucene\document\TestDateTools.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java:r1035096,1035103 Property changes on: lucene\backwards\src\test\org\apache\lucene\document\TestNumberTools.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java:r1035096,1035103 Property changes on: lucene\backwards\src\test\org\apache\lucene\index\TestBackwardsCompatibility.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:r1035096,1035103 Property changes on: lucene\backwards\src\test\org\apache\lucene\util\TestAttributeSource.java ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java:r1035096,1035103 Property changes on: lucene\build.xml ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/build.xml:r1035096,1035103 Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1035096) +++ lucene/CHANGES.txt (working copy) @@ -421,6 +421,11 @@ * LUCENE-2636: Added MultiCollector which allows running the search with several Collectors. (Shai Erera) + +* LUCENE-2754, LUCENE-2757: Added a wrapper around MultiTermQueries + to add span support: SpanMultiTermQueryWrapper. + Using this wrapper it's easy to add fuzzy/wildcard to e.g. a SpanNearQuery. + (Robert Muir, Uwe Schindler) Optimizations Property changes on: lucene\CHANGES.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/CHANGES.txt:r1035096,1035103 Property changes on: lucene\contrib ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib:r1035096,1035103 Property changes on: lucene\contrib\analyzers ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\ca\CatalanAnalyzer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java:r1035096,1035103 Property changes 
on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\en\EnglishMinimalStemmer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\en\EnglishPossessiveFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\eu\BasqueAnalyzer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\hu\HungarianLightStemmer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\hy\ArmenianAnalyzer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\shingle ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\apache\lucene\analysis\util\StemmerUtil.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\java\org\tartarus\snowball\TestApp.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/TestApp.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\test ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/test:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\test\org\apache\lucene\analysis\shingle ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle:r1035096,1035103 Property changes on: lucene\contrib\analyzers\common\src\test\org\apache\lucene\analysis\snowball\TestSnowball.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java:r1035096,1035103 Property changes on: lucene\contrib\analyzers\smartcn ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/smartcn:r1035096,1035103 Property changes on: lucene\contrib\analyzers\stempel ___________________________________________________________________ Modified: 
svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/stempel:r1035096,1035103 Property changes on: lucene\contrib\analyzers\stempel\src\java\org\apache\lucene\analysis\pl\PolishAnalyzer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java:r1035096,1035103 Property changes on: lucene\contrib\benchmark ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/benchmark:r1035096,1035103 Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 1035096) +++ lucene/contrib/CHANGES.txt (working copy) @@ -143,7 +143,11 @@ * LUCENE-1370: Added ShingleFilter option to output unigrams if no shingles can be generated. (Chris Harris via Steven Rowe) - + + * LUCENE-2754, LUCENE-2757: Deprecated SpanRegexQuery. Use + new SpanMultiTermQueryWrapper(new RegexQuery()) instead. 
+ (Robert Muir, Uwe Schindler) + New Features * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific Property changes on: lucene\contrib\CHANGES.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/CHANGES.txt:r1035096,1035103 Property changes on: lucene\contrib\highlighter ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/highlighter:r1035096,1035103 Property changes on: lucene\contrib\highlighter\src\test ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/highlighter/src/test:r1035096,1035103 Property changes on: lucene\contrib\icu ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/icu:r1035096,1035103 Property changes on: lucene\contrib\icu\lib ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/icu/lib:r1035096,1035103 Property changes on: lucene\contrib\icu\src\java\org\apache\lucene\collation\ICUCollationKeyAnalyzer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java:r1035096,1035103 Property changes on: lucene\contrib\icu\src\java\org\apache\lucene\collation\ICUCollationKeyFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java:r1035096,1035103 Property changes on: lucene\contrib\instantiated\src\test\org\apache\lucene\store\instantiated\TestIndicesEquals.java ___________________________________________________________________ Modified: 
svn:mergeinfo Merged /lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:r1035096,1035103 Index: lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java =================================================================== --- lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (revision 1035096) +++ lucene/contrib/queries/src/java/org/apache/lucene/search/regex/SpanRegexQuery.java (working copy) @@ -18,115 +18,29 @@ */ import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.search.spans.Spans; -import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import java.io.IOException; -import java.util.Collection; -import java.util.ArrayList; - /** * A SpanQuery version of {@link RegexQuery} allowing regular expression * queries to be nested within other SpanQuery subclasses. + * @deprecated Use new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery()) instead. 
+ * This query will be removed in Lucene 4.0 */ -public class SpanRegexQuery extends SpanQuery implements RegexQueryCapable { - private RegexCapabilities regexImpl = new JavaUtilRegexCapabilities(); - private Term term; +@Deprecated +public class SpanRegexQuery extends SpanMultiTermQueryWrapper implements RegexQueryCapable { + private final RegexCapabilities regexImpl = new JavaUtilRegexCapabilities(); public SpanRegexQuery(Term term) { - this.term = term; + super(new RegexQuery(term)); } - public Term getTerm() { return term; } + public Term getTerm() { return query.getTerm(); } - @Override - public Query rewrite(IndexReader reader) throws IOException { - RegexQuery orig = new RegexQuery(term); - orig.setRegexImplementation(regexImpl); - orig.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - BooleanQuery bq = (BooleanQuery) orig.rewrite(reader); - - BooleanClause[] clauses = bq.getClauses(); - SpanQuery[] sqs = new SpanQuery[clauses.length]; - for (int i = 0; i < clauses.length; i++) { - BooleanClause clause = clauses[i]; - - // Clauses from RegexQuery.rewrite are always TermQuery's - TermQuery tq = (TermQuery) clause.getQuery(); - - sqs[i] = new SpanTermQuery(tq.getTerm()); - sqs[i].setBoost(tq.getBoost()); - } - - SpanOrQuery query = new SpanOrQuery(sqs); - query.setBoost(orig.getBoost()); - - return query; - } - - @Override - public Spans getSpans(IndexReader reader) throws IOException { - throw new UnsupportedOperationException("Query should have been rewritten"); - } - - @Override - public String getField() { - return term.field(); - } - - public Collection getTerms() { - Collection terms = new ArrayList(); - terms.add(term); - return terms; - } - - /* generated by IntelliJ IDEA */ - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - final SpanRegexQuery that = (SpanRegexQuery) o; - - if (!regexImpl.equals(that.regexImpl)) return false; - if 
(!term.equals(that.term)) return false; - - return true; - } - - /* generated by IntelliJ IDEA */ - @Override - public int hashCode() { - int result; - result = regexImpl.hashCode(); - result = 29 * result + term.hashCode(); - return result; - } - - @Override - public String toString(String field) { - StringBuilder buffer = new StringBuilder(); - buffer.append("spanRegexQuery("); - buffer.append(term); - buffer.append(")"); - buffer.append(ToStringUtils.boost(getBoost())); - return buffer.toString(); - } - public void setRegexImplementation(RegexCapabilities impl) { - this.regexImpl = impl; + query.setRegexImplementation(impl); } public RegexCapabilities getRegexImplementation() { - return regexImpl; + return query.getRegexImplementation(); } } Index: lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java =================================================================== --- lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java (revision 1035096) +++ lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -30,6 +31,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiSearcher; import org.apache.lucene.search.spans.SpanFirstQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.store.Directory; @@ -75,6 +77,65 @@ writer.close(); IndexSearcher searcher = new IndexSearcher(directory, true); + SpanQuery srq = new SpanMultiTermQueryWrapper(new RegexQuery(new Term("field", "aut.*"))); + 
SpanFirstQuery sfq = new SpanFirstQuery(srq, 1); + // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6, + // true); + int numHits = searcher.search(sfq, null, 1000).totalHits; + assertEquals(1, numHits); + searcher.close(); + directory.close(); + } + + public void testSpanRegexBug() throws CorruptIndexException, IOException { + createRAMDirectories(); + + SpanQuery srq = new SpanMultiTermQueryWrapper(new RegexQuery(new Term("field", "a.*"))); + SpanQuery stq = new SpanMultiTermQueryWrapper(new RegexQuery(new Term("field", "b.*"))); + SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq, stq }, 6, + true); + + // 1. Search the same store which works + IndexSearcher[] arrSearcher = new IndexSearcher[2]; + arrSearcher[0] = new IndexSearcher(indexStoreA, true); + arrSearcher[1] = new IndexSearcher(indexStoreB, true); + MultiSearcher searcher = new MultiSearcher(arrSearcher); + int numHits = searcher.search(query, null, 1000).totalHits; + arrSearcher[0].close(); + arrSearcher[1].close(); + + // Will fail here + // We expect 2 but only one matched + // The rewriter function only write it once on the first IndexSearcher + // So it's using term: a1 b1 to search on the second IndexSearcher + // As a result, it won't match the document in the second IndexSearcher + assertEquals(2, numHits); + indexStoreA.close(); + indexStoreB.close(); + } + + /** remove in lucene 4.0 */ + @Deprecated + public void testSpanRegexOld() throws Exception { + Directory directory = newDirectory(); + IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer())); + Document doc = new Document(); + // doc.add(newField("field", "the quick brown fox jumps over the lazy dog", + // Field.Store.NO, Field.Index.ANALYZED)); + // writer.addDocument(doc); + // doc = new Document(); + doc.add(newField("field", "auto update", Field.Store.NO, + Field.Index.ANALYZED)); + writer.addDocument(doc); + doc = new Document(); + 
doc.add(newField("field", "first auto update", Field.Store.NO, + Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.optimize(); + writer.close(); + + IndexSearcher searcher = new IndexSearcher(directory, true); SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*")); SpanFirstQuery sfq = new SpanFirstQuery(srq, 1); // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6, @@ -85,7 +146,9 @@ directory.close(); } - public void testSpanRegexBug() throws CorruptIndexException, IOException { + /** remove in lucene 4.0 */ + @Deprecated + public void testSpanRegexBugOld() throws CorruptIndexException, IOException { createRAMDirectories(); SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "a.*")); Property changes on: lucene\src ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\BaseCharFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/BaseCharFilter.java:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\LengthFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\LetterTokenizer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/LetterTokenizer.java:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\LowerCaseTokenizer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\standard ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\standard\ClassicTokenizerImpl.jflex ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\standard\StandardFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\standard\StandardTokenizer.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\standard\StandardTokenizerImpl.jflex ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\standard\UAX29Tokenizer.jflex ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex:r1035096,1035103 Property changes on: lucene\src\java\org\apache\lucene\analysis\Tokenizer.java 
___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java:r1035096,1035103 Index: lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (revision 1035096) +++ lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (working copy) @@ -18,29 +18,10 @@ */ import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.PriorityQueue; -import java.util.Comparator; -import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.queryParser.QueryParser; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.ByteBlockPool; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefHash; -import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; class ConstantScoreAutoRewrite extends TermCollectingRewrite { @@ -88,8 +69,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) { - topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD); + protected void addClause(BooleanQuery topLevel, Term term, float boost /*ignored*/) { + topLevel.add(new TermQuery(term), BooleanClause.Occur.SHOULD); } @Override @@ -102,46 +83,43 @@ final int docCountCutoff = (int) ((docCountPercent / 100.) 
* reader.maxDoc()); final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff); - final CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit); + final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit); collectTerms(reader, query, col); - final int size = col.pendingTerms.size(); + if (col.hasCutOff) { return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); - } else if (size == 0) { - return getTopLevelQuery(); } else { - final BooleanQuery bq = getTopLevelQuery(); - final Term placeholderTerm = new Term(query.field); - final BytesRefHash pendingTerms = col.pendingTerms; - final int sort[] = pendingTerms.sort(col.termsEnum.getComparator()); - for(int i = 0; i < size; i++) { - // docFreq is not used for constant score here, we pass 1 - // to explicitely set a fake value, so it's not calculated - addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f); + final Query result; + if (col.pendingTerms.isEmpty()) { + result = new BooleanQuery(true); + } else { + BooleanQuery bq = getTopLevelQuery(); + for(Term term : col.pendingTerms) { + addClause(bq, term, 1.0f); + } + // Strip scores + result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + result.setBoost(query.getBoost()); } - // Strip scores - final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); - result.setBoost(query.getBoost()); - query.incTotalNumberOfTerms(size); + query.incTotalNumberOfTerms(col.pendingTerms.size()); return result; } } - static final class CutOffTermCollector extends TermCollector { - CutOffTermCollector(int docCountCutoff, int termCountLimit) { + private static final class CutOffTermCollector implements TermCollector { + CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) { + this.reader = reader; this.docCountCutoff = docCountCutoff; this.termCountLimit = termCountLimit; } - @Override - 
public void setNextEnum(TermsEnum termsEnum) throws IOException { - this.termsEnum = termsEnum; - } - - @Override - public boolean collect(BytesRef bytes) throws IOException { - pendingTerms.add(bytes); - docVisitCount += termsEnum.docFreq(); + public boolean collect(Term t, float boost) throws IOException { + pendingTerms.add(t); + // Loading the TermInfo from the terms dict here + // should not be costly, because 1) the + // query/filter will load the TermInfo when it + // runs, and 2) the terms dict has a cache: + docVisitCount += reader.docFreq(t); if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { hasCutOff = true; return false; @@ -151,10 +129,10 @@ int docVisitCount = 0; boolean hasCutOff = false; - TermsEnum termsEnum; - + + final IndexReader reader; final int docCountCutoff, termCountLimit; - final BytesRefHash pendingTerms = new BytesRefHash(); + final ArrayList pendingTerms = new ArrayList(); } @Override Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1035096) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -19,8 +19,6 @@ import java.io.IOException; import java.io.Serializable; -import java.util.ArrayList; -import java.util.PriorityQueue; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -69,20 +67,6 @@ public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException; } - private static final class ConstantScoreFilterRewrite extends RewriteMethod { - @Override - public Query rewrite(IndexReader reader, MultiTermQuery query) { - Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); - result.setBoost(query.getBoost()); - return result; - } - - // Make sure we are still a singleton even after deserializing - protected Object readResolve() { - return 
CONSTANT_SCORE_FILTER_REWRITE; - } - } - /** A rewrite method that first creates a private Filter, * by visiting each term in sequence and marking all docs * for that term. Matching documents are assigned a @@ -95,51 +79,19 @@ * exception. * * @see #setRewriteMethod */ - public final static RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite(); - - private abstract static class BooleanQueryRewrite extends RewriteMethod { - - protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { - final FilteredTermEnum enumerator = query.getEnum(reader); - try { - do { - final Term t = enumerator.term(); - if (t == null || !collector.collect(t, enumerator.difference())) - break; - } while (enumerator.next()); - } finally { - enumerator.close(); - } - } - - protected interface TermCollector { - /** return false to stop collecting */ - boolean collect(Term t, float boost) throws IOException; - } - - } - - private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite { + public static final RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new RewriteMethod() { @Override - public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { - final BooleanQuery result = new BooleanQuery(true); - collectTerms(reader, query, new TermCollector() { - public boolean collect(Term t, float boost) { - TermQuery tq = new TermQuery(t); // found a match - tq.setBoost(query.getBoost() * boost); // set the boost - result.add(tq, BooleanClause.Occur.SHOULD); // add to query - return true; - } - }); - query.incTotalNumberOfTerms(result.clauses().size()); + public Query rewrite(IndexReader reader, MultiTermQuery query) { + Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + result.setBoost(query.getBoost()); return result; } // Make sure we are still a singleton even after deserializing protected Object readResolve() { - return 
SCORING_BOOLEAN_QUERY_REWRITE; + return CONSTANT_SCORE_FILTER_REWRITE; } - } + }; /** A rewrite method that first translates each term into * {@link BooleanClause.Occur#SHOULD} clause in a @@ -154,107 +106,33 @@ * exceeds {@link BooleanQuery#getMaxClauseCount}. * * @see #setRewriteMethod */ - public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite(); - - - /** - * Base rewrite method for collecting only the top terms - * via a priority queue. - */ - public static abstract class TopTermsBooleanQueryRewrite extends BooleanQueryRewrite { - private final int size; - - /** - * Create a TopTermsBooleanQueryRewrite for - * at most size terms. - *

- * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than - * size, then it will be used instead. - */ - public TopTermsBooleanQueryRewrite(int size) { - this.size = size; - } - - /** Return a suitable Query for a MultiTermQuery term. */ - protected abstract Query getQuery(Term term); - - @Override - public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { - final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount()); - final PriorityQueue stQueue = new PriorityQueue(); - collectTerms(reader, query, new TermCollector() { - public boolean collect(Term t, float boost) { - // ignore uncompetetive hits - if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost) - return true; - // add new entry in PQ - st.term = t; - st.boost = boost; - stQueue.offer(st); - // possibly drop entries from queue - st = (stQueue.size() > maxSize) ? stQueue.poll() : new ScoreTerm(); - return true; - } - - // reusable instance - private ScoreTerm st = new ScoreTerm(); - }); - - final BooleanQuery bq = new BooleanQuery(true); - for (final ScoreTerm st : stQueue) { - Query tq = getQuery(st.term); // found a match - tq.setBoost(query.getBoost() * st.boost); // set the boost - bq.add(tq, BooleanClause.Occur.SHOULD); // add to query - } - query.incTotalNumberOfTerms(bq.clauses().size()); - return bq; - } + public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = ScoringRewrite.SCORING_BOOLEAN_QUERY_REWRITE; - @Override - public int hashCode() { - final int prime = 17; - int result = 1; - result = prime * result + size; - return result; - } + /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except + * scores are not computed. Instead, each matching + * document receives a constant score equal to the + * query's boost. + * + *

NOTE: This rewrite method will hit {@link + * BooleanQuery.TooManyClauses} if the number of terms + * exceeds {@link BooleanQuery#getMaxClauseCount}. + * + * @see #setRewriteMethod */ + public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = ScoringRewrite.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; - @Override - public boolean equals(Object obj) { - if (this == obj) return true; - if (obj == null) return false; - if (getClass() != obj.getClass()) return false; - TopTermsBooleanQueryRewrite other = (TopTermsBooleanQueryRewrite) obj; - if (size != other.size) return false; - return true; - } - - private static class ScoreTerm implements Comparable { - public Term term; - public float boost; - - public int compareTo(ScoreTerm other) { - if (this.boost == other.boost) - return other.term.compareTo(this.term); - else - return Float.compare(this.boost, other.boost); - } - } - } - /** * A rewrite method that first translates each term into * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the * scores as computed by the query. * *

- * This rewrite mode only uses the top scoring terms so it will not overflow - * the boolean max clause count. It is the default rewrite mode for + * This rewrite method only uses the top scoring terms so it will not overflow + * the boolean max clause count. It is the default rewrite method for * {@link FuzzyQuery}. * * @see #setRewriteMethod */ - public static final class TopTermsScoringBooleanQueryRewrite extends - TopTermsBooleanQueryRewrite { + public static final class TopTermsScoringBooleanQueryRewrite extends TopTermsRewrite { /** * Create a TopTermsScoringBooleanQueryRewrite for @@ -268,9 +146,21 @@ } @Override - protected Query getQuery(Term term) { - return new TermQuery(term); + protected int getMaxSize() { + return BooleanQuery.getMaxClauseCount(); } + + @Override + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); + } + + @Override + protected void addClause(BooleanQuery topLevel, Term term, float boost) { + final TermQuery tq = new TermQuery(term); + tq.setBoost(boost); + topLevel.add(tq, BooleanClause.Occur.SHOULD); + } } /** @@ -283,8 +173,7 @@ * * @see #setRewriteMethod */ - public static final class TopTermsBoostOnlyBooleanQueryRewrite extends - TopTermsBooleanQueryRewrite { + public static final class TopTermsBoostOnlyBooleanQueryRewrite extends TopTermsRewrite { /** * Create a TopTermsBoostOnlyBooleanQueryRewrite for @@ -298,44 +187,23 @@ } @Override - protected Query getQuery(Term term) { - return new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term))); + protected int getMaxSize() { + return BooleanQuery.getMaxClauseCount(); } - } - - private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable { + @Override - public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { - Query result = super.rewrite(reader, query); - assert result instanceof BooleanQuery; - if (!((BooleanQuery) result).clauses().isEmpty()) { - // strip the 
scores off - result = new ConstantScoreQuery(new QueryWrapperFilter(result)); - result.setBoost(query.getBoost()); - } - return result; + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); } - - // Make sure we are still a singleton even after deserializing + @Override - protected Object readResolve() { - return CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; + protected void addClause(BooleanQuery topLevel, Term term, float boost) { + final Query q = new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term))); + q.setBoost(boost); + topLevel.add(q, BooleanClause.Occur.SHOULD); } } - - /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except - * scores are not computed. Instead, each matching - * document receives a constant score equal to the - * query's boost. - * - *

NOTE: This rewrite method will hit {@link - * BooleanQuery.TooManyClauses} if the number of terms - * exceeds {@link BooleanQuery#getMaxClauseCount}. - * - * @see #setRewriteMethod */ - public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite(); - - + /** A rewrite method that tries to pick the best * constant-score rewrite method based on term and * document counts from the query. If both the number of @@ -344,137 +212,8 @@ * Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is * used. */ - public static class ConstantScoreAutoRewrite extends BooleanQueryRewrite { + public static class ConstantScoreAutoRewrite extends org.apache.lucene.search.ConstantScoreAutoRewrite {} - // Defaults derived from rough tests with a 20.0 million - // doc Wikipedia index. With more than 350 terms in the - // query, the filter method is fastest: - public static int DEFAULT_TERM_COUNT_CUTOFF = 350; - - // If the query will hit more than 1 in 1000 of the docs - // in the index (0.1%), the filter method is fastest: - public static double DEFAULT_DOC_COUNT_PERCENT = 0.1; - - private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; - private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT; - - /** If the number of terms in this query is equal to or - * larger than this setting then {@link - * #CONSTANT_SCORE_FILTER_REWRITE} is used. */ - public void setTermCountCutoff(int count) { - termCountCutoff = count; - } - - /** @see #setTermCountCutoff */ - public int getTermCountCutoff() { - return termCountCutoff; - } - - /** If the number of documents to be visited in the - * postings exceeds this specified percentage of the - * maxDoc() for the index, then {@link - * #CONSTANT_SCORE_FILTER_REWRITE} is used. 
- * @param percent 0.0 to 100.0 */ - public void setDocCountPercent(double percent) { - docCountPercent = percent; - } - - /** @see #setDocCountPercent */ - public double getDocCountPercent() { - return docCountPercent; - } - - @Override - public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { - - // Get the enum and start visiting terms. If we - // exhaust the enum before hitting either of the - // cutoffs, we use ConstantBooleanQueryRewrite; else, - // ConstantFilterRewrite: - final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc()); - final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff); - - final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit); - collectTerms(reader, query, col); - - if (col.hasCutOff) { - return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); - } else { - final Query result; - if (col.pendingTerms.isEmpty()) { - result = new BooleanQuery(true); - } else { - BooleanQuery bq = new BooleanQuery(true); - for(Term term : col.pendingTerms) { - TermQuery tq = new TermQuery(term); - bq.add(tq, BooleanClause.Occur.SHOULD); - } - // Strip scores - result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); - result.setBoost(query.getBoost()); - } - query.incTotalNumberOfTerms(col.pendingTerms.size()); - return result; - } - } - - private static final class CutOffTermCollector implements TermCollector { - CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) { - this.reader = reader; - this.docCountCutoff = docCountCutoff; - this.termCountLimit = termCountLimit; - } - - public boolean collect(Term t, float boost) throws IOException { - pendingTerms.add(t); - // Loading the TermInfo from the terms dict here - // should not be costly, because 1) the - // query/filter will load the TermInfo when it - // runs, and 2) the terms dict has a cache: - docVisitCount += reader.docFreq(t); - if 
(pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { - hasCutOff = true; - return false; - } - return true; - } - - int docVisitCount = 0; - boolean hasCutOff = false; - - final IndexReader reader; - final int docCountCutoff, termCountLimit; - final ArrayList pendingTerms = new ArrayList(); - } - - @Override - public int hashCode() { - final int prime = 1279; - return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent)); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - - ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj; - if (other.termCountCutoff != termCountCutoff) { - return false; - } - - if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) { - return false; - } - - return true; - } - } - /** Read-only default instance of {@link * ConstantScoreAutoRewrite}, with {@link * ConstantScoreAutoRewrite#setTermCountCutoff} set to Property changes on: lucene\src\java\org\apache\lucene\search\MultiTermQueryWrapperFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:r1035096,1035103 Index: lucene/src/java/org/apache/lucene/search/ScoringRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (revision 1035096) +++ lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (working copy) @@ -28,17 +28,9 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermEnum; import org.apache.lucene.search.MultiTermQuery.RewriteMethod; -import org.apache.lucene.util.ArrayUtil; -import 
org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.ByteBlockPool; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefHash; -import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; - /** @lucene.internal Only public to be accessible by spans package. */ public abstract class ScoringRewrite extends TermCollectingRewrite { @@ -62,8 +54,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, float boost) { + final TermQuery tq = new TermQuery(term); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -105,99 +97,17 @@ }; @Override - public final Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { + public Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final Q result = getTopLevelQuery(); - final ParallelArraysTermCollector col = new ParallelArraysTermCollector(result instanceof BooleanQuery); - collectTerms(reader, query, col); - - final Term placeholderTerm = new Term(query.field); - final int size = col.terms.size(); - if (size > 0) { - final int sort[] = col.terms.sort(col.termsEnum.getComparator()); - final int[] docFreq = col.array.docFreq; - final float[] boost = col.array.boost; - for (int i = 0; i < size; i++) { - final int pos = sort[i]; - final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); - assert reader.docFreq(term) == docFreq[pos]; - addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]); + final int[] size = new int[1]; // "trick" to be able to make it final + collectTerms(reader, query, new TermCollector() { + public boolean collect(Term t, float boost) throws IOException { + addClause(result, t, query.getBoost() * boost); + size[0]++; + return true; } 
- } - query.incTotalNumberOfTerms(size); + }); + query.incTotalNumberOfTerms(size[0]); return result; } - - static final class ParallelArraysTermCollector extends TermCollector { - private final boolean checkMaxClauseCount; - final TermFreqBoostByteStart array = new TermFreqBoostByteStart(16); - final BytesRefHash terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); - TermsEnum termsEnum; - - private BoostAttribute boostAtt; - - public ParallelArraysTermCollector(boolean checkMaxClauseCount) { - this.checkMaxClauseCount = checkMaxClauseCount; - } - - @Override - public void setNextEnum(TermsEnum termsEnum) throws IOException { - this.termsEnum = termsEnum; - this.boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); - } - - @Override - public boolean collect(BytesRef bytes) { - final int e = terms.add(bytes); - if (e < 0 ) { - // duplicate term: update docFreq - final int pos = (-e)-1; - array.docFreq[pos] += termsEnum.docFreq(); - assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums"; - } else { - // new entry: we populate the entry initially - array.docFreq[e] = termsEnum.docFreq(); - array.boost[e] = boostAtt.getBoost(); - } - // if the new entry reaches the max clause count, we exit early - if (checkMaxClauseCount && e >= BooleanQuery.getMaxClauseCount()) - throw new BooleanQuery.TooManyClauses(); - return true; - } - } - - /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */ - static final class TermFreqBoostByteStart extends DirectBytesStartArray { - int[] docFreq; - float[] boost; - - public TermFreqBoostByteStart(int initSize) { - super(initSize); - } - - @Override - public int[] init() { - final int[] ord = super.init(); - boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)]; - docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)]; - assert boost.length 
>= ord.length && docFreq.length >= ord.length; - return ord; - } - - @Override - public int[] grow() { - final int[] ord = super.grow(); - docFreq = ArrayUtil.grow(docFreq, ord.length); - boost = ArrayUtil.grow(boost, ord.length); - assert boost.length >= ord.length && docFreq.length >= ord.length; - return ord; - } - - @Override - public int[] clear() { - boost = null; - docFreq = null; - return super.clear(); - } - - } } Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 1035096) +++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.lang.reflect.Method; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -45,6 +46,7 @@ */ public class SpanMultiTermQueryWrapper extends SpanQuery { protected final Q query; + private Method getFieldMethod = null, getTermMethod = null; /** * Create a new SpanMultiTermQueryWrapper. @@ -55,6 +57,9 @@ * on the wrapped query, changing its rewrite method to a suitable one for spans. * Be sure to not change the rewrite method on the wrapped query afterwards! Doing so will * throw {@link UnsupportedOperationException} on rewriting this query! + * In Lucene 3.x, MultiTermQuery allows queries to rewrite to different field names, but SpanQuery + * needs a fixed field. The wrapped query must therefore support getField(), getTerm() or getPrefix(). + * @throws IllegalArgumentException if the wrapped query provides none of getField(), getTerm() or getPrefix(). */ public SpanMultiTermQueryWrapper(Q query) { this.query = query; @@ -66,6 +71,23 @@ } else { setRewriteMethod(SCORING_SPAN_QUERY_REWRITE); } + + // In Lucene 3.x, MTQ has no fixed field, so we need to look it up by reflection.
+ // If the underlying query does not expose a constant field (via getField(), getTerm() or getPrefix()), we throw IAE: + try { + getFieldMethod = query.getClass().getMethod("getField"); + } catch (Exception e1) { + try { + getTermMethod = query.getClass().getMethod("getTerm"); + } catch (Exception e2) { + try { + getTermMethod = query.getClass().getMethod("getPrefix"); + } catch (Exception e3) { + throw new IllegalArgumentException("SpanMultiTermQueryWrapper can only wrap MultiTermQueries"+ + " that can return a field name using getField() or getTerm()"); + } + } + } } /** @@ -93,7 +115,16 @@ @Override public String getField() { - return query.getField(); + try { + if (getFieldMethod != null) { + return (String) getFieldMethod.invoke(query); + } else { + assert getTermMethod != null; + return ((Term) getTermMethod.invoke(query)).field(); + } + } catch (Exception e) { + throw new RuntimeException("Cannot invoke getField() or getTerm() on wrapped query.", e); + } } @Override @@ -148,7 +179,7 @@ } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, float boost) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); @@ -197,7 +228,7 @@ } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, float boost) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); Index: lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (revision 1035096) +++ lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (working copy) @@ -42,16 +42,20 @@ // copy clauses array into an ArrayList this.clauses = new ArrayList(clauses.length); for (int i = 0; i < clauses.length; i++) { - SpanQuery
clause = clauses[i]; - if (i == 0) { // check field - field = clause.getField(); - } else if (!clause.getField().equals(field)) { - throw new IllegalArgumentException("Clauses must have same field."); - } - this.clauses.add(clause); + addClause(clauses[i]); } } + /** Adds a clause to this query */ + public final void addClause(SpanQuery clause) { + if (field == null) { + field = clause.getField(); + } else if (!clause.getField().equals(field)) { + throw new IllegalArgumentException("Clauses must have same field."); + } + this.clauses.add(clause); + } + /** Return the clauses whose spans are matched. */ public SpanQuery[] getClauses() { return clauses.toArray(new SpanQuery[clauses.size()]); Index: lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (revision 1035096) +++ lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (working copy) @@ -18,19 +18,10 @@ */ import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Comparator; -import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.index.TermEnum; abstract class TermCollectingRewrite extends MultiTermQuery.RewriteMethod { @@ -38,56 +29,23 @@ protected abstract Q getTopLevelQuery() throws IOException; /** Add a MultiTermQuery term to the top-level query */ - protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException; + protected abstract void addClause(Q topLevel, Term term, float boost) throws IOException; protected final void 
collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { - final List subReaders = new ArrayList(); - ReaderUtil.gatherSubReaders(subReaders, reader); - Comparator lastTermComp = null; - - for (IndexReader r : subReaders) { - final Fields fields = r.fields(); - if (fields == null) { - // reader has no fields - continue; - } - - final Terms terms = fields.terms(query.field); - if (terms == null) { - // field does not exist - continue; - } - - final TermsEnum termsEnum = query.getTermsEnum(r, collector.attributes); - assert termsEnum != null; - - if (termsEnum == TermsEnum.EMPTY) - continue; - - // Check comparator compatibility: - final Comparator newTermComp = termsEnum.getComparator(); - if (lastTermComp != null && newTermComp != lastTermComp) - throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp); - lastTermComp = newTermComp; - - collector.setNextEnum(termsEnum); - BytesRef bytes; - while ((bytes = termsEnum.next()) != null) { - termsEnum.cacheCurrentTerm(); - if (!collector.collect(bytes)) - return; // interrupt whole term collection, so also don't iterate other subReaders - } + final FilteredTermEnum enumerator = query.getEnum(reader); + try { + do { + final Term t = enumerator.term(); + if (t == null || !collector.collect(t, enumerator.difference())) + break; + } while (enumerator.next()); + } finally { + enumerator.close(); } } - protected static abstract class TermCollector { - /** attributes used for communication with the enum */ - public final AttributeSource attributes = new AttributeSource(); - + protected interface TermCollector { /** return false to stop collecting */ - public abstract boolean collect(BytesRef bytes) throws IOException; - - /** the next segment's {@link TermsEnum} that is used to collect terms */ - public abstract void setNextEnum(TermsEnum termsEnum) throws IOException; + boolean collect(Term t, float boost) throws IOException; 
} } Index: lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (revision 1035096) +++ lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (working copy) @@ -18,16 +18,11 @@ */ import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import java.util.PriorityQueue; -import java.util.Comparator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.TermEnum; /** * Base rewrite method for collecting only the top terms @@ -58,82 +53,33 @@ protected abstract int getMaxSize(); @Override - public final Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { + public Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { final int maxSize = Math.min(size, getMaxSize()); final PriorityQueue stQueue = new PriorityQueue(); collectTerms(reader, query, new TermCollector() { - private final MaxNonCompetitiveBoostAttribute maxBoostAtt = - attributes.addAttribute(MaxNonCompetitiveBoostAttribute.class); - - private final Map visitedTerms = new HashMap(); - - private TermsEnum termsEnum; - private Comparator termComp; - private BoostAttribute boostAtt; - private ScoreTerm st; - - @Override - public void setNextEnum(TermsEnum termsEnum) throws IOException { - this.termsEnum = termsEnum; - this.termComp = termsEnum.getComparator(); - // lazy init the initial ScoreTerm because comparator is not known on ctor: - if (st == null) - st = new ScoreTerm(this.termComp); - boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); - } - - @Override - public boolean collect(BytesRef bytes) { - final float boost = boostAtt.getBoost(); + public boolean collect(Term t, float 
boost) { // ignore uncompetetive hits - if (stQueue.size() == maxSize) { - final ScoreTerm t = stQueue.peek(); - if (boost < t.boost) - return true; - if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0) - return true; - } - ScoreTerm t = visitedTerms.get(bytes); - if (t != null) { - // if the term is already in the PQ, only update docFreq of term in PQ - t.docFreq += termsEnum.docFreq(); - assert t.boost == boost : "boost should be equal in all segment TermsEnums"; - } else { - // add new entry in PQ, we must clone the term, else it may get overwritten! - st.bytes.copy(bytes); - st.boost = boost; - st.docFreq = termsEnum.docFreq(); - visitedTerms.put(st.bytes, st); - stQueue.offer(st); - // possibly drop entries from queue - if (stQueue.size() > maxSize) { - st = stQueue.poll(); - visitedTerms.remove(st.bytes); - } else { - st = new ScoreTerm(termComp); - } - assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize"; - // set maxBoostAtt with values to help FuzzyTermsEnum to optimize - if (stQueue.size() == maxSize) { - t = stQueue.peek(); - maxBoostAtt.setMaxNonCompetitiveBoost(t.boost); - maxBoostAtt.setCompetitiveTerm(t.bytes); - } - } + if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost) + return true; + // add new entry in PQ + st.term = t; + st.boost = boost; + stQueue.offer(st); + // possibly drop entries from queue + st = (stQueue.size() > maxSize) ? 
stQueue.poll() : new ScoreTerm(); return true; } + + // reusable instance + private ScoreTerm st = new ScoreTerm(); }); - final Term placeholderTerm = new Term(query.field); final Q q = getTopLevelQuery(); - final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]); - ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); - for (final ScoreTerm st : scoreTerms) { - final Term term = placeholderTerm.createTerm(st.bytes); - assert reader.docFreq(term) == st.docFreq; - addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query + for (final ScoreTerm st : stQueue) { + addClause(q, st.term, query.getBoost() * st.boost); // add to query } - query.incTotalNumberOfTerms(scoreTerms.length); + query.incTotalNumberOfTerms(stQueue.size()); + return q; } @@ -152,31 +98,16 @@ return true; } - private static final Comparator scoreTermSortByTermComp = - new Comparator() { - public int compare(ScoreTerm st1, ScoreTerm st2) { - assert st1.termComp == st2.termComp : - "term comparator should not change between segments"; - return st1.termComp.compare(st1.bytes, st2.bytes); - } - }; - - static final class ScoreTerm implements Comparable { - public final Comparator termComp; - - public final BytesRef bytes = new BytesRef(); + private static class ScoreTerm implements Comparable { + public Term term; public float boost; - public int docFreq; - public ScoreTerm(Comparator termComp) { - this.termComp = termComp; - } - public int compareTo(ScoreTerm other) { if (this.boost == other.boost) - return termComp.compare(other.bytes, this.bytes); + return other.term.compareTo(this.term); else return Float.compare(this.boost, other.boost); } } + } Property changes on: lucene\src\java\org\apache\lucene\util\StringHelper.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/StringHelper.java:r1035096,1035103 Property changes on: 
lucene\src\test\org\apache\lucene\analysis ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\analysis\email.addresses.from.random.text.with.email.addresses.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/email.addresses.from.random.text.with.email.addresses.txt:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\analysis\LuceneResourcesWikiPage.html ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPage.html:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\analysis\LuceneResourcesWikiPageURLs.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPageURLs.txt:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\analysis\random.text.with.email.addresses.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/random.text.with.email.addresses.txt:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\analysis\random.text.with.urls.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/random.text.with.urls.txt:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\analysis\TestISOLatin1AccentFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\analysis\urls.from.random.text.with.urls.txt ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/urls.from.random.text.with.urls.txt:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\collation ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/collation:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\document\TestDateTools.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestDateTools.java:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\document\TestNumberTools.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/document/TestNumberTools.java:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\index ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/index:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\index\TestBackwardsCompatibility.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\search\spans\TestPayloadSpans.java ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java:r1035096,1035103 Index: lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (revision 1035096) +++ lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (working copy) @@ -83,7 +83,7 @@ public void testFuzzy2() throws Exception { // maximum of 1 term expansion - FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan"), 1f, 0, 1); + FuzzyQuery fq = new FuzzyQuery(new Term("field", "broan"), 0.5f, 0, 1); SpanQuery sfq = new SpanMultiTermQueryWrapper(fq); // will only match jumps over lazy broun dog SpanPositionRangeQuery sprq = new SpanPositionRangeQuery(sfq, 0, 100); Property changes on: lucene\src\test\org\apache\lucene\search\spans\TestSpans.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\search\TestCachingWrapperFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java:r1035096,1035103 Property changes on: lucene\src\test\org\apache\lucene\util\TestAttributeSource.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java:r1035096,1035103 Property changes on: solr ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr:r1035096,1035103 Property changes on: solr\example ___________________________________________________________________ 
Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/example:r1035096,1035103 Property changes on: solr\lib\commons-httpclient-3.1.jar ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/lib/commons-httpclient-3.1.jar:r1035096,1035103 Property changes on: solr\lib\jcl-over-slf4j-1.5.5.jar ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/lib/jcl-over-slf4j-1.5.5.jar:r1035096,1035103 Property changes on: solr\src ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src:r1035096,1035103 Property changes on: solr\src\common\org\apache\solr\common ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/common/org/apache/solr/common:r1035096,1035103 Property changes on: solr\src\java\org\apache\solr\analysis\ShingleFilterFactory.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java:r1035096,1035103 Property changes on: solr\src\java\org\apache\solr\analysis\SynonymFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/SynonymFilter.java:r1035096,1035103 Property changes on: solr\src\java\org\apache\solr\analysis\SynonymMap.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/SynonymMap.java:r1035096,1035103 Property changes on: solr\src\java\org\apache\solr\analysis\WordDelimiterIterator.java ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/WordDelimiterIterator.java:r1035096,1035103 Property changes on: solr\src\java\org\apache\solr\response ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/java/org/apache/solr/response:r1035096,1035103 Property changes on: solr\src\maven\solr-core-pom.xml.template ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/maven/solr-core-pom.xml.template:r1035096,1035103 Property changes on: solr\src\maven\solr-solrj-pom.xml.template ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/maven/solr-solrj-pom.xml.template:r1035096,1035103 Property changes on: solr\src\solrj\org ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/solrj/org:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\analysis\TestPatternTokenizerFactory.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\analysis\TestRemoveDuplicatesTokenFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\analysis\TestShingleFilterFactory.java ___________________________________________________________________ Modified: 
svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestShingleFilterFactory.java:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\analysis\TestSynonymFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\analysis\TestTrimFilter.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\client ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/client:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\handler\component\QueryElevationComponentTest.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\request\SimpleFacetsTest.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java:r1035096,1035103 Property changes on: solr\src\test\org\apache\solr\update\AutoCommitTest.java ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/test/org/apache/solr/update/AutoCommitTest.java:r1035096,1035103 Property changes on: solr\src\webapp\src\org\apache\solr\client\solrj\embedded ___________________________________________________________________ Modified: svn:mergeinfo Merged 
/lucene/dev/trunk/solr/src/webapp/src/org/apache/solr/client/solrj/embedded:r1035096,1035103 Property changes on: solr\src\webapp\web\admin ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/trunk/solr/src/webapp/web/admin:r1035096,1035103