Index: src/java/org/apache/lucene/util/Predicate.java
===================================================================
--- src/java/org/apache/lucene/util/Predicate.java	(revision 0)
+++ src/java/org/apache/lucene/util/Predicate.java	(revision 0)
@@ -0,0 +1,5 @@
+package org.apache.lucene.util;
+
+public interface Predicate<T> {
+  public boolean evaluate(T t);
+}
Index: src/java/org/apache/lucene/search/MultiSearcher.java
===================================================================
--- src/java/org/apache/lucene/search/MultiSearcher.java	(revision 940588)
+++ src/java/org/apache/lucene/search/MultiSearcher.java	(working copy)
@@ -17,14 +17,6 @@
  * limitations under the License.
  */
 
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.util.ReaderUtil;
-import org.apache.lucene.util.DummyConcurrentLock;
-
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -33,13 +25,21 @@
 import java.util.concurrent.Callable;
 import java.util.concurrent.locks.Lock;
 
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.DummyConcurrentLock;
+import org.apache.lucene.util.Predicate;
+import org.apache.lucene.util.ReaderUtil;
+
 /** Implements search over a set of <code>Searchables</code>.
  *
  * <p>Applications usually need only call the inherited {@link #search(Query,int)}
  * or {@link #search(Query,Filter,int)} methods.
  */
 public class MultiSearcher extends Searcher {
-
   /**
    * Document Frequency cache acting as a Dummy-Searcher. This class is no
    * full-fledged Searcher, but only supports the methods necessary to
@@ -160,12 +160,20 @@
 
   @Override
   public int docFreq(Term term) throws IOException {
+    return this.docFreq(null, term);
+  }
+
+  public int docFreq(Predicate<Searchable> searchablePredicate, Term term) throws IOException {
     int docFreq = 0;
-    for (int i = 0; i < searchables.length; i++)
+    for (int i = 0; i < searchables.length; i++) {
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
       docFreq += searchables[i].docFreq(term);
+    }
+
     return docFreq;
   }
-
   // inherit javadoc
   @Override
   public Document doc(int n) throws CorruptIndexException, IOException {
@@ -197,21 +205,75 @@
     return maxDoc;
   }
 
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Query, Collector)}
+   */
+  public void search(Predicate<Searchable> searchablePredicate, Query query, Collector results) throws IOException {
+    search(searchablePredicate, createWeight(query), null, results);
+  }
+
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Query, Filter, Collector)}
+   */
+  public void search(Predicate<Searchable> searchablePredicate, Query query, Filter filter, Collector results)
+    throws IOException {
+    search(searchablePredicate, createWeight(query), filter, results);
+  }
+
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Query, Filter, int, Sort)}
+   */
+  public TopFieldDocs search(Predicate<Searchable> searchablePredicate, Query query, Filter filter, int n, Sort sort)
+    throws IOException {
+    return search(searchablePredicate, createWeight(query), filter, n, sort);
+  }
+
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Query, int)}
+   */
+  public TopDocs search(Predicate<Searchable> searchablePredicate, Query query, int n) throws IOException {
+    return search(searchablePredicate, query, null, n);
+  }
+
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Query, Filter, int)}
+   */
+  public TopDocs search(Predicate<Searchable> searchablePredicate, Query query, Filter filter, int n)
+    throws IOException {
+    return search(searchablePredicate, createWeight(query), filter, n);
+  }
+
   @Override
-  public TopDocs search(Weight weight, Filter filter, int nDocs)
+  public TopDocs search(Weight weight, Filter filter, int nDocs) throws IOException {
+    return this.search(null, weight, filter, nDocs);
+  }
+
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Weight, Filter, int)}
+   */
+  public TopDocs search(Predicate<Searchable> searchablePredicate, Weight weight, Filter filter, int nDocs)
   throws IOException {
     final HitQueue hq = new HitQueue(nDocs, false);
     int totalHits = 0;
 
     for (int i = 0; i < searchables.length; i++) { // search each searcher
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
       final TopDocs docs = new MultiSearcherCallableNoSort(DummyConcurrentLock.INSTANCE,
           searchables[i], weight, filter, nDocs, hq, i, starts).call();
       totalHits += docs.totalHits; // update totalHits
     }
 
     final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-    for (int i = hq.size()-1; i >= 0; i--) // put docs in array
+    for (int i = hq.size()-1; i >= 0; i--) // put docs in array
       scoreDocs[i] = hq.pop();
 
     float maxScore = (totalHits==0) ? Float.NEGATIVE_INFINITY : scoreDocs[0].score;
@@ -219,14 +281,27 @@
     return new TopDocs(totalHits, scoreDocs, maxScore);
   }
 
+  @Override
   public TopFieldDocs search (Weight weight, Filter filter, int n, Sort sort)
   throws IOException {
+    return this.search(null, weight, filter, n, sort);
+  }
+
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Weight, Filter, int, Sort)}
+   */
+  public TopFieldDocs search (Predicate<Searchable> searchablePredicate, Weight weight, Filter filter, int n, Sort sort) throws IOException {
     FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(n);
     int totalHits = 0;
     float maxScore=Float.NEGATIVE_INFINITY;
 
     for (int i = 0; i < searchables.length; i++) { // search each searcher
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
+
       final TopFieldDocs docs = new MultiSearcherCallableWithSort(DummyConcurrentLock.INSTANCE,
           searchables[i], weight, filter, n, hq, sort, i, starts).call();
       totalHits += docs.totalHits; // update totalHits
@@ -234,7 +309,7 @@
     }
 
     final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
-    for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
+    for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
       scoreDocs[i] = hq.pop();
 
     return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore);
@@ -245,36 +320,64 @@
   public void search(Weight weight, Filter filter, final Collector collector)
   throws IOException {
     for (int i = 0; i < searchables.length; i++) {
-
-      final int start = starts[i];
-
-      final Collector hc = new Collector() {
-        @Override
-        public void setScorer(Scorer scorer) throws IOException {
-          collector.setScorer(scorer);
-        }
-        @Override
-        public void collect(int doc) throws IOException {
-          collector.collect(doc);
-        }
-        @Override
-        public void setNextReader(IndexReader reader, int docBase) throws IOException {
-          collector.setNextReader(reader, start + docBase);
-        }
-        @Override
-        public boolean acceptsDocsOutOfOrder() {
-          return collector.acceptsDocsOutOfOrder();
-        }
-      };
+      this.collectorSearch(weight, filter, collector, i);
+    }
+  }
+
+  /**
+   * Searches over only those instance searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#search(Weight, Filter, Collector)}
+   */
+  public void search(Predicate<Searchable> searchablePredicate, Weight weight, Filter filter, final Collector collector)
+    throws IOException {
+    for (int i = 0; i < searchables.length; i++) {
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
 
-      searchables[i].search(weight, filter, hc);
+      this.collectorSearch(weight, filter, collector, i);
     }
   }
+
+  //this logic *could* be folded into search(Predicate, Weight, Filter, Collector), but keeping it
+  //here lets the plain search() overloads skip the per-searchable predicate checks entirely
+  private void collectorSearch(Weight weight, Filter filter, final Collector collector, int searchableIndex)
+    throws IOException {
+    final int start = starts[searchableIndex];
+
+    final Collector hc = new Collector() {
+      @Override
+      public void setScorer(Scorer scorer) throws IOException {
+        collector.setScorer(scorer);
+      }
+      @Override
+      public void collect(int doc) throws IOException {
+        collector.collect(doc);
+      }
+      @Override
+      public void setNextReader(IndexReader reader, int docBase) throws IOException {
+        collector.setNextReader(reader, start + docBase);
+      }
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return collector.acceptsDocsOutOfOrder();
+      }
+    };
+
+    searchables[searchableIndex].search(weight, filter, hc);
+  }
 
   @Override
   public Query rewrite(Query original) throws IOException {
+    return this.rewrite(null, original);
+  }
+
+  public Query rewrite(Predicate<Searchable> searchablePredicate, Query original) throws IOException {
     final Query[] queries = new Query[searchables.length];
+    int count = 0;
     for (int i = 0; i < searchables.length; i++) {
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
-      queries[i] = searchables[i].rewrite(original);
+      queries[count++] = searchables[i].rewrite(original);
     }
-    return queries[0].combine(queries);
+    if (count == 0) {
+      return original; //no searchable was accepted by the predicate
+    }
+    //hand only the filled slots to combine(); skipped searchables would
+    //otherwise leave null entries in the array
+    final Query[] rewritten = new Query[count];
+    System.arraycopy(queries, 0, rewritten, 0, count);
+    return rewritten[0].combine(rewritten);
@@ -320,17 +423,29 @@
     return rewrittenQuery.weight(cacheSim);
   }
 
   /**
-   * Collects the document frequency for the given terms form all searchables
-   * @param terms term set used to collect the document frequency form all
+   * Collects the document frequency for the given terms from all searchables
+   * @param terms term set used to collect the document frequency from all
    *        searchables
    * @return a map with a term as the key and the terms aggregated document
    *         frequency as a value
   * @throws IOException if a searchable throws an {@link IOException}
    */
-  Map<Term, Integer> createDocFrequencyMap(final Set<Term> terms) throws IOException {
+  Map<Term, Integer> createDocFrequencyMap(final Set<Term> terms) throws IOException {
+    return this.createDocFrequencyMap(null, terms);
+  }
+
+  /**
+   * Collects the document frequency for the given terms from only those
+   * searchables accepted by searchablePredicate; a null predicate accepts all.
+   * @see {@link MultiSearcher#createDocFrequencyMap(Set)}
+   */
+  Map<Term, Integer> createDocFrequencyMap(Predicate<Searchable> searchablePredicate, final Set<Term> terms) throws IOException {
    final Term[] allTermsArray = terms.toArray(new Term[terms.size()]);
    final int[] aggregatedDfs = new int[allTermsArray.length];
    for (Searchable searchable : searchables) {
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchable)) {
+        continue;
+      }
      final int[] dfs = searchable.docFreqs(allTermsArray);
      for(int j=0; j<aggregatedDfs.length; j++) {
Index: src/java/org/apache/lucene/search/ParallelMultiSearcher.java
===================================================================
--- src/java/org/apache/lucene/search/ParallelMultiSearcher.java	(revision 940588)
+++ src/java/org/apache/lucene/search/ParallelMultiSearcher.java	(working copy)
@@ -30,6 +30,7 @@
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.lucene.index.Term;
+import org.apache.lucene.util.Predicate;
 
 /** Implements parallel search over a set of <code>Searchables</code>.
@@ -81,17 +82,25 @@
    * the results back together.
    */
   @Override
-  public TopDocs search(Weight weight, Filter filter, int nDocs) throws IOException {
+  public TopDocs search(Predicate<Searchable> searchablePredicate, Weight weight, Filter filter, int nDocs) throws IOException {
     final HitQueue hq = new HitQueue(nDocs, false);
     final Lock lock = new ReentrantLock();
-    @SuppressWarnings("unchecked") final Future<TopDocs>[] searchThreads = new Future[searchables.length];
+
+    //allocating this initially to the full size of the searchables array may waste
+    //memory when a predicate filters searchables out, but it guarantees the list
+    //never has to grow
+    final ArrayList<Future<TopDocs>> searchThreads = new ArrayList<Future<TopDocs>>(searchables.length);
     for (int i = 0; i < searchables.length; i++) { // search each searchable
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
-      searchThreads[i] = executor.submit(
-          new MultiSearcherCallableNoSort(lock, searchables[i], weight, filter, nDocs, hq, i, starts));
+      searchThreads.add(executor.submit(
+          new MultiSearcherCallableNoSort(lock, searchables[i], weight, filter, nDocs, hq, i, starts)
+      ));
     }
     final CountTotalHits func = new CountTotalHits();
-    foreach(func, Arrays.asList(searchThreads));
+    foreach(func, searchThreads);
 
     final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
     for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
@@ -106,13 +115,17 @@
    * the results back together.
    */
   @Override
-  public TopFieldDocs search(Weight weight, Filter filter, int nDocs, Sort sort) throws IOException {
+  public TopFieldDocs search(Predicate<Searchable> searchablePredicate, Weight weight, Filter filter, int nDocs, Sort sort) throws IOException {
     if (sort == null) throw new NullPointerException();
     final FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(nDocs);
     final Lock lock = new ReentrantLock();
-    @SuppressWarnings("unchecked") final Future<TopFieldDocs>[] searchThreads = new Future[searchables.length];
+    //size the array to the accepted searchables only; skipped entries would
+    //otherwise leave null Futures behind for the merge step
+    int accepted = 0;
+    for (Searchable s : searchables) {
+      if(searchablePredicate == null || searchablePredicate.evaluate(s)) {
+        accepted++;
+      }
+    }
+    @SuppressWarnings("unchecked") final Future<TopFieldDocs>[] searchThreads = new Future[accepted];
+    int slot = 0;
     for (int i = 0; i < searchables.length; i++) { // search each searchable
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
+
-      searchThreads[i] = executor.submit(
+      searchThreads[slot++] = executor.submit(
          new MultiSearcherCallableWithSort(lock, searchables[i], weight, filter, nDocs, hq, sort, i, starts));
     }
@@ -144,10 +157,13 @@
    * @param collector to receive hits
    */
   @Override
-  public void search(final Weight weight, final Filter filter, final Collector collector)
+  public void search(Predicate<Searchable> searchablePredicate, final Weight weight, final Filter filter, final Collector collector)
     throws IOException {
     for (int i = 0; i < searchables.length; i++) {
-
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchables[i])) {
+        continue;
+      }
+
       final int start = starts[i];
 
       final Collector hc = new Collector() {
@@ -183,11 +199,16 @@
   }
 
   @Override
-  HashMap<Term, Integer> createDocFrequencyMap(Set<Term> terms) throws IOException {
+  HashMap<Term, Integer> createDocFrequencyMap(Predicate<Searchable> searchablePredicate, Set<Term> terms) throws IOException {
     final Term[] allTermsArray = terms.toArray(new Term[terms.size()]);
     final int[] aggregatedDocFreqs = new int[terms.size()];
     final ArrayList<Future<int[]>> searchThreads = new ArrayList<Future<int[]>>(searchables.length);
     for (Searchable searchable : searchables) {
+      if(searchablePredicate != null && !searchablePredicate.evaluate(searchable)) {
+        continue;
+      }
+
       final Future<int[]> future = executor.submit(
         new DocumentFrequencyCallable(searchable, allTermsArray));
       searchThreads.add(future);
Index: src/test/org/apache/lucene/search/TestMultiSearcher.java
===================================================================
--- src/test/org/apache/lucene/search/TestMultiSearcher.java	(revision 940588)
+++ src/test/org/apache/lucene/search/TestMultiSearcher.java	(working copy)
@@ -17,7 +17,12 @@
  * limitations under the License.
  */
 
-import org.apache.lucene.util.LuceneTestCase;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
@@ -30,13 +35,10 @@
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.MockRAMDirectory;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Predicate;
 
 /**
  * Tests {@link MultiSearcher} class.
@@ -148,6 +150,15 @@
 
     assertEquals(4, hits2.length);
 
+    final HashSet<Searchable> searchSet = new HashSet<Searchable>(1);
+    searchSet.add(searchers2[0]);
+    ScoreDoc[] hits3 = mSearcher2.search(new Predicate<Searchable>() {
+      public boolean evaluate(Searchable t) {
+        return searchSet.contains(t);
+      }
+    }, query, null, 1000).scoreDocs;
+    assertEquals(1, hits3.length);
+
     // iterating over the hit documents
     for (int i = 0; i < hits2.length; i++) {
       // no exception should happen at this point
@@ -192,13 +203,13 @@
     // creating the mulitSearcher
     Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
     // performing the same search
-    ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs;
+    ScoreDoc[] hits4 = mSearcher3.search(query, null, 1000).scoreDocs;
 
-    assertEquals(3, hits3.length);
+    assertEquals(3, hits4.length);
 
     // iterating over the hit documents
-    for (int i = 0; i < hits3.length; i++) {
-      mSearcher3.doc(hits3[i].doc);
+    for (int i = 0; i < hits4.length; i++) {
+      mSearcher3.doc(hits4[i].doc);
     }
 
     mSearcher3.close();
     indexStoreA.close();
Index: src/test/org/apache/lucene/search/TestParallelMultiSearcher.java
===================================================================
--- src/test/org/apache/lucene/search/TestParallelMultiSearcher.java	(revision 940588)
+++ src/test/org/apache/lucene/search/TestParallelMultiSearcher.java	(working copy)
@@ -33,5 +33,5 @@
       throws IOException {
     return new ParallelMultiSearcher(searchers);
   }
-  
+
 }
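A minimal caller-side sketch (not part of the patch itself), assuming the patch above is applied to a Lucene 3.x tree: it restricts a query to a subset of a MultiSearcher's underlying searchables via the new Predicate parameter. The helper class, the "contents" field, and the query term are illustrative assumptions; only the search(Predicate, Query, int) overload and the Predicate interface come from the patch.

    import java.io.IOException;
    import java.util.Set;

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.MultiSearcher;
    import org.apache.lucene.search.Searchable;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.util.Predicate;

    public class SubsetSearchExample {
      //runs the query against only the searchables contained in the allowed set;
      //a null predicate would search all of them, which is exactly how the
      //pre-existing overloads behave after the patch
      public static TopDocs searchSubset(MultiSearcher searcher, final Set<Searchable> allowed)
          throws IOException {
        final Predicate<Searchable> subset = new Predicate<Searchable>() {
          public boolean evaluate(Searchable s) {
            return allowed.contains(s); //reject searchables outside the subset
          }
        };
        return searcher.search(subset, new TermQuery(new Term("contents", "lucene")), 10);
      }
    }

Because the predicate is consulted per searchable on every call, the subset can change between calls (for example, by index freshness or shard ownership) without rebuilding the MultiSearcher.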