Index: src/java/org/apache/lucene/search/CachingWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 814707) +++ src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.util.DocIdBitSet; +import org.apache.lucene.util.OpenBitSet; import java.util.BitSet; import java.util.WeakHashMap; import java.util.Map; @@ -75,10 +76,23 @@ /** Provide the DocIdSet to be cached, using the DocIdSet provided * by the wrapped Filter. - * This implementation returns the given DocIdSet. + *

This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} + * returns true, else it copies the {@link DocIdSetIterator} into + * an {@link OpenBitSet}. */ - protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) { - return docIdSet; + protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException { + if (docIdSet.isCacheable()) { + return docIdSet; + } else { + final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); + final DocIdSetIterator it = docIdSet.iterator(); + int doc; + while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + bits.fastSet(doc); + } + assert bits.isCacheable(); // should always be the case + return bits; + } } public DocIdSet getDocIdSet(IndexReader reader) throws IOException { Index: src/java/org/apache/lucene/search/DocIdSet.java =================================================================== --- src/java/org/apache/lucene/search/DocIdSet.java (revision 814707) +++ src/java/org/apache/lucene/search/DocIdSet.java (working copy) @@ -37,6 +37,10 @@ public DocIdSetIterator iterator() { return iterator; } + + public boolean isCacheable() { + return true; + } }; /** Provides a {@link DocIdSetIterator} to access the set. @@ -44,4 +48,15 @@ * {@linkplain #EMPTY_DOCIDSET}.iterator() if there * are no docs that match. */ public abstract DocIdSetIterator iterator() throws IOException; + + /** + * This method is a hint for CachingWrapperFilter as to whether this DocIdSet should be cached + * without copying it into a BitSet. The default is to return false, which + * is the behaviour of Lucene 2.4.1. If you have your own DocIdSet implementation + * that iterates efficiently and quickly without doing disk I/O, + * override this method and return true. 
+ */ + public boolean isCacheable() { + return false; + } } Index: src/java/org/apache/lucene/search/FieldCacheRangeFilter.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 814707) +++ src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy) @@ -476,6 +476,11 @@ /** this method checks, if a doc is a hit, should throw AIOBE, when position invalid */ abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException; + + /** this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs */ + public boolean isCacheable() { + return !(mayUseTermDocs && reader.hasDeletions()); + } public DocIdSetIterator iterator() throws IOException { // Synchronization needed because deleted docs BitVector @@ -484,7 +489,7 @@ // and the index has deletions final TermDocs termDocs; synchronized(reader) { - termDocs = (mayUseTermDocs && reader.hasDeletions()) ? reader.termDocs(null) : null; + termDocs = isCacheable() ? null : reader.termDocs(null); } if (termDocs != null) { // a DocIdSetIterator using TermDocs to iterate valid docIds Index: src/java/org/apache/lucene/search/FieldCacheTermsFilter.java =================================================================== --- src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (revision 814707) +++ src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (working copy) @@ -130,6 +130,11 @@ return new FieldCacheTermsFilterDocIdSetIterator(); } + /** This DocIdSet implementation is cacheable. 
*/ + public boolean isCacheable() { + return true; + } + protected class FieldCacheTermsFilterDocIdSetIterator extends DocIdSetIterator { private int doc = -1; Index: src/java/org/apache/lucene/search/FilteredDocIdSet.java =================================================================== --- src/java/org/apache/lucene/search/FilteredDocIdSet.java (revision 814707) +++ src/java/org/apache/lucene/search/FilteredDocIdSet.java (working copy) @@ -49,6 +49,11 @@ _innerSet = innerSet; } + /** This DocIdSet implementation is cacheable if the inner set is cacheable. */ + public boolean isCacheable() { + return _innerSet.isCacheable(); + } + /** * Validation method to determine whether a docid should be in the result set. * @param docid docid to be tested Index: src/java/org/apache/lucene/search/QueryWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/QueryWrapperFilter.java (revision 814707) +++ src/java/org/apache/lucene/search/QueryWrapperFilter.java (working copy) @@ -74,6 +74,7 @@ public DocIdSetIterator iterator() throws IOException { return weight.scorer(reader, true, false); } + public boolean isCacheable() { return false; } }; } Index: src/java/org/apache/lucene/util/DocIdBitSet.java =================================================================== --- src/java/org/apache/lucene/util/DocIdBitSet.java (revision 814707) +++ src/java/org/apache/lucene/util/DocIdBitSet.java (working copy) @@ -34,6 +34,11 @@ public DocIdSetIterator iterator() { return new DocIdBitSetIterator(bitSet); } + + /** This DocIdSet implementation is cacheable. */ + public boolean isCacheable() { + return true; + } /** * Returns the underlying BitSet. 
Index: src/java/org/apache/lucene/util/OpenBitSet.java =================================================================== --- src/java/org/apache/lucene/util/OpenBitSet.java (revision 814707) +++ src/java/org/apache/lucene/util/OpenBitSet.java (working copy) @@ -116,6 +116,11 @@ return new OpenBitSetIterator(bits, wlen); } + /** This DocIdSet implementation is cacheable. */ + public boolean isCacheable() { + return true; + } + /** Returns the current capacity in bits (1 greater than the index of the last bit) */ public long capacity() { return bits.length << 6; } Index: src/java/org/apache/lucene/util/SortedVIntList.java =================================================================== --- src/java/org/apache/lucene/util/SortedVIntList.java (revision 814707) +++ src/java/org/apache/lucene/util/SortedVIntList.java (working copy) @@ -180,6 +180,11 @@ return bytes.length; } + /** This DocIdSet implementation is cacheable. */ + public boolean isCacheable() { + return true; + } + /** * @return An iterator over the sorted integers. 
*/ Index: src/test/org/apache/lucene/search/TestCachingWrapperFilter.java =================================================================== --- src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (revision 814707) +++ src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (working copy) @@ -18,12 +18,17 @@ */ import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import java.io.IOException; +import java.util.BitSet; + public class TestCachingWrapperFilter extends LuceneTestCase { public void testCachingWorks() throws Exception { Directory dir = new RAMDirectory(); @@ -50,4 +55,47 @@ reader.close(); } + + private static void assertDocIdSetCacheable(IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException { + final CachingWrapperFilter cacher = new CachingWrapperFilter(filter); + final DocIdSet originalSet = filter.getDocIdSet(reader); + final DocIdSet cachedSet = cacher.getDocIdSet(reader); + assertTrue(cachedSet.isCacheable()); + assertEquals(shouldCacheable, originalSet.isCacheable()); + //System.out.println("Original: "+originalSet.getClass().getName()+" -- cached: "+cachedSet.getClass().getName()); + if (originalSet.isCacheable()) { + assertEquals("Cached DocIdSet has same class like uncached, if cacheable", originalSet.getClass(), cachedSet.getClass()); + } else { + assertTrue("Cached DocIdSet is an OpenBitSet if the original one was not cacheable", cachedSet instanceof OpenBitSet); + } + } + + public void testIsCacheAble() throws Exception { + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); + writer.close(); + + 
IndexReader reader = IndexReader.open(dir); + + // not cacheable: + assertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test","value"))), false); + // returns the default empty DocIdSet, which is always cacheable: + assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", new Integer(10000), new Integer(-10000), true, true), true); + // is cacheable: + assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", new Integer(10), new Integer(20), true, true), true); + // an OpenBitSet filter is always cacheable + assertDocIdSetCacheable(reader, new Filter() { + public DocIdSet getDocIdSet(IndexReader reader) { + return new OpenBitSet(); + } + }, true); + // a deprecated (BitSet-returning) filter is always cacheable + assertDocIdSetCacheable(reader, new Filter() { + public BitSet bits(IndexReader reader) { + return new BitSet(); + } + }, true); + + reader.close(); + } }