Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 739226) +++ CHANGES.txt (working copy) @@ -142,6 +142,13 @@ that field, even with different Terms in the filter, are fast. (Tim Sturge, Shalin Shekhar Mangar via Mike McCandless). +13. LUCENE-1506: Added FilteredDocIdSet, an abstract class which you + subclass to implement the "match" method to accept or reject each + docID. Unlike ChainedFilter (under contrib/misc), + FilteredDocIdSet never requires you to materialize the full + bitset. Instead, match() is called on demand per docID. (John + Wang via Mike McCandless) + Optimizations 1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing Index: src/test/org/apache/lucene/search/TestDocIdSet.java =================================================================== --- src/test/org/apache/lucene/search/TestDocIdSet.java (revision 0) +++ src/test/org/apache/lucene/search/TestDocIdSet.java (revision 0) @@ -0,0 +1,93 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.lucene.util.LuceneTestCase;
+/** Checks that FilteredDocIdSet yields only the docids of the inner set that pass match(). */
+public class TestDocIdSet extends LuceneTestCase {
+  public void testFilteredDocIdSet() throws Exception {
+    final int maxdoc=10;
+    final DocIdSet innerSet = new DocIdSet() {
+        // Inner set: every docid from 0 to maxdoc-1, in increasing order.
+        // @Override
+        public DocIdSetIterator iterator() {
+          return new DocIdSetIterator() {
+
+            int docid=-1;
+            //@Override
+            public int doc() {
+              return docid;
+            }
+
+            //@Override
+            public boolean next() throws IOException {
+              docid++;
+              return (docid<maxdoc);
+            }
+
+            //@Override
+            public boolean skipTo(int target) throws IOException {
+              do {
+                if (!next()) {
+                  return false;
+                }
+              } while (target > doc());
+
+              return true;
+            }
+          };
+        }
+      };
+
+    // Wrap the inner set so that only even docids are accepted.
+    DocIdSet filteredSet = new FilteredDocIdSet(innerSet){
+        // @Override
+        protected boolean match(int docid) {
+          return docid%2 == 0;  //validate only even docids
+        }
+      };
+    // Collect every docid at or after 3 that the filtered iterator returns.
+    DocIdSetIterator iter = filteredSet.iterator();
+    ArrayList/*<Integer>*/ list = new ArrayList/*<Integer>*/();
+    if (iter.skipTo(3)) {
+      list.add(new Integer(iter.doc()));
+      while(iter.next()) {
+        list.add(new Integer(iter.doc()));
+      }
+    }
+    // Unbox into an int[] so the result can be compared with Arrays.equals.
+    int[] docs = new int[list.size()];
+    int c=0;
+    Iterator/*<Integer>*/ intIter = list.iterator();
+    while(intIter.hasNext()) {
+      docs[c++] = ((Integer) intIter.next()).intValue();
+    }
+    int[] answer = new int[]{4,6,8};
+    boolean same = Arrays.equals(answer, docs);
+    if (!same) {
+      System.out.println("answer: "+Arrays.toString(answer));
+      System.out.println("gotten: "+Arrays.toString(docs));
+      fail();
+    }
+  }
+}

Property changes on: src/test/org/apache/lucene/search/TestDocIdSet.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java
===================================================================
--- src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java (revision 0)
+++ src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java (revision 0)
@@ -0,0 +1,91 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the 
Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * Abstract decorator class of a DocIdSetIterator
+ * implementation that provides on-demand filter/validation
+ * mechanism on an underlying DocIdSetIterator. See {@link
+ * FilteredDocIdSet}.
+ */
+
+public abstract class FilteredDocIdSetIterator extends DocIdSetIterator {
+  protected DocIdSetIterator _innerIter;
+  private int _currentDoc;
+
+  /**
+   * Constructor.
+   * @param innerIter Underlying DocIdSetIterator.
+   */
+  public FilteredDocIdSetIterator(DocIdSetIterator innerIter) {
+    if (innerIter == null) {
+      throw new IllegalArgumentException("null iterator");
+    }
+    _innerIter = innerIter;
+    _currentDoc = -1;
+  }
+
+  /**
+   * Validation method to determine whether a docid should be in the result set.
+   * @param doc docid to be tested
+   * @return true if input docid should be in the result set, false otherwise.
+   * @see #FilteredDocIdSetIterator(DocIdSetIterator)
+   */
+  protected abstract boolean match(int doc);
+
+  /** Returns the docid most recently accepted by match(), or -1 if none has been accepted yet. */
+  // @Override
+  public final int doc() {
+    return _currentDoc;
+  }
+
+  /**
+   * Advances the inner iterator until match() accepts a docid; returns false once it is exhausted.
+   */
+  // @Override
+  public final boolean next() throws IOException{
+    while (_innerIter.next()) {
+      int doc = _innerIter.doc();
+      if (match(doc)) {
+        _currentDoc = doc;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Skips the inner iterator to n, then advances to the first docid from there on that match() accepts.
+   * @param n target handed to the inner iterator's skipTo.
+   * @return true if such a docid was found, false if the inner iterator ran out first.
+   */
+  // @Override
+  public final boolean skipTo(int n) throws IOException{
+    if (!_innerIter.skipTo(n)) {
+      return false;
+    }
+    int doc = _innerIter.doc();
+    if (match(doc)) {
+      _currentDoc = doc;
+      return true;
+    }
+    return next(); // landing docid rejected: continue with the same scan loop next() runs
+  }
+}

Property changes on: src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: src/java/org/apache/lucene/search/FilteredDocIdSet.java
===================================================================
--- src/java/org/apache/lucene/search/FilteredDocIdSet.java (revision 0)
+++ src/java/org/apache/lucene/search/FilteredDocIdSet.java (revision 0)
@@ -0,0 +1,72 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * Abstract decorator class for a DocIdSet implementation + * that provides on-demand filtering/validation + * mechanism on a given DocIdSet. + * + * <p/>
+ *
+ * Technically, this same functionality could be achieved with ChainedFilter (under
+ * contrib/misc), however the benefit of this class is it never materializes the full bitset
+ * for the filter. Instead, the {@link #match} method is invoked on-demand, per docID visited
+ * during searching. If you know few docIDs will be visited, and the logic behind
+ * {@link #match} is relatively costly, this may be a better way to filter than ChainedFilter.
+ *
+ * @see DocIdSet
+ */
+public abstract class FilteredDocIdSet extends DocIdSet {
+  private final DocIdSet _innerSet;
+
+  /**
+   * Constructor.
+   * @param innerSet Underlying DocIdSet; must not be null.
+   * @throws IllegalArgumentException if innerSet is null.
+   */
+  public FilteredDocIdSet(DocIdSet innerSet) {
+    if (innerSet == null) {
+      throw new IllegalArgumentException("null DocIdSet");
+    }
+    _innerSet = innerSet;
+  }
+
+  /**
+   * Validation method to determine whether a docid should be in the result set.
+   * @param docid docid to be tested
+   * @return true if input docid should be in the result set, false otherwise.
+   */
+  protected abstract boolean match(int docid);
+
+  /**
+   * Builds an iterator over the inner set that returns only docids accepted by {@link #match}.
+   * @see DocIdSetIterator
+   * @see FilteredDocIdSetIterator
+   */
+  // @Override
+  public DocIdSetIterator iterator() throws IOException {
+    return new FilteredDocIdSetIterator(_innerSet.iterator()) {
+      protected boolean match(int docid) {
+        return FilteredDocIdSet.this.match(docid);
+      }
+    };
+  }
+}

Property changes on: src/java/org/apache/lucene/search/FilteredDocIdSet.java
___________________________________________________________________
Added: svn:eol-style
   + native