Index: .
===================================================================
--- .	(revision 1206033)
+++ .	(working copy)

Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /lucene/dev/trunk:r1206033

Index: lucene
===================================================================
--- lucene	(revision 1206033)
+++ lucene	(working copy)

Property changes on: lucene
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /lucene/dev/trunk/lucene:r1206033

Index: lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
===================================================================
--- lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java	(revision 1206033)
+++ lucene/backwards/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java	(working copy)
@@ -535,7 +535,6 @@
     search.close();
   }
 
-  // test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0
   @Test
   public void testSparseIndex() throws IOException {
     Directory dir = newDirectory();
@@ -561,23 +560,23 @@
     Query q = new TermQuery(new Term("body","body"));
 
     result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) 20),T,T), 100).scoreDocs;
-    assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    //behaviour change: assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", 40, result.length);
 
     result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) 0),Byte.valueOf((byte) 20),T,T), 100).scoreDocs;
-    assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    //behaviour change: assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", 20, result.length);
 
     result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) 0),T,T), 100).scoreDocs;
-    assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    //behaviour change: assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", 20, result.length);
 
     result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) 10),Byte.valueOf((byte) 20),T,T), 100).scoreDocs;
-    assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    //behaviour change: assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", 11, result.length);
 
     result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) -10),T,T), 100).scoreDocs;
-    assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    //behaviour change: assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", 11, result.length);
 
     search.close();
     reader.close();
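The backwards test above can no longer assert the old cacheability contract, because it compiles against the frozen 3.x test API but runs against the patched core: a FieldCache-backed DocIdSet is now uncacheable on any segment with deletions, regardless of whether the range contains 0, so the old assertions are disabled rather than flipped. A minimal sketch of the new contract (illustrative only, not part of the patch; it assumes a top-level reader over an index with a byte-valued "id" field):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.FieldCacheRangeFilter;

    public class CacheabilityCheck {
      // After this patch: true only while the segment has no deletions;
      // the bounds of the range no longer matter.
      static boolean cacheable(IndexReader topReader) throws IOException {
        FieldCacheRangeFilter<Byte> filter =
            FieldCacheRangeFilter.newByteRange("id", (byte) 10, (byte) 20, true, true);
        IndexReader segment = topReader.getSequentialSubReaders()[0];
        DocIdSet set = filter.getDocIdSet(segment);
        return set.isCacheable();
      }
    }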
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1206033)
+++ lucene/CHANGES.txt	(working copy)
@@ -25,6 +25,12 @@
   have at least one or no value at all in a specific field. (Simon Willnauer,
   Uwe Schindler, Robert Muir)
 
+Bug fixes
+
+* LUCENE-3595: Fixed FieldCacheRangeFilter and FieldCacheTermsFilter
+  to correctly respect deletions on reopened SegmentReaders. Factored out
+  FieldCacheDocIdSet to be a top-level class. (Uwe Schindler, Simon Willnauer)
+
 ======================= Lucene 3.5.0 =======================
 
 Changes in backwards compatibility policy
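The failure mode behind this entry: FieldCache arrays are keyed on the shared segment core, so after a SegmentReader is reopened with new deletions, the cached value arrays still carry entries for the deleted documents. The old counting iterator consulted only those arrays, so a filter used standalone could return deleted documents. A hedged sketch of the affected usage pattern (assumes an IndexSearcher whose underlying reader was reopened after deletions, and an int-valued "id" field):

    import java.io.IOException;
    import org.apache.lucene.search.ConstantScoreQuery;
    import org.apache.lucene.search.FieldCacheRangeFilter;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.TopDocs;

    public class StandaloneFilterExample {
      static TopDocs idRange(IndexSearcher searcher) throws IOException {
        // FieldCache-backed filter used standalone, without an enclosing query.
        Filter f = FieldCacheRangeFilter.newIntRange("id", 10, 20, true, true);
        // Before this fix, hits could include documents deleted after the
        // FieldCache entry was populated; now deletions are respected.
        return searcher.search(new ConstantScoreQuery(f), 100);
      }
    }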
Index: lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java	(revision 0)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java	(working copy)
@@ -0,0 +1,120 @@
+package org.apache.lucene.search;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.TermDocs;
+
+/**
+ * Base class for DocIdSets to be used with FieldCache. The iterator
+ * implementation is very slow if the {@link #matchDoc} implementation is not
+ * optimized, as iterators simply increment the document id until
+ * {@code matchDoc(int)} returns true. Because of this, {@code matchDoc(int)}
+ * must be as fast as possible and in no case do any I/O.
+ * @lucene.internal
+ */
+public abstract class FieldCacheDocIdSet extends DocIdSet {
+
+  protected final IndexReader reader;
+
+  public FieldCacheDocIdSet(IndexReader reader) {
+    this.reader = reader;
+  }
+
+  /**
+   * Checks whether a doc is a hit.
+   */
+  protected abstract boolean matchDoc(int doc);
+
+  /** this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs */
+  @Override
+  public boolean isCacheable() {
+    return !reader.hasDeletions();
+  }
+
+  @Override
+  public DocIdSetIterator iterator() throws IOException {
+    if (reader.hasDeletions()) {
+      // a DocIdSetIterator using TermDocs to iterate valid docIds
+      final TermDocs termDocs = reader.termDocs(null);
+      return new DocIdSetIterator() {
+        private int doc = -1;
+
+        @Override
+        public int docID() {
+          return doc;
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+          do {
+            if (!termDocs.next())
+              return doc = NO_MORE_DOCS;
+          } while (!matchDoc(doc = termDocs.doc()));
+          return doc;
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+          if (!termDocs.skipTo(target))
+            return doc = NO_MORE_DOCS;
+          while (!matchDoc(doc = termDocs.doc())) {
+            if (!termDocs.next())
+              return doc = NO_MORE_DOCS;
+          }
+          return doc;
+        }
+      };
+    } else {
+      // a DocIdSetIterator generating docIds by incrementing a variable -
+      // this one can be used if there are no deletions on the index
+      // (specialization optimization: no deletions to respect)
+      final int maxDoc = reader.maxDoc();
+      return new DocIdSetIterator() {
+        private int doc = -1;
+
+        @Override
+        public int docID() {
+          return doc;
+        }
+
+        @Override
+        public int nextDoc() {
+          do {
+            doc++;
+            if (doc >= maxDoc) {
+              return doc = NO_MORE_DOCS;
+            }
+          } while (!matchDoc(doc));
+          return doc;
+        }
+
+        @Override
+        public int advance(int target) {
+          for (doc = target; doc < maxDoc; doc++) {
+            if (matchDoc(doc)) {
+              return doc;
+            }
+          }
+          return doc = NO_MORE_DOCS;
+        }
+      };
+    }
+  }
+}
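The new top-level class is the extension point for all FieldCache-based filters. A minimal, hypothetical subclass to illustrate the intended contract (the field name and helper are assumptions, not part of the patch): matchDoc does nothing but an in-memory array lookup, and skipping deleted documents is entirely the base class's job:

    static DocIdSet evenValues(final IndexReader reader, final String field)
        throws IOException {
      final int[] values = FieldCache.DEFAULT.getInts(reader, field);
      return new FieldCacheDocIdSet(reader) {
        @Override
        protected boolean matchDoc(int doc) {
          return (values[doc] & 1) == 0;  // array lookup only, no I/O
        }
      };
    }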
Index: lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java	(revision 1206033)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java	(working copy)
@@ ... @@
         assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0;
 
-        // for this DocIdSet, we never need to use TermDocs,
-        // because deleted docs have an order of 0 (null entry in StringIndex)
-        return new FieldCacheDocIdSet(reader, false) {
+        return new FieldCacheDocIdSet(reader) {
           @Override
-          final boolean matchDoc(int doc) {
+          protected final boolean matchDoc(int doc) {
             return fcsi.order[doc] >= inclusiveLowerPoint && fcsi.order[doc] <= inclusiveUpperPoint;
           }
         };
@@ -171,10 +168,9 @@
           return DocIdSet.EMPTY_DOCIDSET;
 
         final byte[] values = FieldCache.DEFAULT.getBytes(reader, field, (FieldCache.ByteParser) parser);
-        // we only request the usage of termDocs, if the range contains 0
-        return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+        return new FieldCacheDocIdSet(reader) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -222,10 +218,9 @@
           return DocIdSet.EMPTY_DOCIDSET;
 
         final short[] values = FieldCache.DEFAULT.getShorts(reader, field, (FieldCache.ShortParser) parser);
-        // we only request the usage of termDocs, if the range contains 0
-        return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+        return new FieldCacheDocIdSet(reader) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -273,10 +268,9 @@
           return DocIdSet.EMPTY_DOCIDSET;
 
         final int[] values = FieldCache.DEFAULT.getInts(reader, field, (FieldCache.IntParser) parser);
-        // we only request the usage of termDocs, if the range contains 0
-        return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+        return new FieldCacheDocIdSet(reader) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -324,10 +318,9 @@
           return DocIdSet.EMPTY_DOCIDSET;
 
         final long[] values = FieldCache.DEFAULT.getLongs(reader, field, (FieldCache.LongParser) parser);
-        // we only request the usage of termDocs, if the range contains 0
-        return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) {
+        return new FieldCacheDocIdSet(reader) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -379,10 +372,9 @@
           return DocIdSet.EMPTY_DOCIDSET;
 
         final float[] values = FieldCache.DEFAULT.getFloats(reader, field, (FieldCache.FloatParser) parser);
-        // we only request the usage of termDocs, if the range contains 0
-        return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) {
+        return new FieldCacheDocIdSet(reader) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -434,10 +426,9 @@
           return DocIdSet.EMPTY_DOCIDSET;
 
         final double[] values = FieldCache.DEFAULT.getDoubles(reader, field, (FieldCache.DoubleParser) parser);
-        // we only request the usage of termDocs, if the range contains 0
-        return new FieldCacheDocIdSet(reader, (inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) {
+        return new FieldCacheDocIdSet(reader) {
           @Override
-          boolean matchDoc(int doc) {
+          protected boolean matchDoc(int doc) {
             return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
           }
         };
@@ -500,103 +491,5 @@
 
   /** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */
   public FieldCache.Parser getParser() { return parser; }
-
-  static abstract class FieldCacheDocIdSet extends DocIdSet {
-    private final IndexReader reader;
-    private boolean mayUseTermDocs;
-
-    FieldCacheDocIdSet(IndexReader reader, boolean mayUseTermDocs) {
-      this.reader = reader;
-      this.mayUseTermDocs = mayUseTermDocs;
-    }
-
-    /** this method checks, if a doc is a hit, should throw AIOBE, when position invalid */
-    abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
-
-    /** this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs */
-    @Override
-    public boolean isCacheable() {
-      return !(mayUseTermDocs && reader.hasDeletions());
-    }
-
-    @Override
-    public DocIdSetIterator iterator() throws IOException {
-      // Synchronization needed because deleted docs BitVector
-      // can change after call to hasDeletions until TermDocs creation.
-      // We only use an iterator with termDocs, when this was requested (e.g. range contains 0)
-      // and the index has deletions
-      final TermDocs termDocs;
-      synchronized(reader) {
-        termDocs = isCacheable() ? null : reader.termDocs(null);
-      }
-      if (termDocs != null) {
-        // a DocIdSetIterator using TermDocs to iterate valid docIds
-        return new DocIdSetIterator() {
-          private int doc = -1;
-
-          @Override
-          public int docID() {
-            return doc;
-          }
-
-          @Override
-          public int nextDoc() throws IOException {
-            do {
-              if (!termDocs.next())
-                return doc = NO_MORE_DOCS;
-            } while (!matchDoc(doc = termDocs.doc()));
-            return doc;
-          }
-
-          @Override
-          public int advance(int target) throws IOException {
-            if (!termDocs.skipTo(target))
-              return doc = NO_MORE_DOCS;
-            while (!matchDoc(doc = termDocs.doc())) {
-              if (!termDocs.next())
-                return doc = NO_MORE_DOCS;
-            }
-            return doc;
-          }
-        };
-      } else {
-        // a DocIdSetIterator generating docIds by incrementing a variable -
-        // this one can be used if there are no deletions are on the index
-        return new DocIdSetIterator() {
-          private int doc = -1;
-
-          @Override
-          public int docID() {
-            return doc;
-          }
-
-          @Override
-          public int nextDoc() {
-            try {
-              do {
-                doc++;
-              } while (!matchDoc(doc));
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-
-          @Override
-          public int advance(int target) {
-            try {
-              doc = target;
-              while (!matchDoc(doc)) {
-                doc++;
-              }
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-        };
-      }
-    }
-  }
 }
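Net effect of the hunks above: every factory now hands deletion handling to the base class, so the mayUseTermDocs flag, the range-contains-0 special case, and the AIOBE-based loop termination all disappear. For a consumer nothing changes except correctness; a sketch of draining the result (filter and segmentReader are assumed inputs, not names from the patch):

    static void consume(Filter filter, IndexReader segmentReader) throws IOException {
      DocIdSetIterator it = filter.getDocIdSet(segmentReader).iterator();
      int doc;
      while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        // with this patch, doc is never a deleted document, whether or not
        // the requested range contains the field's default value 0
      }
    }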
Index: lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java	(revision 1206033)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java	(working copy)
@@ -116,9 +116,9 @@
         bits.set(termNumber);
       }
     }
-    return new FieldCacheRangeFilter.FieldCacheDocIdSet(reader, true) {
+    return new FieldCacheDocIdSet(reader) {
       @Override
-      boolean matchDoc(int doc) {
+      protected final boolean matchDoc(int doc) {
         return bits.get(fcsi.order[doc]);
       }
     };
Index: lucene/src/java/org/apache/lucene/search/FieldValueFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldValueFilter.java	(revision 1206033)
+++ lucene/src/java/org/apache/lucene/search/FieldValueFilter.java	(working copy)
@@ -19,7 +19,6 @@
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.Bits.MatchAllBits;
 import org.apache.lucene.util.Bits.MatchNoBits;
@@ -59,21 +58,16 @@
   }
 
   @Override
-  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+  public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
     final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(
         reader, field);
     if (negate) {
       if (docsWithField instanceof MatchAllBits) {
         return null;
       }
-      final int maxDoc = reader.maxDoc();
-      return new FieldCacheDocIdSet(reader, true) {
+      return new FieldCacheDocIdSet(reader) {
         @Override
-        final boolean matchDoc(int doc) {
-          if (doc >= maxDoc) {
-            // TODO: this makes no sense we should check this on the caller level
-            throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
-          }
+        protected final boolean matchDoc(int doc) {
           return !docsWithField.get(doc);
         }
       };
@@ -84,20 +78,18 @@
     if (docsWithField instanceof DocIdSet) {
       // UweSays: this is always the case for our current impl - but who knows
       // :-)
-      /*
-       * TODO this could deliver delete docs but FCDID seems broken too if
-       * this filter is not used with another query
-       */
-      return (DocIdSet) docsWithField;
+      final DocIdSet dis = (DocIdSet) docsWithField;
+      return (reader.hasDeletions()) ?
+          new FilteredDocIdSet(dis) {
+            @Override
+            protected final boolean match(int doc) {
+              return !reader.isDeleted(doc);
+            }
+          } : dis;
     }
-    final int maxDoc = reader.maxDoc();
-    return new FieldCacheDocIdSet(reader, true) {
+    return new FieldCacheDocIdSet(reader) {
       @Override
-      final boolean matchDoc(int doc) {
-        if (doc >= maxDoc) {
-          // TODO: this makes no sense we should check this on the caller level
-          throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
-        }
+      protected final boolean matchDoc(int doc) {
         return docsWithField.get(doc);
       }
     };
@@ -134,7 +126,7 @@
 
   @Override
   public String toString() {
-    return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]";
+    return "FieldValueFilter [field=" + field + ", negate=" + negate + "]";
   }
 }
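FieldValueFilter now wraps the FieldCache-provided DocIdSet in a FilteredDocIdSet whenever the reader has deletions, instead of returning it raw. The same pattern works for any DocIdSet that is unaware of deletions; a standalone helper sketch (the method name is illustrative, not from the patch):

    static DocIdSet skipDeleted(final DocIdSet inner, final IndexReader reader) {
      if (!reader.hasDeletions()) {
        return inner;  // nothing to hide, return the set unwrapped
      }
      // FilteredDocIdSet delegates iteration to the inner set and runs
      // every candidate doc through match()
      return new FilteredDocIdSet(inner) {
        @Override
        protected boolean match(int docid) {
          return !reader.isDeleted(docid);
        }
      };
    }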
Index: lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java	(revision 1206033)
+++ lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java	(working copy)
@@ -535,7 +535,6 @@
     search.close();
   }
 
-  // test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0
   @Test
   public void testSparseIndex() throws IOException {
     Directory dir = newDirectory();
@@ -573,11 +572,11 @@
     assertEquals("find all", 20, result.length);
 
     result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) 10),Byte.valueOf((byte) 20),T,T), 100).scoreDocs;
-    assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", 11, result.length);
 
     result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",Byte.valueOf((byte) -20),Byte.valueOf((byte) -10),T,T), 100).scoreDocs;
-    assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+    assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
     assertEquals("find all", 11, result.length);
 
     search.close();
     reader.close();