Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1173516)
+++ lucene/CHANGES.txt (working copy)
@@ -5,6 +5,19 @@
======================= Lucene 3.5.0 =======================
+Changes in backwards compatibility policy
+
+* LUCENE-3390: The first approach in Lucene 3.4.0 for missing values
+ support for sorting had a design problem that made the missing value
+ be populated directly into the FieldCache arrays during sorting,
+ leading to concurrency issues. To fix this behaviour, the method
+ signatures had to be changed:
+ - FieldCache.getUnValuedDocs() returns the interface Bits instead DocIdSet
+ - FieldComparator.setMissingValue() was removed and added to
+ constructor
+ As this is expert API, most code will not be affected.
+ (Uwe Schindler)
+
Bug fixes
* SOLR-2762: (backport form 4.x line): FSTLookup could return duplicate
@@ -25,6 +38,11 @@
QueryWrapperFilter and similar classes to get a top-level DocIdSet.
(Dan C., Uwe Schindler)
+* LUCENE-3390: Corrected handling of missing values when two parallel searches
+ using different missing values for sorting: the missing value was populated
+ directly into the FieldCache arrays during sorting, leading to concurrency
+ issues. (Uwe Schindler)
+
New Features
Optimizations
Index: lucene/src/java/org/apache/lucene/search/FieldCache.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCache.java (revision 1173516)
+++ lucene/src/java/org/apache/lucene/search/FieldCache.java (working copy)
@@ -22,6 +22,7 @@
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
+import org.apache.lucene.util.Bits;
import java.io.IOException;
import java.io.Serializable;
@@ -315,7 +316,7 @@
* reader.maxDoc(), with turned on bits for each docid that
* does not have a value for this field.
*/
- public DocIdSet getUnValuedDocs (IndexReader reader, String field)
+ public Bits getUnValuedDocs (IndexReader reader, String field)
throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is
Index: lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 1173516)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy)
@@ -30,9 +30,8 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.util.BitVector;
-import org.apache.lucene.util.DocIdBitSet;
-import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.FieldCacheSanityChecker;
@@ -413,6 +412,11 @@
return retArray;
}
}
+
+ public Bits getUnValuedDocs(IndexReader reader, String field)
+ throws IOException {
+ return (Bits) caches.get(UnValuedDocsCache.class).get(reader, new Entry(field, null));
+ }
static final class UnValuedDocsCache extends Cache {
UnValuedDocsCache(FieldCache wrapper) {
@@ -422,34 +426,32 @@
@Override
protected Object createValue(IndexReader reader, Entry entryKey)
throws IOException {
- Entry entry = entryKey;
- String field = entry.field;
-
- if (reader.maxDoc() == reader.docFreq(new Term(field))) {
- return DocIdSet.EMPTY_DOCIDSET;
- }
-
- OpenBitSet res = new OpenBitSet(reader.maxDoc());
- TermDocs termDocs = reader.termDocs();
- TermEnum termEnum = reader.terms (new Term (field));
+ final Entry entry = entryKey;
+ final String field = entry.field;
+ final FixedBitSet res = new FixedBitSet(reader.maxDoc());
+ final TermDocs termDocs = reader.termDocs();
+ final TermEnum termEnum = reader.terms(new Term(field));
try {
do {
- Term term = termEnum.term();
- if (term==null || term.field() != field) break;
- termDocs.seek (termEnum);
+ final Term term = termEnum.term();
+ if (term == null || term.field() != field) break;
+ termDocs.seek(termEnum);
while (termDocs.next()) {
- res.fastSet(termDocs.doc());
+ res.set(termDocs.doc());
}
} while (termEnum.next());
} finally {
termDocs.close();
termEnum.close();
}
+ // The cardinality of the BitSet is numDocs if all documents have a value.
+ // As deleted docs are not in TermDocs, this is always true
+ if (res.cardinality() >= reader.numDocs())
+ return new Bits.MatchNoBits(reader.maxDoc());
res.flip(0, reader.maxDoc());
return res;
}
}
-
// inherit javadocs
public float[] getFloats (IndexReader reader, String field)
@@ -736,10 +738,5 @@
public PrintStream getInfoStream() {
return infoStream;
}
-
- public DocIdSet getUnValuedDocs(IndexReader reader, String field)
- throws IOException {
- return (DocIdSet) caches.get(UnValuedDocsCache.class).get(reader, new Entry(field, null));
- }
}
Index: lucene/src/java/org/apache/lucene/search/FieldComparator.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldComparator.java (revision 1173516)
+++ lucene/src/java/org/apache/lucene/search/FieldComparator.java (working copy)
@@ -29,7 +29,7 @@
import org.apache.lucene.search.FieldCache.IntParser;
import org.apache.lucene.search.FieldCache.ShortParser;
import org.apache.lucene.search.FieldCache.StringIndex;
-import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.Bits;
/**
* Expert: a FieldComparator compares hits so as to determine their
@@ -84,14 +84,6 @@
*/
public abstract class FieldComparator {
- protected T missingValue = null;
-
- /** Set a default sorting value for documents which lacks one */
- public FieldComparator setMissingValue(T missingValue) {
- this.missingValue = missingValue;
- return this;
- }
-
/**
* Compare hit at slot1 with hit at slot2.
*
@@ -191,18 +183,46 @@
}
}
+ public static abstract class NumericComparator extends FieldComparator {
+ protected final T missingValue;
+ protected final String field;
+
+ protected Bits unvaluedDocs = null;
+
+ public NumericComparator(String field, T missingValue) {
+ this.field = field;
+ this.missingValue = missingValue;
+ }
+
+ @Override
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {
+ if (missingValue != null) {
+ unvaluedDocs = FieldCache.DEFAULT.getUnValuedDocs(reader, field);
+ // optimization to remove unneeded checks on the bit interface:
+ if (unvaluedDocs instanceof Bits.MatchNoBits)
+ unvaluedDocs = null;
+ } else {
+ unvaluedDocs = null;
+ }
+ }
+
+ }
+
/** Parses field's values as byte (using {@link
* FieldCache#getBytes} and sorts by ascending value */
- public static final class ByteComparator extends FieldComparator {
+ public static final class ByteComparator extends NumericComparator {
private final byte[] values;
private byte[] currentReaderValues;
- private final String field;
private ByteParser parser;
private byte bottom;
ByteComparator(int numHits, String field, FieldCache.Parser parser) {
+ this(numHits, field, parser, null);
+ }
+
+ ByteComparator(int numHits, String field, FieldCache.Parser parser, Byte missingValue) {
+ super(field, missingValue);
values = new byte[numHits];
- this.field = field;
this.parser = (ByteParser) parser;
}
@@ -213,24 +233,24 @@
@Override
public int compareBottom(int doc) {
- return bottom - currentReaderValues[doc];
+ byte v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ return bottom - v2;
}
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues[doc];
+ byte v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ values[slot] = v2;
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
currentReaderValues = FieldCache.DEFAULT.getBytes(reader, field, parser);
- if (missingValue != null) {
- DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
- final byte byteValue = missingValue.byteValue();
- for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
- currentReaderValues[doc] = byteValue;
- }
- }
+ super.setNextReader(reader, docBase);
}
@Override
@@ -292,16 +312,19 @@
/** Parses field's values as double (using {@link
* FieldCache#getDoubles} and sorts by ascending value */
- public static final class DoubleComparator extends FieldComparator {
+ public static final class DoubleComparator extends NumericComparator {
private final double[] values;
private double[] currentReaderValues;
- private final String field;
private DoubleParser parser;
private double bottom;
DoubleComparator(int numHits, String field, FieldCache.Parser parser) {
+ this(numHits, field, parser, null);
+ }
+
+ DoubleComparator(int numHits, String field, FieldCache.Parser parser, Double missingValue) {
+ super(field, missingValue);
values = new double[numHits];
- this.field = field;
this.parser = (DoubleParser) parser;
}
@@ -320,7 +343,9 @@
@Override
public int compareBottom(int doc) {
- final double v2 = currentReaderValues[doc];
+ double v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
if (bottom > v2) {
return 1;
} else if (bottom < v2) {
@@ -332,19 +357,16 @@
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues[doc];
+ double v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ values[slot] = v2;
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
currentReaderValues = FieldCache.DEFAULT.getDoubles(reader, field, parser);
- if (missingValue != null) {
- DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
- final double doubleValue = missingValue.doubleValue();
- for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
- currentReaderValues[doc] = doubleValue;
- }
- }
+ super.setNextReader(reader, docBase);
}
@Override
@@ -360,16 +382,19 @@
/** Parses field's values as float (using {@link
* FieldCache#getFloats} and sorts by ascending value */
- public static final class FloatComparator extends FieldComparator {
+ public static final class FloatComparator extends NumericComparator {
private final float[] values;
private float[] currentReaderValues;
- private final String field;
private FloatParser parser;
private float bottom;
FloatComparator(int numHits, String field, FieldCache.Parser parser) {
+ this(numHits, field, parser, null);
+ }
+
+ FloatComparator(int numHits, String field, FieldCache.Parser parser, Float missingValue) {
+ super(field, missingValue);
values = new float[numHits];
- this.field = field;
this.parser = (FloatParser) parser;
}
@@ -392,7 +417,9 @@
public int compareBottom(int doc) {
// TODO: are there sneaky non-branch ways to compute
// sign of float?
- final float v2 = currentReaderValues[doc];
+ float v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
if (bottom > v2) {
return 1;
} else if (bottom < v2) {
@@ -404,19 +431,16 @@
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues[doc];
+ float v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ values[slot] = v2;
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
currentReaderValues = FieldCache.DEFAULT.getFloats(reader, field, parser);
- if (missingValue != null) {
- DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
- final float floatValue = missingValue.floatValue();
- for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
- currentReaderValues[doc] = floatValue;
- }
- }
+ super.setNextReader(reader, docBase);
}
@Override
@@ -432,16 +456,19 @@
/** Parses field's values as int (using {@link
* FieldCache#getInts} and sorts by ascending value */
- public static final class IntComparator extends FieldComparator {
+ public static final class IntComparator extends NumericComparator {
private final int[] values;
private int[] currentReaderValues;
- private final String field;
private IntParser parser;
private int bottom; // Value of bottom of queue
IntComparator(int numHits, String field, FieldCache.Parser parser) {
+ this(numHits, field, parser, null);
+ }
+
+ IntComparator(int numHits, String field, FieldCache.Parser parser, Integer missingValue) {
+ super(field, missingValue);
values = new int[numHits];
- this.field = field;
this.parser = (IntParser) parser;
}
@@ -468,7 +495,9 @@
// -1/+1/0 sign
// Cannot return bottom - values[slot2] because that
// may overflow
- final int v2 = currentReaderValues[doc];
+ int v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
if (bottom > v2) {
return 1;
} else if (bottom < v2) {
@@ -480,19 +509,16 @@
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues[doc];
+ int v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ values[slot] = v2;
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
currentReaderValues = FieldCache.DEFAULT.getInts(reader, field, parser);
- if (missingValue != null) {
- DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
- final int intValue = missingValue.intValue();
- for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
- currentReaderValues[doc] = intValue;
- }
- }
+ super.setNextReader(reader, docBase);
}
@Override
@@ -508,16 +534,19 @@
/** Parses field's values as long (using {@link
* FieldCache#getLongs} and sorts by ascending value */
- public static final class LongComparator extends FieldComparator {
+ public static final class LongComparator extends NumericComparator {
private final long[] values;
private long[] currentReaderValues;
- private final String field;
private LongParser parser;
private long bottom;
LongComparator(int numHits, String field, FieldCache.Parser parser) {
+ this(numHits, field, parser, null);
+ }
+
+ LongComparator(int numHits, String field, FieldCache.Parser parser, Long missingValue) {
+ super(field, missingValue);
values = new long[numHits];
- this.field = field;
this.parser = (LongParser) parser;
}
@@ -540,7 +569,9 @@
public int compareBottom(int doc) {
// TODO: there are sneaky non-branch ways to compute
// -1/+1/0 sign
- final long v2 = currentReaderValues[doc];
+ long v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
if (bottom > v2) {
return 1;
} else if (bottom < v2) {
@@ -552,19 +583,16 @@
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues[doc];
+ long v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ values[slot] = v2;
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
currentReaderValues = FieldCache.DEFAULT.getLongs(reader, field, parser);
- if (missingValue != null) {
- DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
- final long longValue = missingValue.longValue();
- for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
- currentReaderValues[doc] = longValue;
- }
- }
+ super.setNextReader(reader, docBase);
}
@Override
@@ -648,16 +676,19 @@
/** Parses field's values as short (using {@link
* FieldCache#getShorts} and sorts by ascending value */
- public static final class ShortComparator extends FieldComparator {
+ public static final class ShortComparator extends NumericComparator {
private final short[] values;
private short[] currentReaderValues;
- private final String field;
private ShortParser parser;
private short bottom;
ShortComparator(int numHits, String field, FieldCache.Parser parser) {
+ this(numHits, field, parser, null);
+ }
+
+ ShortComparator(int numHits, String field, FieldCache.Parser parser, Short missingValue) {
+ super(field, missingValue);
values = new short[numHits];
- this.field = field;
this.parser = (ShortParser) parser;
}
@@ -668,24 +699,24 @@
@Override
public int compareBottom(int doc) {
- return bottom - currentReaderValues[doc];
+ short v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ return bottom - v2;
}
@Override
public void copy(int slot, int doc) {
- values[slot] = currentReaderValues[doc];
+ short v2 = currentReaderValues[doc];
+ if (unvaluedDocs != null && v2 == 0 && unvaluedDocs.get(doc))
+ v2 = missingValue;
+ values[slot] = v2;
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
currentReaderValues = FieldCache.DEFAULT.getShorts(reader, field, parser);
- if (missingValue != null) {
- DocIdSetIterator iterator = FieldCache.DEFAULT.getUnValuedDocs(reader, field).iterator();
- final short shortValue = missingValue.shortValue();
- for (int doc=iterator.nextDoc(); doc!=DocIdSetIterator.NO_MORE_DOCS; doc=iterator.nextDoc()) {
- currentReaderValues[doc] = shortValue;
- }
- }
+ super.setNextReader(reader, docBase);
}
@Override
Index: lucene/src/java/org/apache/lucene/search/SortField.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SortField.java (revision 1173516)
+++ lucene/src/java/org/apache/lucene/search/SortField.java (working copy)
@@ -403,22 +403,22 @@
return new FieldComparator.DocComparator(numHits);
case SortField.INT:
- return new FieldComparator.IntComparator(numHits, field, parser).setMissingValue((Integer) missingValue);
+ return new FieldComparator.IntComparator(numHits, field, parser, (Integer) missingValue);
case SortField.FLOAT:
- return new FieldComparator.FloatComparator(numHits, field, parser).setMissingValue((Float) missingValue);
+ return new FieldComparator.FloatComparator(numHits, field, parser, (Float) missingValue);
case SortField.LONG:
- return new FieldComparator.LongComparator(numHits, field, parser).setMissingValue((Long) missingValue);
+ return new FieldComparator.LongComparator(numHits, field, parser, (Long) missingValue);
case SortField.DOUBLE:
- return new FieldComparator.DoubleComparator(numHits, field, parser).setMissingValue((Double) missingValue);
+ return new FieldComparator.DoubleComparator(numHits, field, parser, (Double) missingValue);
case SortField.BYTE:
- return new FieldComparator.ByteComparator(numHits, field, parser).setMissingValue((Byte) missingValue);
+ return new FieldComparator.ByteComparator(numHits, field, parser, (Byte) missingValue);
case SortField.SHORT:
- return new FieldComparator.ShortComparator(numHits, field, parser).setMissingValue((Short) missingValue);
+ return new FieldComparator.ShortComparator(numHits, field, parser, (Short) missingValue);
case SortField.CUSTOM:
assert comparatorSource != null;
Index: lucene/src/java/org/apache/lucene/util/BitVector.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/BitVector.java (revision 1173516)
+++ lucene/src/java/org/apache/lucene/util/BitVector.java (working copy)
@@ -34,7 +34,7 @@
*
* @lucene.internal
*/
-public final class BitVector implements Cloneable {
+public final class BitVector implements Cloneable, Bits {
private byte[] bits;
private int size;
@@ -120,6 +120,12 @@
return size;
}
+ /** Returns the number of bits in this vector. This is also one greater than
+ the number of the largest valid bit number. */
+ public final int length() {
+ return size;
+ }
+
/** Returns the total number of one bits in this vector. This is efficiently
computed and cached, so that, if the vector is not changed, no
recomputation is done for repeated calls. */
Index: lucene/src/java/org/apache/lucene/util/FixedBitSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/FixedBitSet.java (revision 1173516)
+++ lucene/src/java/org/apache/lucene/util/FixedBitSet.java (working copy)
@@ -36,7 +36,7 @@
* @lucene.internal
**/
-public final class FixedBitSet extends DocIdSet {
+public final class FixedBitSet extends DocIdSet implements Bits {
private final long[] bits;
private int numBits;
Index: lucene/src/java/org/apache/lucene/util/OpenBitSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/OpenBitSet.java (revision 1173516)
+++ lucene/src/java/org/apache/lucene/util/OpenBitSet.java (working copy)
@@ -75,7 +75,7 @@
*/
-public class OpenBitSet extends DocIdSet implements Cloneable, Serializable {
+public class OpenBitSet extends DocIdSet implements Cloneable, Serializable, Bits {
protected long[] bits;
protected int wlen; // number of words (elements) used in the array
@@ -137,6 +137,10 @@
return capacity();
}
+ public int length() {
+ return bits.length << 6;
+ }
+
/** Returns true if there are no set bits */
public boolean isEmpty() { return cardinality()==0; }