Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1099961)
+++ lucene/CHANGES.txt (working copy)
@@ -9,12 +9,28 @@
a method getHeapArray() was added to retrieve the internal heap array as a
non-generic Object[]. (Uwe Schindler, Yonik Seeley)
+Changes in runtime behavior
+
+* LUCENE-3065: When a NumericField is retrieved from a Document loaded
+ from IndexReader (or IndexSearcher), it will now come back as
+ NumericField, not as a Field with a string-ified version of the
+ numeric value you had indexed. Note that this only applies to
+ newly-indexed Documents; older indices will still return Field
+ with the string-ified numeric value. If you call Document.get(),
+ the value still comes back as String, but Document.getFieldable()
+ returns NumericField instances. (Uwe Schindler, Ryan McKinley,
+ Mike McCandless)
+
API Changes
* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
(though @lucene.experimental), allowing for custom MergeScheduler
implementations. (Shai Erera)
+* LUCENE-3065: Document.getField() and Document.getFields() were deprecated,
+ as they throw ClassCastException when loading lazy fields or NumericFields.
+ (Uwe Schindler, Ryan McKinley, Mike McCandless)
+
Optimizations
* LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early
Index: lucene/src/java/org/apache/lucene/document/Document.java
===================================================================
--- lucene/src/java/org/apache/lucene/document/Document.java (revision 1099961)
+++ lucene/src/java/org/apache/lucene/document/Document.java (working copy)
@@ -132,8 +132,13 @@
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
- * Do not use this method with lazy loaded fields.
+ * Do not use this method with lazy loaded fields or {@link NumericField}.
+ * @deprecated use {@link #getFieldable} instead and cast depending on
+ * data type.
+ * @throws ClassCastException if you try to retrieve a numerical or
+ * lazy loaded field.
*/
+ @Deprecated
public final Field getField(String name) {
return (Field) getFieldable(name);
}
@@ -155,6 +160,8 @@
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
+ * For {@link NumericField} it returns the string value of the number. If you want
+ * the actual {@code NumericField} instance back, use {@link #getFieldable}.
*/
public final String get(String name) {
for (Fieldable field : fields) {
@@ -178,13 +185,18 @@
/**
* Returns an array of {@link Field}s with the given name.
- * Do not use with lazy loaded fields.
* This method returns an empty array when there are no
* matching fields. It never returns null.
+ * Do not use this method with lazy loaded fields or {@link NumericField}.
*
* @param name the name of the field
* @return a Field[] array
+ * @deprecated use {@link #getFieldables} instead and cast depending on
+ * data type.
+ * @throws ClassCastException if you try to retrieve a numerical or
+ * lazy loaded field.
*/
+ @Deprecated
public final Field[] getFields(String name) {
List result = new ArrayList();
for (Fieldable field : fields) {
@@ -231,6 +243,8 @@
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
+ * For {@link NumericField}s it returns the string value of the number. If you want
+ * the actual {@code NumericField} instances back, use {@link #getFieldables}.
* @param name the name of the field
* @return a String[] of field values
*/
Index: lucene/src/java/org/apache/lucene/document/NumericField.java
===================================================================
--- lucene/src/java/org/apache/lucene/document/NumericField.java (revision 1099961)
+++ lucene/src/java/org/apache/lucene/document/NumericField.java (working copy)
@@ -127,19 +127,19 @@
* class is a wrapper around this token stream type for
* easier, more intuitive usage.
*
- * NOTE: This class is only used during
- * indexing. When retrieving the stored field value from a
- * {@link Document} instance after search, you will get a
- * conventional {@link Fieldable} instance where the numeric
- * values are returned as {@link String}s (according to
- * toString(value) of the used data type).
- *
* @since 2.9
*/
public final class NumericField extends AbstractField {
- private final NumericTokenStream numericTS;
+ /** Data type of the value in {@link NumericField}.
+ * @since 3.2
+ */
+ public static enum DataType { INT, LONG, FLOAT, DOUBLE }
+ private transient NumericTokenStream numericTS;
+ private DataType type;
+ private final int precisionStep;
+
/**
* Creates a field for numeric values using the default precisionStep
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
@@ -158,8 +158,8 @@
* a numeric value, before indexing a document containing this field,
* set a value using the various set???Value() methods.
* @param name the field name
- * @param store if the field should be stored in plain text form
- * (according to toString(value) of the used data type)
+ * @param store if the field should be stored; {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, Field.Store store, boolean index) {
@@ -186,19 +186,43 @@
* set a value using the various set???Value() methods.
* @param name the field name
* @param precisionStep the used precision step
- * @param store if the field should be stored in plain text form
- * (according to toString(value) of the used data type)
+ * @param store if the field should be stored; {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
+ this.precisionStep = precisionStep;
setOmitTermFreqAndPositions(true);
- numericTS = new NumericTokenStream(precisionStep);
}
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
public TokenStream tokenStreamValue() {
- return isIndexed() ? numericTS : null;
+ if (!isIndexed())
+ return null;
+ if (numericTS == null) {
+ // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
+ // if not needed (stored field loading)
+ numericTS = new NumericTokenStream(precisionStep);
+ // initialize value in TokenStream
+ if (fieldsData != null) {
+ assert type != null;
+ final Number val = (Number) fieldsData;
+ switch (type) {
+ case INT:
+ numericTS.setIntValue(val.intValue()); break;
+ case LONG:
+ numericTS.setLongValue(val.longValue()); break;
+ case FLOAT:
+ numericTS.setFloatValue(val.floatValue()); break;
+ case DOUBLE:
+ numericTS.setDoubleValue(val.doubleValue()); break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ }
+ return numericTS;
}
/** Returns always null for numeric fields */
@@ -212,7 +236,10 @@
return null;
}
- /** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
+ /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
+ * on search results. It is recommended to use {@link Document#getFieldable} instead,
+ * which returns {@code NumericField} instances. You can then use {@link #getNumericValue}
+ * to return the stored value. */
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
@@ -224,9 +251,16 @@
/** Returns the precision step. */
public int getPrecisionStep() {
- return numericTS.getPrecisionStep();
+ return precisionStep;
}
+ /** Returns the data type of the current value, or {@code null} if not yet set.
+ * @since 3.2
+ */
+ public DataType getDataType() {
+ return type;
+ }
+
/**
* Initializes the field with the supplied long value.
* @param value the numeric value
@@ -234,8 +268,9 @@
* document.add(new NumericField(name, precisionStep).setLongValue(value))
*/
public NumericField setLongValue(final long value) {
- numericTS.setLongValue(value);
+ if (numericTS != null) numericTS.setLongValue(value);
fieldsData = Long.valueOf(value);
+ type = DataType.LONG;
return this;
}
@@ -246,8 +281,9 @@
* document.add(new NumericField(name, precisionStep).setIntValue(value))
*/
public NumericField setIntValue(final int value) {
- numericTS.setIntValue(value);
+ if (numericTS != null) numericTS.setIntValue(value);
fieldsData = Integer.valueOf(value);
+ type = DataType.INT;
return this;
}
@@ -258,8 +294,9 @@
* document.add(new NumericField(name, precisionStep).setDoubleValue(value))
*/
public NumericField setDoubleValue(final double value) {
- numericTS.setDoubleValue(value);
+ if (numericTS != null) numericTS.setDoubleValue(value);
fieldsData = Double.valueOf(value);
+ type = DataType.DOUBLE;
return this;
}
@@ -270,8 +307,9 @@
* document.add(new NumericField(name, precisionStep).setFloatValue(value))
*/
public NumericField setFloatValue(final float value) {
- numericTS.setFloatValue(value);
+ if (numericTS != null) numericTS.setFloatValue(value);
fieldsData = Float.valueOf(value);
+ type = DataType.FLOAT;
return this;
}
Index: lucene/src/java/org/apache/lucene/index/FieldsReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/FieldsReader.java (revision 1099961)
+++ lucene/src/java/org/apache/lucene/index/FieldsReader.java (working copy)
@@ -17,6 +17,10 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
+import java.util.zip.DataFormatException;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.CompressionTools;
@@ -25,16 +29,13 @@
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.AlreadyClosedException;
-import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.CloseableThreadLocal;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.zip.DataFormatException;
-
/**
* Class responsible for access to stored document fields.
*
@@ -240,43 +241,43 @@
Document doc = new Document();
int numFields = fieldsStream.readVInt();
- for (int i = 0; i < numFields; i++) {
+ out: for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
- byte bits = fieldsStream.readByte();
- assert bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
+ int bits = fieldsStream.readByte() & 0xFF;
+ assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_COMPRESSED | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
assert (compressed ? (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true)
: "compressed fields are only allowed in indexes of version <= 2.9";
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
- //TODO: Find an alternative approach here if this list continues to grow beyond the
- //list of 5 or 6 currently here. See Lucene 762 for discussion
- if (acceptField.equals(FieldSelectorResult.LOAD)) {
- addField(doc, fi, binary, compressed, tokenize);
+ final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;
+
+ switch (acceptField) {
+ case LOAD:
+ addField(doc, fi, binary, compressed, tokenize, numeric);
+ break;
+ case LOAD_AND_BREAK:
+ addField(doc, fi, binary, compressed, tokenize, numeric);
+ break out; //Get out of this loop
+ case LAZY_LOAD:
+ addFieldLazy(doc, fi, binary, compressed, tokenize, true, numeric);
+ break;
+ case LATENT:
+ addFieldLazy(doc, fi, binary, compressed, tokenize, false, numeric);
+ break;
+ case SIZE:
+ skipFieldBytes(binary, compressed, addFieldSize(doc, fi, binary, compressed, numeric));
+ break;
+ case SIZE_AND_BREAK:
+ addFieldSize(doc, fi, binary, compressed, numeric);
+ break out; //Get out of this loop
+ default:
+ skipField(binary, compressed, numeric);
}
- else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
- addField(doc, fi, binary, compressed, tokenize);
- break;//Get out of this loop
- }
- else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
- addFieldLazy(doc, fi, binary, compressed, tokenize, true);
- } else if (acceptField.equals(FieldSelectorResult.LATENT)) {
- addFieldLazy(doc, fi, binary, compressed, tokenize, false);
- }
- else if (acceptField.equals(FieldSelectorResult.SIZE)){
- skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
- }
- else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
- addFieldSize(doc, fi, binary, compressed);
- break;
- }
- else {
- skipField(binary, compressed);
- }
}
return doc;
@@ -312,41 +313,73 @@
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
* This will have the most payoff on large fields.
*/
- private void skipField(boolean binary, boolean compressed) throws IOException {
- skipField(binary, compressed, fieldsStream.readVInt());
+ private void skipField(boolean binary, boolean compressed, int numeric) throws IOException {
+ final int numBytes;
+ switch(numeric) {
+ case 0:
+ numBytes = fieldsStream.readVInt();
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ numBytes = 4;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ numBytes = 8;
+ break;
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+
+ skipFieldBytes(binary, compressed, numBytes);
}
- private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
- if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
- fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
- } else {
- // We need to skip chars. This will slow us down, but still better
- fieldsStream.skipChars(toRead);
- }
+ private void skipFieldBytes(boolean binary, boolean compressed, int toRead) throws IOException {
+ if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
+ fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
+ } else {
+ // We need to skip chars. This will slow us down, but still better
+ fieldsStream.skipChars(toRead);
+ }
}
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, boolean cacheResult) throws IOException {
+ private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
+ assert numeric != 0;
+ switch(numeric) {
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+ }
+
+ private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
+ final AbstractField f;
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
- //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed, cacheResult));
+ f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed, cacheResult);
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
+ } else if (numeric != 0) {
+ f = loadNumericField(fi, numeric);
} else {
Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
- AbstractField f;
if (compressed) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
f = new LazyField(fi.name, store, toRead, pointer, binary, compressed, cacheResult);
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
- f.setOmitNorms(fi.omitNorms);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
} else {
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
@@ -357,16 +390,16 @@
fieldsStream.skipChars(length);
}
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, compressed, cacheResult);
- f.setOmitNorms(fi.omitNorms);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- }
-
- doc.add(f);
+ }
}
-
+
+ f.setOmitNorms(fi.omitNorms);
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+ doc.add(f);
}
- private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {
+ private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
+ final AbstractField f;
//we have a binary stored field, and it may be compressed
if (binary) {
@@ -374,19 +407,18 @@
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
if (compressed) {
- doc.add(new Field(fi.name, uncompress(b)));
+ f = new Field(fi.name, uncompress(b));
} else {
- doc.add(new Field(fi.name, b));
+ f = new Field(fi.name, b);
}
+ } else if (numeric != 0) {
+ f = loadNumericField(fi, numeric);
} else {
Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
-
- AbstractField f;
if (compressed) {
int toRead = fieldsStream.readVInt();
-
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
f = new Field(fi.name, // field name
@@ -395,8 +427,6 @@
store,
index,
termVector);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- f.setOmitNorms(fi.omitNorms);
} else {
f = new Field(fi.name, // name
false,
@@ -404,19 +434,35 @@
store,
index,
termVector);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- f.setOmitNorms(fi.omitNorms);
}
-
- doc.add(f);
}
+
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+ f.setOmitNorms(fi.omitNorms);
+ doc.add(f);
}
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
- private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
- int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
+ private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed, int numeric) throws IOException {
+ final int bytesize, size;
+ switch(numeric) {
+ case 0:
+ size = fieldsStream.readVInt();
+ bytesize = (binary || compressed) ? size : 2*size;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ size = bytesize = 4;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ size = bytesize = 8;
+ break;
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
@@ -427,7 +473,7 @@
}
/**
- * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
+ * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
* loaded.
*/
private class LazyField extends AbstractField implements Fieldable {
@@ -519,13 +565,13 @@
} catch (IOException e) {
throw new FieldReaderException(e);
}
- if (cacheResult == true){
- fieldsData = value;
- }
- return value;
+ if (cacheResult){
+ fieldsData = value;
+ }
+ return value;
} else{
- return (String) fieldsData;
- }
+ return (String) fieldsData;
+ }
}
}
@@ -574,25 +620,24 @@
if (isCompressed == true) {
value = uncompress(b);
} else {
- value = b;
- }
+ value = b;
+ }
} catch (IOException e) {
throw new FieldReaderException(e);
}
binaryOffset = 0;
binaryLength = toRead;
- if (cacheResult == true){
- fieldsData = value;
- }
- return value;
+ if (cacheResult == true){
+ fieldsData = value;
+ }
+ return value;
} else{
- return (byte[]) fieldsData;
- }
-
-
- } else
- return null;
+ return (byte[]) fieldsData;
+ }
+ } else {
+ return null;
+ }
}
}
Index: lucene/src/java/org/apache/lucene/index/FieldsWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/FieldsWriter.java (revision 1099961)
+++ lucene/src/java/org/apache/lucene/index/FieldsWriter.java (working copy)
@@ -21,20 +21,33 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
final class FieldsWriter {
- static final byte FIELD_IS_TOKENIZED = 0x1;
- static final byte FIELD_IS_BINARY = 0x2;
-
+ static final int FIELD_IS_TOKENIZED = 1 << 0;
+ static final int FIELD_IS_BINARY = 1 << 1;
+
/** @deprecated Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0 */
@Deprecated
- static final byte FIELD_IS_COMPRESSED = 0x4;
+ static final int FIELD_IS_COMPRESSED = 1 << 2;
+ private static final int _NUMERIC_BIT_SHIFT = 3;
+ static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
+
+ static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
+ // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
+ // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
+
+ // the next possible bits are: 1 << 6; 1 << 7
+
// Original format
static final int FORMAT = 0;
@@ -44,10 +57,13 @@
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
+ // Lucene 3.2: NumericFields are stored in binary format
+ static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
+
// NOTE: if you introduce a new format, make it 1 higher
// than the current one, and always change this if you
// switch to a new format!
- static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+ static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
private FieldInfos fieldInfos;
@@ -134,14 +150,28 @@
final void writeField(FieldInfo fi, Fieldable field) throws IOException {
fieldsStream.writeVInt(fi.number);
- byte bits = 0;
+ int bits = 0;
if (field.isTokenized())
- bits |= FieldsWriter.FIELD_IS_TOKENIZED;
+ bits |= FIELD_IS_TOKENIZED;
if (field.isBinary())
- bits |= FieldsWriter.FIELD_IS_BINARY;
+ bits |= FIELD_IS_BINARY;
+ if (field instanceof NumericField) {
+ final NumericField.DataType type = ((NumericField) field).getDataType();
+ switch (type) {
+ case INT:
+ bits |= FIELD_IS_NUMERIC_INT; break;
+ case LONG:
+ bits |= FIELD_IS_NUMERIC_LONG; break;
+ case FLOAT:
+ bits |= FIELD_IS_NUMERIC_FLOAT; break;
+ case DOUBLE:
+ bits |= FIELD_IS_NUMERIC_DOUBLE; break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ fieldsStream.writeByte((byte) bits);
- fieldsStream.writeByte(bits);
-
if (field.isBinary()) {
final byte[] data;
final int len;
@@ -152,8 +182,22 @@
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, offset, len);
- }
- else {
+ } else if (field instanceof NumericField) {
+ final Number n = ((NumericField) field).getNumericValue();
+ final NumericField.DataType type = ((NumericField) field).getDataType();
+ switch (type) {
+ case INT:
+ fieldsStream.writeInt(n.intValue()); break;
+ case LONG:
+ fieldsStream.writeLong(n.longValue()); break;
+ case FLOAT:
+ fieldsStream.writeInt(Float.floatToIntBits(n.floatValue())); break;
+ case DOUBLE:
+ fieldsStream.writeLong(Double.doubleToLongBits(n.doubleValue())); break;
+ default:
+ assert false : "Should never get here";
+ }
+ } else {
fieldsStream.writeString(field.stringValue());
}
}
Index: lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestFieldsReader.java (revision 1099961)
+++ lucene/src/test/org/apache/lucene/index/TestFieldsReader.java (working copy)
@@ -27,12 +27,14 @@
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
@@ -508,4 +510,69 @@
}
}
+
+ public void testNumericField() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random, dir);
+ final int numDocs = _TestUtil.nextInt(random, 500, 1000) * RANDOM_MULTIPLIER;
+ final Number[] answers = new Number[numDocs];
+ final NumericField.DataType[] typeAnswers = new NumericField.DataType[numDocs];
+ for(int id=0;id