Index: solr/src/java/org/apache/solr/schema/TrieDateField.java =================================================================== --- solr/src/java/org/apache/solr/schema/TrieDateField.java (revision 1099211) +++ solr/src/java/org/apache/solr/schema/TrieDateField.java (working copy) @@ -65,7 +65,7 @@ public Date toObject(Fieldable f) { byte[] arr = f.getBinaryValue(); if (arr==null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,TrieField.badFieldString(f)); - return new Date(TrieField.toLong(arr)); + return new Date(NumericUtils.bytesToLong(arr)); } @Override @@ -89,7 +89,7 @@ return; } - xmlWriter.writeDate(name,new Date(TrieField.toLong(arr))); + xmlWriter.writeDate(name,new Date(NumericUtils.bytesToLong(arr))); } @Override @@ -100,7 +100,7 @@ return; } - writer.writeDate(name,new Date(TrieField.toLong(arr))); + writer.writeDate(name,new Date(NumericUtils.bytesToLong(arr))); } @Override @@ -136,7 +136,7 @@ public String toExternal(Fieldable f) { byte[] arr = f.getBinaryValue(); if (arr==null) return TrieField.badFieldString(f); - return super.toExternal(new Date(TrieField.toLong(arr))); + return super.toExternal(new Date(NumericUtils.bytesToLong(arr))); } @Override @@ -167,7 +167,7 @@ TokenStream ts=null; long time = super.parseMath(null, externalVal).getTime(); - if (stored) arr = TrieField.toArr(time); + if (stored) arr = NumericUtils.longToBytes(time); if (indexed) ts = new NumericTokenStream(ps).setLongValue(time); Field f; Index: solr/src/java/org/apache/solr/schema/TrieField.java =================================================================== --- solr/src/java/org/apache/solr/schema/TrieField.java (revision 1099211) +++ solr/src/java/org/apache/solr/schema/TrieField.java (working copy) @@ -98,15 +98,15 @@ if (arr==null) return badFieldString(f); switch (type) { case INTEGER: - return toInt(arr); + return NumericUtils.bytesToInt(arr); case FLOAT: - return toFloat(arr); + return NumericUtils.bytesToFloat(arr); case LONG: - return 
toLong(arr); + return NumericUtils.bytesToLong(arr); case DOUBLE: - return toDouble(arr); + return NumericUtils.bytesToDouble(arr); case DATE: - return new Date(toLong(arr)); + return new Date(NumericUtils.bytesToLong(arr)); default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); } @@ -160,19 +160,19 @@ switch (type) { case INTEGER: - xmlWriter.writeInt(name,toInt(arr)); + xmlWriter.writeInt(name,NumericUtils.bytesToInt(arr)); break; case FLOAT: - xmlWriter.writeFloat(name,toFloat(arr)); + xmlWriter.writeFloat(name,NumericUtils.bytesToFloat(arr)); break; case LONG: - xmlWriter.writeLong(name,toLong(arr)); + xmlWriter.writeLong(name,NumericUtils.bytesToLong(arr)); break; case DOUBLE: - xmlWriter.writeDouble(name,toDouble(arr)); + xmlWriter.writeDouble(name,NumericUtils.bytesToDouble(arr)); break; case DATE: - xmlWriter.writeDate(name,new Date(toLong(arr))); + xmlWriter.writeDate(name,new Date(NumericUtils.bytesToLong(arr))); break; default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); @@ -188,19 +188,19 @@ } switch (type) { case INTEGER: - writer.writeInt(name,toInt(arr)); + writer.writeInt(name,NumericUtils.bytesToInt(arr)); break; case FLOAT: - writer.writeFloat(name,toFloat(arr)); + writer.writeFloat(name,NumericUtils.bytesToFloat(arr)); break; case LONG: - writer.writeLong(name,toLong(arr)); + writer.writeLong(name,NumericUtils.bytesToLong(arr)); break; case DOUBLE: - writer.writeDouble(name,toDouble(arr)); + writer.writeDouble(name,NumericUtils.bytesToDouble(arr)); break; case DATE: - writer.writeDate(name,new Date(toLong(arr))); + writer.writeDate(name,new Date(NumericUtils.bytesToLong(arr))); break; default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); @@ -273,56 +273,6 @@ return query; } - - static int toInt(byte[] arr) { - return (arr[0]<<24) | ((arr[1]&0xff)<<16) | 
((arr[2]&0xff)<<8) | (arr[3]&0xff); - } - - static long toLong(byte[] arr) { - int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff); - int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff); - return (((long)high)<<32) | (low&0x0ffffffffL); - } - - static float toFloat(byte[] arr) { - return Float.intBitsToFloat(toInt(arr)); - } - - static double toDouble(byte[] arr) { - return Double.longBitsToDouble(toLong(arr)); - } - - static byte[] toArr(int val) { - byte[] arr = new byte[4]; - arr[0] = (byte)(val>>>24); - arr[1] = (byte)(val>>>16); - arr[2] = (byte)(val>>>8); - arr[3] = (byte)(val); - return arr; - } - - static byte[] toArr(long val) { - byte[] arr = new byte[8]; - arr[0] = (byte)(val>>>56); - arr[1] = (byte)(val>>>48); - arr[2] = (byte)(val>>>40); - arr[3] = (byte)(val>>>32); - arr[4] = (byte)(val>>>24); - arr[5] = (byte)(val>>>16); - arr[6] = (byte)(val>>>8); - arr[7] = (byte)(val); - return arr; - } - - static byte[] toArr(float val) { - return toArr(Float.floatToRawIntBits(val)); - } - - static byte[] toArr(double val) { - return toArr(Double.doubleToRawLongBits(val)); - } - - @Override public String storedToReadable(Fieldable f) { return toExternal(f); @@ -364,15 +314,15 @@ if (arr==null) return badFieldString(f); switch (type) { case INTEGER: - return Integer.toString(toInt(arr)); + return Integer.toString(NumericUtils.bytesToInt(arr)); case FLOAT: - return Float.toString(toFloat(arr)); + return Float.toString(NumericUtils.bytesToFloat(arr)); case LONG: - return Long.toString(toLong(arr)); + return Long.toString(NumericUtils.bytesToLong(arr)); case DOUBLE: - return Double.toString(toDouble(arr)); + return Double.toString(NumericUtils.bytesToDouble(arr)); case DATE: - return dateField.formatDate(new Date(toLong(arr))); + return dateField.formatDate(new Date(NumericUtils.bytesToLong(arr))); default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + 
f.name()); } @@ -422,27 +372,27 @@ switch (type) { case INTEGER: int i = Integer.parseInt(externalVal); - if (stored) arr = toArr(i); + if (stored) arr = NumericUtils.intToBytes(i); if (indexed) ts = new NumericTokenStream(ps).setIntValue(i); break; case FLOAT: float f = Float.parseFloat(externalVal); - if (stored) arr = toArr(f); + if (stored) arr = NumericUtils.floatToBytes(f); if (indexed) ts = new NumericTokenStream(ps).setFloatValue(f); break; case LONG: long l = Long.parseLong(externalVal); - if (stored) arr = toArr(l); + if (stored) arr = NumericUtils.longToBytes(l); if (indexed) ts = new NumericTokenStream(ps).setLongValue(l); break; case DOUBLE: double d = Double.parseDouble(externalVal); - if (stored) arr = toArr(d); + if (stored) arr = NumericUtils.doubleToBytes(d); if (indexed) ts = new NumericTokenStream(ps).setDoubleValue(d); break; case DATE: long time = dateField.parseMath(null, externalVal).getTime(); - if (stored) arr = toArr(time); + if (stored) arr = NumericUtils.longToBytes(time); if (indexed) ts = new NumericTokenStream(ps).setLongValue(time); break; default: Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1099211) +++ lucene/CHANGES.txt (working copy) @@ -43,6 +43,14 @@ PhraseQuery as term with lower doc freq will also have less positions. (Uwe Schindler, Robert Muir, Otis Gospodnetic) +* LUCENE-3065: When a NumericField is retrieved from a Document loaded + from IndexReader (or IndexSearcher), it will now come back as + NumericField not as a Field with a string-ified version of the + numeric value you had indexed. 
Note that this only applies for + newly-indexed Documents; older indices will still return Field + with the string-ified numeric value (Uwe Schindler, Ryan McKinley, + Mike McCandless) + Test Cases * LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to Index: lucene/src/test/org/apache/lucene/document/TestDocument.java =================================================================== --- lucene/src/test/org/apache/lucene/document/TestDocument.java (revision 1099211) +++ lucene/src/test/org/apache/lucene/document/TestDocument.java (working copy) @@ -3,13 +3,14 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -279,4 +280,60 @@ // expected } } + + public void testNumericField() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, dir); + final int numDocs = _TestUtil.nextInt(random, 500, 1000) * RANDOM_MULTIPLIER; + final Number[] answers = new Number[numDocs]; + for(int id=0;id= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) { - fieldsStream.seek(fieldsStream.getFilePointer() + toRead); - } else { - // We need to skip chars. This will slow us down, but still better - fieldsStream.skipChars(toRead); - } + if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) { + fieldsStream.seek(fieldsStream.getFilePointer() + toRead); + } else { + // We need to skip chars. 
This will slow us down, but still better + fieldsStream.skipChars(toRead); + } } - private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, boolean cacheResult) throws IOException { + private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, boolean cacheResult, byte numeric) throws IOException { if (binary) { int toRead = fieldsStream.readVInt(); long pointer = fieldsStream.getFilePointer(); @@ -333,6 +357,8 @@ doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed, cacheResult)); //Need to move the pointer ahead by toRead positions fieldsStream.seek(pointer + toRead); + } else if (numeric != -1) { + doc.add(loadNumericField(fi.name, numeric)); } else { Field.Store store = Field.Store.YES; Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize); @@ -366,8 +392,41 @@ } - private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException { + private NumericField loadNumericField(String fieldName, byte numeric) throws IOException { + switch(numeric) { + case FieldsWriter.FIELD_IS_NUMERIC_INT: + { + final byte[] b = new byte[4]; + fieldsStream.readBytes(b, 0, b.length); + return new NumericField(fieldName).setIntValue(NumericUtils.bytesToInt(b)); + } + case FieldsWriter.FIELD_IS_NUMERIC_LONG: + { + final byte[] b = new byte[8]; + fieldsStream.readBytes(b, 0, b.length); + return new NumericField(fieldName).setLongValue(NumericUtils.bytesToLong(b)); + } + case FieldsWriter.FIELD_IS_NUMERIC_FLOAT: + { + final byte[] b = new byte[4]; + fieldsStream.readBytes(b, 0, b.length); + return new NumericField(fieldName).setFloatValue(NumericUtils.bytesToFloat(b)); + } + case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE: + { + final byte[] b = new byte[8]; + fieldsStream.readBytes(b, 0, b.length); + return new 
NumericField(fieldName).setDoubleValue(NumericUtils.bytesToDouble(b)); + } + default: + // BUG + assert false; + return null; + } + } + private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, byte numeric) throws CorruptIndexException, IOException { + //we have a binary stored field, and it may be compressed if (binary) { int toRead = fieldsStream.readVInt(); @@ -378,6 +437,8 @@ } else { doc.add(new Field(fi.name, b)); } + } else if (numeric != -1) { + doc.add(loadNumericField(fi.name, numeric)); } else { Field.Store store = Field.Store.YES; Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize); @@ -415,8 +476,19 @@ // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes) // Read just the size -- caller must skip the field content to continue reading fields // Return the size in bytes or chars, depending on field type - private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException { - int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size; + private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed, byte numeric) throws IOException { + final int bytesize, size; + if (numeric != -1) { + if (numeric == FieldsWriter.FIELD_IS_NUMERIC_INT || numeric == FieldsWriter.FIELD_IS_NUMERIC_FLOAT) { + bytesize = 4; + } else { + bytesize = 8; + } + size = bytesize; + } else { + size = fieldsStream.readVInt(); + bytesize = (binary || compressed) ? size : 2*size; + } byte[] sizebytes = new byte[4]; sizebytes[0] = (byte) (bytesize>>>24); sizebytes[1] = (byte) (bytesize>>>16); @@ -427,7 +499,7 @@ } /** - * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is + * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is * loaded. 
*/ private class LazyField extends AbstractField implements Fieldable { @@ -519,13 +591,13 @@ } catch (IOException e) { throw new FieldReaderException(e); } - if (cacheResult == true){ - fieldsData = value; - } - return value; + if (cacheResult == true){ + fieldsData = value; + } + return value; } else{ - return (String) fieldsData; - } + return (String) fieldsData; + } } } @@ -574,25 +646,24 @@ if (isCompressed == true) { value = uncompress(b); } else { - value = b; - } + value = b; + } } catch (IOException e) { throw new FieldReaderException(e); } binaryOffset = 0; binaryLength = toRead; - if (cacheResult == true){ - fieldsData = value; - } - return value; + if (cacheResult == true){ + fieldsData = value; + } + return value; } else{ - return (byte[]) fieldsData; - } - - - } else - return null; + return (byte[]) fieldsData; + } + } else { + return null; + } } } Index: lucene/src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldsWriter.java (revision 1099211) +++ lucene/src/java/org/apache/lucene/index/FieldsWriter.java (working copy) @@ -21,15 +21,25 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.NumericField; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.RAMOutputStream; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.NumericUtils; final class FieldsWriter { static final byte FIELD_IS_TOKENIZED = 0x1; static final byte FIELD_IS_BINARY = 0x2; + static final byte FIELD_IS_NUMERIC = 0x8; + + // 2 bits w/ mask 0x30: + static final byte FIELD_IS_NUMERIC_INT = 0x10; + static final byte FIELD_IS_NUMERIC_LONG = 0x20; + static final byte 
FIELD_IS_NUMERIC_FLOAT = 0x0; + static final byte FIELD_IS_NUMERIC_DOUBLE = 0x30; + static final byte FIELD_NUMERIC_MASK = 0x30; /** @deprecated Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0 */ @Deprecated @@ -44,10 +54,13 @@ // Lucene 3.0: Removal of compressed fields static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; + // Lucene 3.2: NumericFields are stored in binary format + static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3; + // NOTE: if you introduce a new format, make it 1 higher // than the current one, and always change this if you // switch to a new format! - static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; + static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS; private FieldInfos fieldInfos; @@ -136,10 +149,34 @@ fieldsStream.writeVInt(fi.number); byte bits = 0; if (field.isTokenized()) - bits |= FieldsWriter.FIELD_IS_TOKENIZED; + bits |= FIELD_IS_TOKENIZED; if (field.isBinary()) - bits |= FieldsWriter.FIELD_IS_BINARY; + bits |= FIELD_IS_BINARY; + final byte[] numBytes; + + if (field instanceof NumericField) { + final Number n = ((NumericField) field).getNumericValue(); + bits |= FIELD_IS_NUMERIC; + if (n instanceof Integer) { + bits |= FIELD_IS_NUMERIC_INT; + numBytes = NumericUtils.intToBytes(((Integer) n).intValue()); + } else if (n instanceof Long) { + bits |= FIELD_IS_NUMERIC_LONG; + numBytes = NumericUtils.longToBytes(((Long) n).longValue()); + } else if (n instanceof Float) { + bits |= FIELD_IS_NUMERIC_FLOAT; + numBytes = NumericUtils.floatToBytes(((Float) n).floatValue()); + } else { + assert n instanceof Double; + bits |= FIELD_IS_NUMERIC_DOUBLE; + numBytes = NumericUtils.doubleToBytes(((Double) n).doubleValue()); + } + assert bits != 31: "n=" + n; + } else { + numBytes = null; + } + fieldsStream.writeByte(bits); if (field.isBinary()) { @@ -152,8 +189,9 @@ fieldsStream.writeVInt(len); fieldsStream.writeBytes(data, offset, len); - } - else { + } else if (numBytes != 
null) { + fieldsStream.writeBytes(numBytes, 0, numBytes.length); + } else { fieldsStream.writeString(field.stringValue()); } } Index: lucene/src/java/org/apache/lucene/util/NumericUtils.java =================================================================== --- lucene/src/java/org/apache/lucene/util/NumericUtils.java (revision 1099211) +++ lucene/src/java/org/apache/lucene/util/NumericUtils.java (working copy) @@ -465,5 +465,55 @@ } } - + + /** The methods below are used for encoding NumericField values into + * the index's stored fields. */ + + public static byte[] intToBytes(int val) { + byte[] arr = new byte[4]; + arr[0] = (byte)(val>>>24); + arr[1] = (byte)(val>>>16); + arr[2] = (byte)(val>>>8); + arr[3] = (byte)(val); + return arr; + } + + public static int bytesToInt(byte[] arr) { + return (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff); + } + + public static byte[] longToBytes(long val) { + byte[] arr = new byte[8]; + arr[0] = (byte)(val>>>56); + arr[1] = (byte)(val>>>48); + arr[2] = (byte)(val>>>40); + arr[3] = (byte)(val>>>32); + arr[4] = (byte)(val>>>24); + arr[5] = (byte)(val>>>16); + arr[6] = (byte)(val>>>8); + arr[7] = (byte)(val); + return arr; + } + + public static long bytesToLong(byte[] arr) { + int high = (arr[0]<<24) | ((arr[1]&0xff)<<16) | ((arr[2]&0xff)<<8) | (arr[3]&0xff); + int low = (arr[4]<<24) | ((arr[5]&0xff)<<16) | ((arr[6]&0xff)<<8) | (arr[7]&0xff); + return (((long)high)<<32) | (low&0x0ffffffffL); + } + + public static byte[] floatToBytes(float val) { + return intToBytes(Float.floatToRawIntBits(val)); + } + + public static float bytesToFloat(byte[] arr) { + return Float.intBitsToFloat(bytesToInt(arr)); + } + + public static byte[] doubleToBytes(double val) { + return longToBytes(Double.doubleToRawLongBits(val)); + } + + public static double bytesToDouble(byte[] arr) { + return Double.longBitsToDouble(bytesToLong(arr)); + } }