Index: java/org/apache/lucene/document/AbstractField.java
===================================================================
--- java/org/apache/lucene/document/AbstractField.java (revision 683964)
+++ java/org/apache/lucene/document/AbstractField.java (working copy)
@@ -37,10 +37,12 @@
protected float boost = 1.0f;
// the one and only data object for all different kind of field values
protected Object fieldsData = null;
+ // offset and length of the value within the byte[] held in fieldsData (meaningful for binary fields only)
+ protected int binaryLength;
+ protected int binaryOffset;
protected AbstractField()
{
-
}
protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
@@ -199,8 +201,44 @@
}
/** True iff the value of the filed is stored as binary */
- public final boolean isBinary() { return isBinary; }
+ public final boolean isBinary() {
+ return isBinary;
+ }
+
+ /**
+ * Return the raw byte[] for the binary field. Note that
+ * you must also call {@link #getBinaryLength} and {@link
+ * #getBinaryOffset} to know which range of bytes in this
+ * returned array belong to the field.
+ * @return reference to the Field value as byte[].
+ */
+ public byte[] getBinaryValue() {
+ return isBinary ? (byte[]) fieldsData : null;
+ }
+
+ public Fieldable getBinaryField(byte[] scratch){
+ return isBinary ? this : null;
+ }
+
+ /**
+ * Returns the length of the byte[] segment that is used as the value. If the Field is
+ * not binary, the returned value is undefined.
+ * @return length of byte[] segment that represents this Field value
+ */
+ public int getBinaryLength() {
+ return binaryLength;
+ }
+
+ /**
+ * Returns the offset into the byte[] segment that is used as the value. If the Field is
+ * not binary, the returned value is undefined.
+ * @return index of the first byte in the byte[] segment that represents this Field value
+ */
+ public int getBinaryOffset() {
+ return binaryOffset;
+ }
+
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
Index: java/org/apache/lucene/document/Document.java
===================================================================
--- java/org/apache/lucene/document/Document.java (revision 683964)
+++ java/org/apache/lucene/document/Document.java (working copy)
@@ -314,6 +314,28 @@
return null;
}
+ /**
+ * Returns Fieldable for the first (or only) field that has the name
+ * specified as the method parameter. This method will return null
+ * if no binary fields with the specified name are available.
+ * There may be non-binary fields with the same name.
+ * The scratch buffer may be null or shorter than needed; in that case a new byte[] is allocated.
+ * If the scratch buffer is big enough, the returned Fieldable may use it to hold the value.
+ *
+ * @param name the name of the field.
+ * @param scratch buffer where the content will be stored, may be null
+ * @return a Fieldable containing the binary field value or null
+ */
+ public final Fieldable getStoredBinaryField(String name, byte[] scratch) {
+ for (int i=0; i < fields.size(); i++) {
+ Fieldable field = (Fieldable)fields.get(i);
+ if (field.name().equals(name) && (field.isBinary())){
+ return field.getBinaryField(scratch);
+ }
+ }
+ return null;
+ }
+
/** Prints the fields of a document for human consumption. */
public final String toString() {
StringBuffer buffer = new StringBuffer();
Index: java/org/apache/lucene/document/Field.java
===================================================================
--- java/org/apache/lucene/document/Field.java (revision 683964)
+++ java/org/apache/lucene/document/Field.java (working copy)
@@ -137,22 +137,39 @@
/** The value of the field as a String, or null. If null, the Reader value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
+ * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
/** The value of the field as a Reader, or null. If null, the String value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
+ * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
/** The value of the field in Binary, or null. If null, the Reader value,
* String value, or TokenStream value is used. Exactly one of stringValue(),
- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
- public byte[] binaryValue() { return isBinary ? (byte[])fieldsData : null; }
+ * readerValue(), getBinaryValue(), and tokenStreamValue() must be set.
+ * @deprecated This method must allocate a new byte[] if
+ * the {@link AbstractField#getBinaryOffset()} is non-zero
+ * or {@link AbstractField#getBinaryLength()} is not the
+ * full length of the byte[]. Please use {@link
+ * AbstractField#getBinaryValue()} instead, which simply
+ * returns the byte[].
+ */
+ public byte[] binaryValue() {
+ if (!isBinary)
+ return null;
+ final byte[] data = (byte[]) fieldsData;
+ if (binaryOffset == 0 && data.length == binaryLength)
+ return data; //Optimization
+
+ final byte[] ret = new byte[binaryLength];
+ System.arraycopy(data, binaryOffset, ret, 0, binaryLength);
+ return ret;
+ }
/** The value of the field as a TokesStream, or null. If null, the Reader value,
* String value, or binary value is used. Exactly one of stringValue(),
- * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
+ * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public TokenStream tokenStreamValue() { return fieldsData instanceof TokenStream ? (TokenStream)fieldsData : null; }
@@ -182,9 +199,19 @@
/** Expert: change the value of this field. See setValue(String). */
public void setValue(byte[] value) {
fieldsData = value;
+ binaryLength = value.length;
+ binaryOffset = 0;
}
/** Expert: change the value of this field. See setValue(String). */
+ public void setValue(byte[] value, int offset, int length) {
+ fieldsData = value;
+ binaryLength = length;
+ binaryOffset = offset;
+ }
+
+
+ /** Expert: change the value of this field. See setValue(String). */
public void setValue(TokenStream value) {
fieldsData = value;
}
@@ -378,34 +405,49 @@
* @throws IllegalArgumentException if store is Store.NO
*/
public Field(String name, byte[] value, Store store) {
+ this(name, value, 0, value == null ? 0 : value.length, store); // null value is rejected by the delegate with IllegalArgumentException, not a NullPointerException here
+ }
+
+ /**
+ * Create a stored field with binary value. Optionally the value may be compressed.
+ *
+ * @param name The name of the field
+ * @param value The binary value
+ * @param offset Starting offset in value where this Field's bytes are
+ * @param length Number of bytes to use for this Field, starting at offset
+ * @param store How value should be stored (compressed or not)
+ * @throws IllegalArgumentException if store is Store.NO
+ */
+ public Field(String name, byte[] value, int offset, int length, Store store) {
+
if (name == null)
throw new IllegalArgumentException("name cannot be null");
if (value == null)
throw new IllegalArgumentException("value cannot be null");
this.name = name.intern();
- this.fieldsData = value;
+ fieldsData = value;
- if (store == Store.YES){
- this.isStored = true;
- this.isCompressed = false;
+ if (store == Store.YES) {
+ isStored = true;
+ isCompressed = false;
}
else if (store == Store.COMPRESS) {
- this.isStored = true;
- this.isCompressed = true;
+ isStored = true;
+ isCompressed = true;
}
else if (store == Store.NO)
throw new IllegalArgumentException("binary values can't be unstored");
else
throw new IllegalArgumentException("unknown store parameter " + store);
- this.isIndexed = false;
- this.isTokenized = false;
+ isIndexed = false;
+ isTokenized = false;
- this.isBinary = true;
+ isBinary = true;
+ binaryLength = length;
+ binaryOffset = offset;
setStoreTermVector(TermVector.NO);
}
-
-
}
Index: java/org/apache/lucene/document/Fieldable.java
===================================================================
--- java/org/apache/lucene/document/Fieldable.java (revision 683964)
+++ java/org/apache/lucene/document/Fieldable.java (working copy)
@@ -156,4 +156,29 @@
* @return true if this field can be loaded lazily
*/
boolean isLazy();
+
+ /**
+ * Returns the offset into the byte[] segment that is used as the value. If the Field is
+ * not binary, the returned value is undefined.
+ * @return index of the first byte in the byte[] segment that represents this Field value
+ */
+ abstract int getBinaryOffset();
+
+ /**
+ * Returns the length of the byte[] segment that is used as the value. If the Field is
+ * not binary, the returned value is undefined.
+ * @return length of byte[] segment that represents this Field value
+ */
+ abstract int getBinaryLength();
+
+ /**
+ * Return the raw byte[] for the binary field. Note that
+ * you must also call {@link #getBinaryLength} and {@link
+ * #getBinaryOffset} to know which range of bytes in this
+ * returned array belong to the field.
+ * @return reference to the Field value as byte[].
+ */
+ abstract byte[] getBinaryValue();
+
+ abstract Fieldable getBinaryField(byte[] scratch);
}
Index: java/org/apache/lucene/index/FieldsReader.java
===================================================================
--- java/org/apache/lucene/index/FieldsReader.java (revision 683964)
+++ java/org/apache/lucene/index/FieldsReader.java (working copy)
@@ -451,27 +451,28 @@
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
public byte[] binaryValue() {
ensureOpen();
- if (isBinary) {
- if (fieldsData == null) {
- final byte[] b = new byte[toRead];
- IndexInput localFieldsStream = getFieldStream();
- //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
- //since they are already handling this exception when getting the document
- try {
- localFieldsStream.seek(pointer);
- localFieldsStream.readBytes(b, 0, b.length);
- if (isCompressed == true) {
- fieldsData = uncompress(b);
- } else {
- fieldsData = b;
- }
- } catch (IOException e) {
- throw new FieldReaderException(e);
+ if (isBinary && fieldsData == null) {
+ final byte[] b = new byte[toRead];
+ IndexInput localFieldsStream = getFieldStream();
+ //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
+ //since they are already handling this exception when getting the document
+ try {
+ localFieldsStream.seek(pointer);
+ localFieldsStream.readBytes(b, 0, b.length);
+ if (isCompressed == true) {
+ fieldsData = uncompress(b);
+ } else {
+ fieldsData = b;
}
+ } catch (IOException e) {
+ throw new FieldReaderException(e);
}
- return (byte[]) fieldsData;
- } else
- return null;
+ isBinary = true;
+ binaryOffset = 0;
+ binaryLength = ((byte[]) fieldsData).length; // toRead is the on-disk size; after uncompress() the array length differs
+ }
+
+ return isBinary ? (byte[]) fieldsData : null;
}
/** The value of the field as a Reader, or null. If null, the String value,
@@ -545,6 +546,39 @@
ensureOpen();
this.toRead = toRead;
}
+
+ public Fieldable getBinaryField(byte[] scratch) {
+ ensureOpen();
+
+ if (!isBinary) return null;
+ if (fieldsData != null) return this;
+
+ final byte[] b;
+
+ //reallocate scratch if null or too small
+ if(scratch == null || scratch.length < toRead){
+ b = new byte[toRead];
+ } else b = scratch;
+
+ IndexInput localFieldsStream = getFieldStream();
+ //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people
+ //since they are already handling this exception when getting the document
+ try {
+ localFieldsStream.seek(pointer);
+ localFieldsStream.readBytes(b, 0, toRead);
+ if (isCompressed == true) {
+ fieldsData = uncompress(b);//TODO: this could be optimized as well (reuse existing buffer)
+ } else {
+ fieldsData = b;
+ }
+ } catch (IOException e) {
+ throw new FieldReaderException(e);
+ }
+ isBinary = true;
+ binaryOffset = 0;
+ binaryLength = isCompressed ? ((byte[]) fieldsData).length : toRead; // uncompress() yields a new array; a reused scratch array may be longer than toRead
+ return this;
+ }
}
private final byte[] uncompress(final byte[] input)
Index: java/org/apache/lucene/index/FieldsWriter.java
===================================================================
--- java/org/apache/lucene/index/FieldsWriter.java (revision 683964)
+++ java/org/apache/lucene/index/FieldsWriter.java (working copy)
@@ -21,6 +21,7 @@
import java.util.Iterator;
import java.util.zip.Deflater;
+import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
@@ -105,7 +106,7 @@
doClose = true;
}
- FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException {
+ FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
fieldInfos = fn;
fieldsStream = fdt;
indexStream = fdx;
@@ -190,32 +191,43 @@
if (field.isCompressed()) {
// compression is enabled for the current field
- byte[] data = null;
-
+ final byte[] data;
+ final int len;
+ final int offset;
if (disableCompression) {
// optimized case for merging, the data
// is already compressed
- data = field.binaryValue();
+ final AbstractField f = (AbstractField) field;// FieldsReader.FieldForMerge is AbstractField
+ data = f.getBinaryValue();
+ len = f.getBinaryLength();
+ offset = f.getBinaryOffset();
} else {
// check if it is a binary field
if (field.isBinary()) {
- data = compress(field.binaryValue());
+ data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
+ } else {
+ byte x[] = field.stringValue().getBytes("UTF-8");
+ data = compress(x, 0, x.length);
}
- else {
- data = compress(field.stringValue().getBytes("UTF-8"));
- }
+ len = data.length;
+ offset = 0;
}
- final int len = data.length;
+
fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
+ fieldsStream.writeBytes(data, offset, len);
}
else {
// compression is disabled for the current field
if (field.isBinary()) {
- byte[] data = field.binaryValue();
- final int len = data.length;
+ final byte[] data;
+ final int len;
+ final int offset;
+ data = field.getBinaryValue();
+ len = field.getBinaryLength();
+ offset = field.getBinaryOffset();
+
fieldsStream.writeVInt(len);
- fieldsStream.writeBytes(data, len);
+ fieldsStream.writeBytes(data, offset, len);
}
else {
fieldsStream.writeString(field.stringValue());
@@ -259,19 +271,23 @@
}
}
- private final byte[] compress (byte[] input) {
+ private final byte[] compress (byte[] input, int offset, int length) {
+ // Create the compressor with highest level of compression
+ Deflater compressor = new Deflater();
+ compressor.setLevel(Deflater.BEST_COMPRESSION);
+ // Give the compressor the data to compress
+ compressor.setInput(input, offset, length);
+ compressor.finish();
+
/*
* Create an expandable byte array to hold the compressed data.
* You cannot use an array that's the same size as the orginal because
* there is no guarantee that the compressed data will be smaller than
* the uncompressed data.
*/
- ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
- // Create the compressor with highest level of compression
- Deflater compressor = new Deflater();
-
try {
compressor.setLevel(Deflater.BEST_COMPRESSION);
Index: test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- test/org/apache/lucene/index/TestIndexWriter.java (revision 683964)
+++ test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -3765,4 +3765,36 @@
w.doFail = false;
w.rollback();
}
+
+
+ // LUCENE-1219
+ public void testBinaryFieldOffsetLength() throws IOException {
+ MockRAMDirectory dir = new MockRAMDirectory();
+ IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+ byte[] b = new byte[50];
+ for(int i=0;i<50;i++)
+ b[i] = (byte) (i+77);
+
+ Document doc = new Document();
+ Field f = new Field("binary", b, 10, 17, Field.Store.YES);
+ byte[] bx = f.getBinaryValue();
+ assertTrue(bx != null);
+ assertEquals(50, bx.length);
+ assertEquals(10, f.getBinaryOffset());
+ assertEquals(17, f.getBinaryLength());
+ doc.add(f);
+ w.addDocument(doc);
+ w.close();
+
+ IndexReader ir = IndexReader.open(dir);
+ doc = ir.document(0);
+ f = doc.getField("binary");
+ b = f.getBinaryValue();
+ assertTrue(b != null);
+ assertEquals(17, b.length); // exact check; the former third argument was treated as a delta of 17
+ assertEquals(87, b[0]);
+ ir.close();
+ dir.close();
+ }
+
}