Index: src/test/org/apache/lucene/index/TestFieldsReader.java =================================================================== --- src/test/org/apache/lucene/index/TestFieldsReader.java (revision 805058) +++ src/test/org/apache/lucene/index/TestFieldsReader.java (working copy) @@ -112,12 +112,15 @@ SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames); Document doc = reader.doc(0, fieldSelector); assertTrue("doc is null and it shouldn't be", doc != null); + Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY); assertTrue("field is null and it shouldn't be", field != null); assertTrue("field is not lazy and it should be", field.isLazy()); String value = field.stringValue(); assertTrue("value is null and it shouldn't be", value != null); assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + field = doc.getFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY); assertTrue("field is null and it shouldn't be", field != null); assertTrue("field is not lazy and it should be", field.isLazy()); @@ -125,22 +128,29 @@ value = field.stringValue(); assertTrue("value is null and it shouldn't be", value != null); assertTrue(value + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT, value.equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY); assertTrue("field is null and it shouldn't be", field != null); assertTrue("Field is lazy and it should not be", field.isLazy() == false); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY); assertTrue("field is null and it 
shouldn't be", field != null); assertTrue("Field is lazy and it should not be", field.isLazy() == false); assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY); assertTrue("field is null and it shouldn't be", field != null); assertTrue("Field is lazy and it should not be", field.isLazy() == true); assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY); assertTrue("field is null and it shouldn't be", field != null); assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null); + assertTrue("calling binaryValue() twice should give same reference", field.binaryValue() == field.binaryValue()); byte [] bytes = field.binaryValue(); assertTrue("bytes is null and it shouldn't be", bytes != null); @@ -151,6 +161,87 @@ } } + public void testLatentFields() throws Exception { + assertTrue(dir != null); + assertTrue(fieldInfos != null); + FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos); + assertTrue(reader != null); + assertTrue(reader.size() == 1); + Set loadFieldNames = new HashSet(); + loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY); + loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY); + Set lazyFieldNames = new HashSet(); + //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY}; + lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY); + lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY); + lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY); + 
lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY); + lazyFieldNames.add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY); + + // Use LATENT instead of LAZY: every LAZY_LOAD answer from the set-based selector is remapped to LATENT, so repeated value reads below are expected to return distinct instances + SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) { + public FieldSelectorResult accept(String fieldName) { + final FieldSelectorResult result = super.accept(fieldName); + if (result.equals(FieldSelectorResult.LAZY_LOAD)) { + return FieldSelectorResult.LATENT; + } else { + return result; + } + } + }; + + Document doc = reader.doc(0, fieldSelector); + assertTrue("doc is null and it shouldn't be", doc != null); + Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("field is not lazy and it should be", field.isLazy()); + String value = field.stringValue(); + assertTrue("value is null and it shouldn't be", value != null); + assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true); + assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue()); + + field = doc.getFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("field is not lazy and it should be", field.isLazy()); + assertTrue("binaryValue isn't null for lazy string field", field.binaryValue() == null); + value = field.stringValue(); + assertTrue("value is null and it shouldn't be", value != null); + assertTrue(value + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT, value.equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true); + + field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("Field is lazy and it should not be", field.isLazy() == false); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + + 
field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("Field is lazy and it should not be", field.isLazy() == false); + assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + + field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("Field is not lazy and it should be", field.isLazy() == true); + assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true); + assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue()); + + field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null); + assertTrue("calling binaryValue() twice should give different references", field.binaryValue() != field.binaryValue()); + + byte [] bytes = field.binaryValue(); + assertTrue("bytes is null and it shouldn't be", bytes != null); + assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length); + for (int i = 0; i < bytes.length; i++) { + assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]); + + } + } + + + + public void testLazyFieldsAfterClose() throws Exception { assertTrue(dir != null); assertTrue(fieldInfos != null); Index: src/java/org/apache/lucene/index/FieldsReader.java =================================================================== --- src/java/org/apache/lucene/index/FieldsReader.java (revision 805058) +++ src/java/org/apache/lucene/index/FieldsReader.java (working 
copy) @@ -236,8 +236,11 @@ break;//Get out of this loop } else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) { - addFieldLazy(doc, fi, binary, compressed, tokenize); + addFieldLazy(doc, fi, binary, compressed, tokenize, true); } + else if (acceptField.equals(FieldSelectorResult.LATENT)) { + addFieldLazy(doc, fi, binary, compressed, tokenize, false); + } else if (acceptField.equals(FieldSelectorResult.SIZE)){ skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); } @@ -296,16 +299,16 @@ } } - private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { + private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, boolean cacheResult) throws IOException { if (binary) { int toRead = fieldsStream.readVInt(); long pointer = fieldsStream.getFilePointer(); if (compressed) { //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS)); - doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer, binary)); + doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer, binary, cacheResult)); } else { //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); - doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary)); + doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult)); } //Need to move the pointer ahead by toRead positions fieldsStream.seek(pointer + toRead); @@ -319,7 +322,7 @@ store = Field.Store.COMPRESS; int toRead = fieldsStream.readVInt(); long pointer = fieldsStream.getFilePointer(); - f = new LazyField(fi.name, store, toRead, pointer, binary); + f = new LazyField(fi.name, store, toRead, pointer, binary, cacheResult); //skip over the part that we aren't loading fieldsStream.seek(pointer + toRead); f.setOmitNorms(fi.omitNorms); @@ -332,7 +335,7 @@ fieldsStream.seek(pointer+length); else fieldsStream.skipChars(length); - f = new 
LazyField(fi.name, store, index, termVector, length, pointer, binary); + f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult); f.setOmitNorms(fi.omitNorms); f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions); } @@ -454,22 +457,25 @@ private class LazyField extends AbstractField implements Fieldable { private int toRead; private long pointer; + private final boolean cacheResult; - public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) { + public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean cacheResult) { super(name, store, Field.Index.NO, Field.TermVector.NO); this.toRead = toRead; this.pointer = pointer; this.isBinary = isBinary; + this.cacheResult = cacheResult; if (isBinary) binaryLength = toRead; lazy = true; } - public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) { + public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean cacheResult) { super(name, store, index, termVector); this.toRead = toRead; this.pointer = pointer; this.isBinary = isBinary; + this.cacheResult = cacheResult; if (isBinary) binaryLength = toRead; lazy = true; @@ -518,28 +524,38 @@ if (fieldsData == null) { IndexInput localFieldsStream = getFieldStream(); try { + String value; localFieldsStream.seek(pointer); if (isCompressed) { final byte[] b = new byte[toRead]; localFieldsStream.readBytes(b, 0, b.length); - fieldsData = new String(uncompress(b), "UTF-8"); + value = new String(uncompress(b), "UTF-8"); } else { if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { byte[] bytes = new byte[toRead]; localFieldsStream.readBytes(bytes, 0, toRead); - fieldsData = new String(bytes, "UTF-8"); + value = new String(bytes, "UTF-8"); } else { //read in chars b/c we already know the 
length we need to read char[] chars = new char[toRead]; localFieldsStream.readChars(chars, 0, toRead); - fieldsData = new String(chars); + value = new String(chars); } } + + // Cache the value (LAZY_LOAD) + if (cacheResult) { + fieldsData = value; + } + + // Return the loaded value + return value; } catch (IOException e) { throw new FieldReaderException(e); } + } else { + return (String) fieldsData; } - return (String) fieldsData; } } @@ -580,22 +596,30 @@ // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people // since they are already handling this exception when getting the document try { + byte[] value; localFieldsStream.seek(pointer); localFieldsStream.readBytes(b, 0, toRead); if (isCompressed == true) { - fieldsData = uncompress(b); + value = uncompress(b); } else { - fieldsData = b; + value = b; } + + // Cache the value (LAZY_LOAD) + if (cacheResult) { + fieldsData = value; + } + + // Return the loaded value + binaryOffset = 0; + binaryLength = toRead; + return value; } catch (IOException e) { throw new FieldReaderException(e); } - - binaryOffset = 0; - binaryLength = toRead; + } else { + return (byte[]) fieldsData; } - - return (byte[]) fieldsData; } else return null; } Index: src/java/org/apache/lucene/document/FieldSelectorResult.java =================================================================== --- src/java/org/apache/lucene/document/FieldSelectorResult.java (revision 805058) +++ src/java/org/apache/lucene/document/FieldSelectorResult.java (working copy) @@ -75,8 +75,13 @@ /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */ public transient static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6); + /** + * Similar to {@link #LAZY_LOAD} except that fields with this selector will never be cached in a {@link Document} instance. 
+ * Each time {@link Fieldable#stringValue()} is called, the string value will be pulled from disk. This is useful when + * requesting very large fields that should not be cached in memory. + */ + public transient static final FieldSelectorResult LATENT = new FieldSelectorResult(7); - private int id; private FieldSelectorResult(int id) {