diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
index 37ccb27..defbcf1 100644
--- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
@@ -276,6 +276,42 @@ public class InstantiatedIndexReader extends IndexReader {
* @see org.apache.lucene.document.LoadFirstFieldSelector
*/
@Override
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ return (doc == null || isDeleted(n)) ? document(n) : super.document(n, doc, fieldSelector); // reuse the caller's doc: the base class appends the stored fields to it; deleted docs still yield null
+ }
+
+ /**
+ * Return the {@link org.apache.lucene.document.Document} at the nth
+ * position.
+ * <p>
+ * Warning!
+ * The resulting document is the actual stored document instance
+ * and not a deserialized clone as returned by an IndexReader
+ * over a {@link org.apache.lucene.store.Directory}.
+ * I.e., if you need to touch the document, clone it first!
+ *
+ * This can also be seen as a feature for live changes of stored values,
+ * but be careful! Adding a field with a name unknown to the index
+ * or to a field with previously no stored values will make
+ * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
+ * out of sync, causing problems for instance when merging the
+ * instantiated index to another index.
+ * <p>
+ * This implementation ignores the field selector! All stored fields are always returned!
+ *
+ *
+ * @param n document number
+ * @param fieldSelector ignored
+ * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ *
+ * @see org.apache.lucene.document.Fieldable
+ * @see org.apache.lucene.document.FieldSelector
+ * @see org.apache.lucene.document.SetBasedFieldSelector
+ * @see org.apache.lucene.document.LoadFirstFieldSelector
+ */
+ @Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
return document(n);
}
@@ -300,7 +336,6 @@ public class InstantiatedIndexReader extends IndexReader {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
-
@Override
public Document document(int n) throws IOException {
return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument();
diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 88ac477..40cd4e9 100644
--- a/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -1140,16 +1140,16 @@ public class MemoryIndex implements Serializable {
}
@Override
- public Document document(int n) {
+ public Document document(int n, FieldSelector fieldSelector) { // neither argument is used by this body
if (DEBUG) System.err.println("MemoryIndexReader.document");
return new Document(); // there are no stored fields
}
//When we convert to JDK 1.5 make this Set
@Override
- public Document document(int n, FieldSelector fieldSelector) throws IOException {
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws IOException { // n and fieldSelector are not used by this body
if (DEBUG) System.err.println("MemoryIndexReader.document");
- return new Document(); // there are no stored fields
+ return (doc != null) ? doc : new Document(); // there are no stored fields, so a supplied doc is handed back unchanged
}
@Override
diff --git a/src/java/org/apache/lucene/document/Document.java b/src/java/org/apache/lucene/document/Document.java
index 680bfd9..beb7e98 100644
--- a/src/java/org/apache/lucene/document/Document.java
+++ b/src/java/org/apache/lucene/document/Document.java
@@ -37,12 +37,28 @@ import org.apache.lucene.index.IndexReader; // for javadoc
*/
public final class Document implements java.io.Serializable {
- List fields = new ArrayList();
+ final List fields;
private float boost = 1.0f;
/** Constructs a new document with no fields. */
- public Document() {}
+ public Document() {
+ this(new ArrayList<Fieldable>());
+ }
+
+ /** Constructs a document with no fields whose internal list is pre-sized.
+ * @param initialCapacity initial capacity of the internal field list; a negative value makes ArrayList throw IllegalArgumentException */
+ public Document(final int initialCapacity) {
+ this(new ArrayList<Fieldable>(initialCapacity));
+ }
+ /** Constructs a document backed by the given List of Fieldables.
+ * @param fields the List to use as field storage, must be non-null. NOT COPIED, so later changes to it show through this document */
+ public Document(final List<Fieldable> fields) {
+ if (fields == null) {
+ throw new IllegalArgumentException("fields must be non-null");
+ }
+ this.fields = fields;
+ }
/** Sets a boost factor for hits on any field of this document. This value
* will be multiplied into the score of all hits on this document.
diff --git a/src/java/org/apache/lucene/index/DirectoryReader.java b/src/java/org/apache/lucene/index/DirectoryReader.java
index 46877c7..c94a23a 100644
--- a/src/java/org/apache/lucene/index/DirectoryReader.java
+++ b/src/java/org/apache/lucene/index/DirectoryReader.java
@@ -520,9 +520,15 @@ class DirectoryReader extends IndexReader implements Cloneable {
// inherit javadoc
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ return document(n, null, fieldSelector); // forward to the three-argument overload with no caller-supplied Document
+ }
+
+ // inherit javadoc; doc may be null and is passed through unchanged to the selected sub-reader
+ @Override
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
ensureOpen();
int i = readerIndex(n); // find segment num
- return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
+ return subReaders[i].document(n - starts[i], doc, fieldSelector); // dispatch to segment reader
}
@Override
diff --git a/src/java/org/apache/lucene/index/FieldsReader.java b/src/java/org/apache/lucene/index/FieldsReader.java
index 6eed299..6de9993 100644
--- a/src/java/org/apache/lucene/index/FieldsReader.java
+++ b/src/java/org/apache/lucene/index/FieldsReader.java
@@ -213,13 +213,34 @@ final class FieldsReader implements Cloneable {
return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
}
+ /**
+ * Legacy signature: delegates to the three-argument form without a caller-supplied Document.
+ * @param n number of the document whose stored fields are read
+ * @param fieldSelector decides which fields are loaded; may be null
+ * @return a new Document filled with fields as per fieldSelector
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ */
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ return doc(n, null, fieldSelector); // null: no Document to reuse
+ }
+
+ /**
+ * Reads the stored fields of document n, allowing re-use of (or aggregation into) an existing Document.
+ * @param n number of the document whose stored fields are read
+ * @param d Document to add the fields to; if null, a new Document is created
+ * @param fieldSelector decides which fields are loaded; may be null
+ * @return a Document with fields added as per fieldSelector; expected to be d itself when d is non-null (the return statement is outside this hunk)
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ */
+ final Document doc(int n, Document d, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
seekIndex(n);
long position = indexStream.readLong();
fieldsStream.seek(position);
- Document doc = new Document();
- int numFields = fieldsStream.readVInt();
+ final int numFields = fieldsStream.readVInt();
+ final Document doc = (d != null) ? d : new Document((fieldSelector == null && numFields > 10) ? numFields : 10); // reuse the caller's doc; otherwise pre-size the new one
for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
diff --git a/src/java/org/apache/lucene/index/FilterIndexReader.java b/src/java/org/apache/lucene/index/FilterIndexReader.java
index ded965f..da26574 100644
--- a/src/java/org/apache/lucene/index/FilterIndexReader.java
+++ b/src/java/org/apache/lucene/index/FilterIndexReader.java
@@ -157,8 +157,13 @@ public class FilterIndexReader extends IndexReader {
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ return document(n, null, fieldSelector); // forward to the three-argument overload with no caller-supplied Document
+ }
+
+ @Override
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
ensureOpen();
- return in.document(n, fieldSelector);
+ return in.document(n, doc, fieldSelector); // delegate to `in`, passing doc (possibly null) through as-is
}
@Override
diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java
index e1d35ec..e108c0f 100644
--- a/src/java/org/apache/lucene/index/IndexReader.java
+++ b/src/java/org/apache/lucene/index/IndexReader.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.*;
@@ -655,7 +656,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
*/
public Document document(int n) throws CorruptIndexException, IOException {
ensureOpen();
- return document(n, null);
+ return document(n, null, null); // null doc and null fieldSelector: nothing to reuse, no field filtering
}
/**
@@ -688,8 +689,36 @@ public abstract class IndexReader implements Cloneable,Closeable {
* @see org.apache.lucene.document.SetBasedFieldSelector
* @see org.apache.lucene.document.LoadFirstFieldSelector
*/
- // TODO (1.5): When we convert to JDK 1.5 make this Set
public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
+
+ /**
+ * Default implementation that copies fields from {@link #document(int, FieldSelector)}; override this to provide a more efficient implementation.
+ * @param n Get the document at the nth position
+ * @param doc a {@link Document} that receives the loaded fields; they are appended, so it should be in whatever
+ * (typically empty) state the caller wants. If null, the document loaded by the two-argument form is returned
+ * @param fieldSelector The {@link FieldSelector} to use to determine what
+ * Fields should be loaded on the Document. May be null, in which case
+ * all Fields will be loaded.
+ * @return doc when it is non-null, otherwise the stored fields of the
+ * {@link org.apache.lucene.document.Document} at the nth position
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ * @see IndexReader#document(int, FieldSelector)
+ * @see org.apache.lucene.document.Fieldable
+ * @see org.apache.lucene.document.FieldSelector
+ * @see org.apache.lucene.document.SetBasedFieldSelector
+ * @see org.apache.lucene.document.LoadFirstFieldSelector
+ */
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ final Document loaded = document(n, fieldSelector);
+ if (doc == null) {
+ return loaded;
+ }
+ for (final Fieldable field : loaded.getFields()) {
+ doc.add(field);
+ }
+ return doc;
+ }
/** Returns true if document n has been deleted */
public abstract boolean isDeleted(int n);
diff --git a/src/java/org/apache/lucene/index/MultiReader.java b/src/java/org/apache/lucene/index/MultiReader.java
index 7c20c4a..ff43425 100644
--- a/src/java/org/apache/lucene/index/MultiReader.java
+++ b/src/java/org/apache/lucene/index/MultiReader.java
@@ -246,9 +246,15 @@ public class MultiReader extends IndexReader implements Cloneable {
// inherit javadoc
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ return document(n, null, fieldSelector); // forward to the three-argument overload with no caller-supplied Document
+ }
+
+ // inherit javadoc; doc may be null and is passed through unchanged to the selected sub-reader
+ @Override
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
ensureOpen();
int i = readerIndex(n); // find segment num
- return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
+ return subReaders[i].document(n - starts[i], doc, fieldSelector); // dispatch to segment reader
}
@Override
diff --git a/src/java/org/apache/lucene/index/ParallelReader.java b/src/java/org/apache/lucene/index/ParallelReader.java
index 66aee15..1719eb6 100644
--- a/src/java/org/apache/lucene/index/ParallelReader.java
+++ b/src/java/org/apache/lucene/index/ParallelReader.java
@@ -269,11 +269,16 @@ public class ParallelReader extends IndexReader {
hasDeletions = false;
}
- // append fields from storedFieldReaders
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ return document(n, null, fieldSelector); // forward to the three-argument overload with no caller-supplied Document
+ }
+
+ // append fields from storedFieldReaders
+ @Override
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
ensureOpen();
- Document result = new Document();
+ Document result = (doc != null) ? doc : new Document();
for (final IndexReader reader: storedFieldReaders) {
boolean include = (fieldSelector==null);
@@ -290,9 +295,12 @@ public class ParallelReader extends IndexReader {
}
}
if (include) {
- List fields = reader.document(n, fieldSelector).getFields();
- for (Fieldable field : fields) {
- result.add(field);
+ // pass document, if yields a different doc, manually copy the fields
+ Document pdoc = reader.document(n, result, fieldSelector);
+ if (pdoc != result) {
+ for (Fieldable field : pdoc.getFields()) {
+ result.add(field);
+ }
}
}
}
diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java
index d9761a9..8a83e70 100644
--- a/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/src/java/org/apache/lucene/index/SegmentReader.java
@@ -897,8 +897,13 @@ public class SegmentReader extends IndexReader implements Cloneable {
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ return document(n, null, fieldSelector); // forward to the three-argument overload with no caller-supplied Document
+ }
+
+ @Override
+ public Document document(int n, Document doc, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
ensureOpen();
- return getFieldsReader().doc(n, fieldSelector);
+ return getFieldsReader().doc(n, doc, fieldSelector); // hand doc (possibly null) to the fields reader's three-argument doc(...)
}
@Override
diff --git a/src/test/org/apache/lucene/document/TestDocument.java b/src/test/org/apache/lucene/document/TestDocument.java
index 079ab6e..7a5894e 100644
--- a/src/test/org/apache/lucene/document/TestDocument.java
+++ b/src/test/org/apache/lucene/document/TestDocument.java
@@ -1,5 +1,8 @@
package org.apache.lucene.document;
+import java.util.ArrayList;
+import java.util.LinkedList;
+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@@ -36,7 +39,43 @@ public class TestDocument extends LuceneTestCase
String binaryVal = "this text will be stored as a byte array in the index";
String binaryVal2 = "this text will be also stored as a byte array in the index";
-
+
+ /** test the 3 constructor variants */
+ public void testDocumentConstructors() {
+ // the no-arg constructor must still provide a backing list
+ final Document plain = new Document();
+ assertNotNull(plain.fields);
+
+ // unfortunately ArrayList doesn't allow us to validate initialCapacity
+ final Document presized = new Document(128);
+ assertNotNull(presized.fields);
+
+ // a caller-supplied list is adopted as-is rather than copied
+ final ArrayList arrayBacked = new ArrayList(512);
+ final Document adopting = new Document(arrayBacked);
+ assertSame(arrayBacked, adopting.fields);
+
+ // any kind of list is ok, though perhaps rarely wise
+ final LinkedList linkedBacked = new LinkedList();
+ assertSame(linkedBacked, new Document(linkedBacked).fields);
+ }
+
+ /** test the constructor variants exceptions */
+ public void testDocumentConstructorExceptions() {
+ try {
+ new Document(-1);
+ fail("negative capacity should throw an exception");
+ } catch (IllegalArgumentException e) {
+ // expected: a negative initial capacity is rejected
+ }
+ try {
+ new Document(null);
+ fail("null list should throw an exception");
+ } catch (IllegalArgumentException e) {
+ // expected: the List constructor rejects null
+ }
+ }
+
public void testBinaryField()
throws Exception
{
diff --git a/src/test/org/apache/lucene/index/TestDirectoryReader.java b/src/test/org/apache/lucene/index/TestDirectoryReader.java
index aed8ff1..1e0e797 100644
--- a/src/test/org/apache/lucene/index/TestDirectoryReader.java
+++ b/src/test/org/apache/lucene/index/TestDirectoryReader.java
@@ -22,10 +22,12 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
+import java.util.List;
public class TestDirectoryReader extends LuceneTestCase {
protected Directory dir;
@@ -85,6 +87,32 @@ public class TestDirectoryReader extends LuceneTestCase {
assertTrue(vector != null);
TestSegmentReader.checkNorms(reader);
}
+
+ /**
+ * tests reader.document(int, Document, FieldSelector)
+ * test that provided Document is reused if non-null, else created
+ * @throws IOException
+ */
+ public void doTestDocumentWithDoc() throws IOException { // NOTE(review): no call to this doTest* helper is visible in this patch — confirm the driver test invokes it
+ sis.read(dir);
+ IndexReader reader = openReader();
+ assertNotNull(reader);
+ final int storedFields = DocHelper.numFields(doc1) - DocHelper.unstored.size();
+
+ Document newDoc1 = reader.document(0, null, null); // null doc: a Document is created
+ assertNotNull(newDoc1);
+ assertEquals(storedFields, DocHelper.numFields(newDoc1));
+
+ Document newDoc1a = new Document();
+ assertSame(newDoc1a, reader.document(0, newDoc1a, null)); // supplied doc is filled and returned
+ assertEquals(storedFields, DocHelper.numFields(newDoc1a));
+
+ // loading into the same doc again appends, so the second half repeats the first
+ assertSame(newDoc1a, reader.document(0, newDoc1a, null));
+ assertEquals(2 * storedFields, DocHelper.numFields(newDoc1a));
+ List<Fieldable> f = newDoc1a.getFields();
+ assertEquals(f.get(0).stringValue(), f.get(f.size()/2).stringValue());
+ }
public void doTestUndeleteAll() throws IOException {
sis.read(dir);
diff --git a/src/test/org/apache/lucene/index/TestParallelReader.java b/src/test/org/apache/lucene/index/TestParallelReader.java
index 67e3ff4..7714a4d 100644
--- a/src/test/org/apache/lucene/index/TestParallelReader.java
+++ b/src/test/org/apache/lucene/index/TestParallelReader.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
@@ -29,6 +30,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.search.BooleanQuery;
@@ -128,7 +130,35 @@ public class TestParallelReader extends LuceneTestCase {
assertEquals("v2", doc2.get("f3"));
assertEquals("v2", doc2.get("f3"));
}
-
+
+ /**
+ * tests reader.document(int, Document, FieldSelector)
+ * test that provided Document is reused if non-null, else created
+ * @throws IOException
+ */
+ public void doTestDocumentWithDoc() throws IOException { // NOTE(review): sibling tests are named test*; a doTest* method is presumably not auto-run — confirm something invokes it
+ Directory dir1 = getDir1();
+ Directory dir2 = getDir2();
+ ParallelReader pr = new ParallelReader();
+ pr.add(IndexReader.open(dir1, false));
+ pr.add(IndexReader.open(dir2, false));
+
+ Document newDoc1 = pr.document(0, null, null); // null doc: a Document is created
+ assertNotNull(newDoc1);
+ assertEquals(4, DocHelper.numFields(newDoc1));
+
+ Document newDoc1a = new Document();
+ assertSame(newDoc1a, pr.document(0, newDoc1a, null)); // supplied doc is filled and returned
+ assertEquals(4, DocHelper.numFields(newDoc1a));
+
+ // loading into the same doc again appends, so the second half repeats the first
+ assertSame(newDoc1a, pr.document(0, newDoc1a, null));
+ assertEquals(8, DocHelper.numFields(newDoc1a));
+ List<Fieldable> f = newDoc1a.getFields();
+ assertEquals(f.get(0).stringValue(), f.get(f.size()/2).stringValue());
+ pr.close(); // release the reader opened by this test
+ }
+
public void testDocumentAcceptF2BlockedByBreak() throws IOException {
Directory dir1 = getDir1();
Directory dir2 = getDir2();
diff --git a/src/test/org/apache/lucene/index/TestSegmentReader.java b/src/test/org/apache/lucene/index/TestSegmentReader.java
index 58e219d..aa2318e 100644
--- a/src/test/org/apache/lucene/index/TestSegmentReader.java
+++ b/src/test/org/apache/lucene/index/TestSegmentReader.java
@@ -69,7 +69,29 @@ public class TestSegmentReader extends LuceneTestCase {
assertTrue(DocHelper.nameValues.containsKey(field.name()));
}
}
-
+
+ /**
+ * tests reader.document(int, Document, FieldSelector)
+ * test that provided Document is reused if non-null, else created
+ * @throws IOException
+ */
+ public void doTestDocumentWithDoc() throws IOException { // NOTE(review): sibling tests are named test*; a doTest* method is presumably not auto-run — confirm something invokes it
+ final int storedFields = DocHelper.numFields(testDoc) - DocHelper.unstored.size();
+ Document newDoc1 = reader.document(0, null, null); // null doc: a Document is created
+ assertNotNull(newDoc1);
+ assertEquals(storedFields, DocHelper.numFields(newDoc1));
+
+ Document newDoc1a = new Document();
+ assertSame(newDoc1a, reader.document(0, newDoc1a, null)); // supplied doc is filled and returned
+ assertEquals(storedFields, DocHelper.numFields(newDoc1a));
+
+ // loading into the same doc again appends, so the second half repeats the first
+ assertSame(newDoc1a, reader.document(0, newDoc1a, null));
+ assertEquals(2 * storedFields, DocHelper.numFields(newDoc1a));
+ List<Fieldable> f = newDoc1a.getFields();
+ assertEquals(f.get(0).stringValue(), f.get(f.size()/2).stringValue());
+ }
+
public void testDelete() throws IOException {
Document docToDelete = new Document();
DocHelper.setupDoc(docToDelete);