Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 791601) +++ CHANGES.txt (working copy) @@ -127,6 +127,11 @@ is failing to close reader/writers. (Brian Groose via Mike McCandless) + 9. LUCENE-1727: Ensure that fields are stored & retrieved in the + exact order in which they were added to the document. This was + true in all Lucene releases before 2.3, but was broken in 2.3 and + 2.4, and is now fixed in 2.9. (Mike McCandless) + API Changes 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is Index: src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 791601) +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -24,6 +24,7 @@ import java.util.Random; import java.util.Map; import java.util.HashMap; +import java.util.Iterator; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.SinkTokenizer; @@ -4408,4 +4409,36 @@ dir.close(); } + + // LUCENE-1727: make sure doc fields are stored in order + public void testStoredFieldsOrder() throws Throwable { + Directory d = new MockRAMDirectory(); + IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); + Document doc = new Document(); + doc.add(new Field("zzz", "a b c", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("aaa", "a b c", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("zzz", "1 2 3", Field.Store.YES, Field.Index.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + doc = r.document(0); + Iterator it = doc.getFields().iterator(); + assertTrue(it.hasNext()); + Field f = (Field) it.next(); + assertEquals(f.name(), "zzz"); + assertEquals(f.stringValue(), "a b c"); + + assertTrue(it.hasNext()); + f = (Field) it.next(); + assertEquals(f.name(), "aaa"); + assertEquals(f.stringValue(), "a b c"); + + assertTrue(it.hasNext()); + f = (Field) it.next(); + assertEquals(f.name(), "zzz"); + assertEquals(f.stringValue(), "1 2 3"); + assertFalse(it.hasNext()); + r.close(); + w.close(); + d.close(); + } } Index: src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 791601) +++ src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -23,6 +23,7 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.util.ArrayUtil; /** * Gathers all Fieldables for a document under the same @@ -50,13 +51,16 @@ int hashMask = 1; int totalFieldCount; + final StoredFieldsWriterPerThread fieldsWriter; + final DocumentsWriter.DocState docState; - + public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException { this.docState = threadState.docState; this.docFieldProcessor = docFieldProcessor; this.fieldInfos = docFieldProcessor.fieldInfos; this.consumer = docFieldProcessor.consumer.addThread(this); + fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState); } public void abort() { @@ -68,6 +72,7 @@ field = next; } } + fieldsWriter.abort(); consumer.abort(); } @@ -148,6 +153,8 @@ public DocumentsWriter.DocWriter processDocument() throws IOException { consumer.startDocument(); + fieldsWriter.startDocument(); + final Document doc = docState.doc; assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start"); @@ -220,6 +227,9 @@ } fp.fields[fp.fieldCount++] = field; + if (field.isStored()) { + fieldsWriter.addField(field, fp.fieldInfo); + } } // If we are writing vectors then we must visit @@ -236,7 +246,21 @@ if (docState.maxTermPrefix != null && docState.infoStream != null) docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); - return consumer.finishDocument(); + final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument(); + final DocumentsWriter.DocWriter two = consumer.finishDocument(); + if (one == null) { + return two; + } else if (two == null) { + return one; + } else { + PerDoc both = getPerDoc(); + both.docID = docState.docID; + assert one.docID == docState.docID; + assert two.docID == docState.docID; + both.one = one; + both.two = two; + return both; + } } void quickSort(DocFieldProcessorPerField[] array, int lo, int hi) { @@ -299,4 +323,62 @@ quickSort(array, lo, left); quickSort(array, left + 1, hi); } + + PerDoc[] docFreeList = new PerDoc[1]; + int freeCount; + int allocCount; + + synchronized PerDoc getPerDoc() { + if (freeCount == 0) { + allocCount++; + if (allocCount > docFreeList.length) { + // Grow our free list up front to make sure we have + // enough space to recycle all outstanding PerDoc + // instances + assert allocCount == 1+docFreeList.length; + docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)]; + } + return new PerDoc(); + } else + return docFreeList[--freeCount]; + } + + synchronized void freePerDoc(PerDoc perDoc) { + assert freeCount < docFreeList.length; + docFreeList[freeCount++] = perDoc; + } + + class PerDoc extends DocumentsWriter.DocWriter { + + DocumentsWriter.DocWriter one; + DocumentsWriter.DocWriter two; + + public long sizeInBytes() { + return one.sizeInBytes() + two.sizeInBytes(); + } + + public void finish() throws IOException { + try { + try { + one.finish(); + } finally { + two.finish(); + } + } finally { + freePerDoc(this); + } + } + + public void abort() { + try { + try { + one.abort(); + } finally { + two.abort(); + } + } finally { + freePerDoc(this); + } + } + } } Index: src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java =================================================================== --- src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (revision 791601) +++ src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (working copy) @@ -19,8 +19,9 @@ import java.io.IOException; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.document.Fieldable; -final class StoredFieldsWriterPerThread extends DocFieldConsumerPerThread { +final class StoredFieldsWriterPerThread { final FieldsWriter localFieldsWriter; final StoredFieldsWriter storedFieldsWriter; @@ -28,9 +29,9 @@ StoredFieldsWriter.PerDoc doc; - public StoredFieldsWriterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, StoredFieldsWriter storedFieldsWriter) throws IOException { + public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException { this.storedFieldsWriter = storedFieldsWriter; - this.docState = docFieldProcessorPerThread.docState; + this.docState = docState; localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos); } @@ -44,6 +45,21 @@ } } + public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { + if (doc == null) { + doc = storedFieldsWriter.getPerDoc(); + doc.docID = docState.docID; + localFieldsWriter.setFieldsStream(doc.fdt); + assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields; + assert 0 == doc.fdt.length(); + assert 0 == doc.fdt.getFilePointer(); + } + + localFieldsWriter.writeField(fieldInfo, field); + assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); + doc.numStoredFields++; + } + public DocumentsWriter.DocWriter finishDocument() { // If there were any stored fields in this doc, doc will // be non-null; else it's null. @@ -60,8 +76,4 @@ doc = null; } } - - public DocFieldConsumerPerField addField(FieldInfo fieldInfo) { - return new StoredFieldsWriterPerField(this, fieldInfo); - } } Index: src/java/org/apache/lucene/index/StoredFieldsWriter.java =================================================================== --- src/java/org/apache/lucene/index/StoredFieldsWriter.java (revision 791601) +++ src/java/org/apache/lucene/index/StoredFieldsWriter.java (working copy) @@ -17,30 +17,31 @@ * limitations under the License. */ -import java.util.Map; import java.io.IOException; import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.ArrayUtil; /** This is a DocFieldConsumer that writes stored fields. */ -final class StoredFieldsWriter extends DocFieldConsumer { +final class StoredFieldsWriter { FieldsWriter fieldsWriter; final DocumentsWriter docWriter; + final FieldInfos fieldInfos; int lastDocID; PerDoc[] docFreeList = new PerDoc[1]; int freeCount; - public StoredFieldsWriter(DocumentsWriter docWriter) { + public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos) { this.docWriter = docWriter; + this.fieldInfos = fieldInfos; } - public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException { - return new StoredFieldsWriterPerThread(docFieldProcessorPerThread, this); + public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException { + return new StoredFieldsWriterPerThread(docState, this); } - synchronized public void flush(Map threadsAndFields, SegmentWriteState state) throws IOException { + synchronized public void flush(SegmentWriteState state) throws IOException { if (state.numDocsInStore > 0) { // It's possible that all documents seen in this segment Index: src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java =================================================================== --- src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java (revision 791601) +++ src/java/org/apache/lucene/index/StoredFieldsWriterPerField.java (working copy) @@ -1,66 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import org.apache.lucene.document.Fieldable; - -final class StoredFieldsWriterPerField extends DocFieldConsumerPerField { - - final StoredFieldsWriterPerThread perThread; - final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; - - public StoredFieldsWriterPerField(StoredFieldsWriterPerThread perThread, FieldInfo fieldInfo) { - this.perThread = perThread; - this.fieldInfo = fieldInfo; - docState = perThread.docState; - } - - // Process all occurrences of a single field in one doc; - // count is 1 if a given field occurs only once in the - // Document, which is the "typical" case - public void processFields(Fieldable[] fields, int count) throws IOException { - - final StoredFieldsWriter.PerDoc doc; - if (perThread.doc == null) { - doc = perThread.doc = perThread.storedFieldsWriter.getPerDoc(); - doc.docID = docState.docID; - perThread.localFieldsWriter.setFieldsStream(doc.fdt); - assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields; - assert 0 == doc.fdt.length(); - assert 0 == doc.fdt.getFilePointer(); - } else { - doc = perThread.doc; - assert doc.docID == docState.docID: "doc.docID=" + doc.docID + " docState.docID=" + docState.docID; - } - - for(int i=0;i