Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (revision 1425451) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (working copy) @@ -102,17 +102,8 @@ isCompoundFile, null, diagnostics, Collections.unmodifiableMap(attributes)); info.setFiles(files); - int updatesIndex = 1; - while (updatesIndex > 0) { - files = readFilesList(directory, segmentName, updatesIndex, context); - if (files == null) { - updatesIndex = -1; - } else { - info.addFiles(files); - updatesIndex++; - } - } - + addUpdateSegmentsFiles(info, directory, segmentName, context); + success = true; return info; } finally { @@ -124,8 +115,10 @@ } } - private Set readFilesList(Directory dir, String segment, long generation, IOContext context) throws IOException { - final String segFileName = IndexFileNames.fileNameFromGeneration(segment, Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + protected Set readFilesList(Directory dir, String segment, + long generation, IOContext context) throws IOException { + final String segFileName = IndexFileNames.fileNameFromGeneration(segment, + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); if (!dir.fileExists(segFileName)) { return null; } Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (revision 1425451) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (working copy) @@ -125,9 +125,10 @@ } @Override - public void writeFilesList(Directory dir, SegmentInfo si, long generation, IOContext ioContext) throws IOException { - final String segFileName = IndexFileNames.fileNameFromGeneration(si.name, - Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + public void writeUpdatedSegmentFiles(Directory dir, SegmentInfo si, + long generation, IOContext ioContext) throws IOException { + final String segFileName = IndexFileNames.segmentFileName(si.name, "", + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION); si.addFile(segFileName); boolean success = false; Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java (working copy) @@ -24,12 +24,11 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StorableField; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; @@ -37,12 +36,12 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; +import 
org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; -import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; -import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.Counter; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PagedBytes; @@ -108,19 +107,22 @@ * @param fixedSize * true if all bytes subsequently passed to the * {@link Writer} will have the same length - * @param sortComparator {@link BytesRef} comparator used by sorted variants. - * If null {@link BytesRef#getUTF8SortedAsUnicodeComparator()} - * is used instead + * @param sortComparator + * {@link BytesRef} comparator used by sorted variants. If + * null + * {@link BytesRef#getUTF8SortedAsUnicodeComparator()} is used + * instead * @param bytesUsed * an {@link AtomicLong} instance to track the used bytes within the - * {@link Writer}. A call to {@link Writer#finish(int)} will release - * all internally used resources and frees the memory tracking - * reference. + * {@link Writer}. A call to {@link Writer#finish(int)} + * will release all internally used resources and frees the memory + * tracking reference. * @param acceptableOverheadRatio * how to trade space for speed. This option is only applicable for * docvalues of type {@link Type#BYTES_FIXED_SORTED} and * {@link Type#BYTES_VAR_SORTED}. - * @param context I/O Context + * @param context + * I/O Context * @return a new {@link Writer} instance * @see PackedInts#getReader(org.apache.lucene.store.DataInput) */ Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java (working copy) @@ -21,11 +21,10 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesArraySource; import org.apache.lucene.codecs.lucene40.values.FixedStraightBytesImpl.FixedBytesWriterBase; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StorableField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (working copy) @@ -71,16 +71,7 @@ null, diagnostics, Collections.unmodifiableMap(attributes)); si.setFiles(files); - int updatesIndex = 1; - while (updatesIndex > 0) { - files = readFilesList(dir, segment, updatesIndex, context); - if (files == null) { - updatesIndex = -1; - } else { - si.addFiles(files); - updatesIndex++; - } - } + addUpdateSegmentsFiles(si, dir, segment, context); success = true; @@ -95,7 +86,7 @@ } } - private Set readFilesList(Directory dir, 
String segment, + protected Set readFilesList(Directory dir, String segment, long generation, IOContext context) throws IOException { final String fileName = IndexFileNames.fileNameFromGeneration(segment, Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (working copy) @@ -65,7 +65,7 @@ } finally { if (!success) { IOUtils.closeWhileHandlingException(output); - si.dir.deleteFile(fileName); + dir.deleteFile(fileName); } else { output.close(); } @@ -73,10 +73,10 @@ } @Override - public void writeFilesList(Directory dir, SegmentInfo si, + public void writeUpdatedSegmentFiles(Directory dir, SegmentInfo si, long generation, IOContext ioContext) throws IOException { - final String fileName = IndexFileNames.fileNameFromGeneration(si.name, - Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + final String fileName = IndexFileNames.segmentFileName(si.name, "", + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION); si.addFile(fileName); final IndexOutput output = dir.createOutput(fileName, ioContext); @@ -89,7 +89,7 @@ } finally { if (!success) { IOUtils.closeWhileHandlingException(output); - si.dir.deleteFile(fileName); + dir.deleteFile(fileName); } else { output.close(); } Index: lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (working copy) @@ -22,9 +22,8 @@ import java.util.Comparator; import java.util.List; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; Index: lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java (working copy) @@ -21,7 +21,6 @@ import org.apache.lucene.document.ByteDocValuesField; import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.DoubleDocValuesField; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.IntDocValuesField; import org.apache.lucene.document.LongDocValuesField; @@ -31,12 +30,12 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.MergeState; import org.apache.lucene.index.StorableField; -import 
org.apache.lucene.index.MergeState; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -84,7 +83,8 @@ * the total number of documents in this {@link DocValuesConsumer}. * Must be greater than or equal the last given docID to * {@link #add(int, StorableField)}. - * @throws IOException If an I/O error occurs + * @throws IOException + * If an I/O error occurs */ public abstract void finish(int docCount) throws IOException; Index: lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (working copy) @@ -52,12 +52,25 @@ */ public FieldGenerationReplacements readGenerationReplacements(String field, SegmentInfoPerCommit info, IOContext context) throws IOException { - String fileName = getLastGenerationFileName(field, info.info.dir, info); - if (fileName == null) { - return null; + + FieldGenerationReplacements reps = null; + + for (long gen = 1; gen <= info.getUpdateGen(); gen++) { + final String fileName = IndexFileNames.segmentFileName( + IndexFileNames.fileNameFromGeneration(info.info.name, "", gen, true), + field, FIELD_GENERATION_REPLACEMENT_EXTENSION); + if (info.info.dir.fileExists(fileName)) { + final FieldGenerationReplacements + newGeneration = internalReadGeneration(info.info.dir, fileName, context); + if (reps == null) { + reps = newGeneration; + } else { + reps.merge(newGeneration); + } + } } - - return internalReadGeneration(info.info.dir, fileName, context); + + return reps; } private FieldGenerationReplacements internalReadGeneration(Directory dir, @@ -78,19 +91,6 @@ } } - private String getLastGenerationFileName(String field, Directory dir, - SegmentInfoPerCommit info) throws IOException { - for (long i = info.getUpdateGen(); i > 0; i--) { - final String fileName = IndexFileNames.segmentFileName( - IndexFileNames.fileNameFromGeneration(info.info.name, "", i, false), - field, FIELD_GENERATION_REPLACEMENT_EXTENSION); - if (dir.fileExists(fileName)) { - return fileName; - } - } - return null; - } - /** * Read persisted field generation replacements from a given input. 
*/ @@ -110,18 +110,8 @@ return; } - // load replacements from previous file - String prevFileName = getLastGenerationFileName(field, dir, info); - final FieldGenerationReplacements existing; - if (prevFileName != null) { - existing = internalReadGeneration(dir, prevFileName, context); - existing.merge(reps); - } else { - existing = reps; - } - final String nameWithGeneration = IndexFileNames.fileNameFromGeneration( - info.info.name, "", info.getNextUpdateGen(), false); + info.info.name, "", info.getNextUpdateGen(), true); final String fileName = IndexFileNames.segmentFileName(nameWithGeneration, field, FIELD_GENERATION_REPLACEMENT_EXTENSION); @@ -133,13 +123,9 @@ } finally { if (!success) { IOUtils.closeWhileHandlingException(output); - info.info.dir.deleteFile(fileName); + dir.deleteFile(fileName); } else { output.close(); - if (prevFileName != null) { - // remove previous file - info.info.dir.deleteFile(prevFileName); - } } } } Index: lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Set; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; @@ -44,4 +45,47 @@ * @throws IOException If an I/O error occurs */ public abstract SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException; + + /** + * Add files of update segments to the segment info. + * @param info The segment info to update + * @param dir The containing directory + * @param segmentName The name of the handled segment + * @param context The IOContext + * @throws IOException If an I/O error occurs while reading the files lists + */ + protected void addUpdateSegmentsFiles(final SegmentInfo info, Directory dir, + String segmentName, IOContext context) + throws IOException { + int generation = 1; + while (generation > 0) { + Set files = readFilesList(dir, segmentName, generation, context); + if (files == null) { + generation = -1; + } else { + info.addFiles(files); + generation++; + } + } + } + + /** + * Read the list of files belonging to a certain update generation of an updated segment. + * + * @param dir + * The containing directory + * @param segmentName + * The name of the handled segment + * @param generation + * The update generation + * @param context + * The IOContext + * @return A list of the files corresponding to the update generation.
+ * @throws IOException + * If error occurred when reading files list + */ + protected abstract Set readFilesList(Directory dir, + String segmentName, long generation, IOContext context) + throws IOException; + } Index: lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java (working copy) @@ -44,7 +44,8 @@ * @param ioContext IO context to use * @throws IOException If an I/O error occurs */ - public abstract void write(Directory dir, SegmentInfo info, FieldInfos fis, IOContext ioContext) throws IOException; + public abstract void write(Directory dir, SegmentInfo info, FieldInfos fis, + IOContext ioContext) throws IOException; /** * Write the list of files belonging to an updates segment of the segment with @@ -57,5 +58,7 @@ * @throws IOException * If an I/O error occurs */ - public abstract void writeFilesList(Directory dir, SegmentInfo info, long generation, IOContext ioContext) throws IOException; + public abstract void writeUpdatedSegmentFiles(Directory dir, + SegmentInfo info, long generation, IOContext ioContext) + throws IOException; } Index: lucene/core/src/java/org/apache/lucene/index/DocConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocConsumer.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocConsumer.java (working copy) @@ -19,9 +19,11 @@ import java.io.IOException; +import org.apache.lucene.store.Directory; + abstract class DocConsumer { - abstract void processDocument(FieldInfos.Builder fieldInfos) throws IOException; - abstract void finishDocument() throws IOException; + abstract void processDocument(FieldInfos.Builder fieldInfos, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException; + abstract void finishDocument(Directory directory, SegmentInfo segmentInfo) throws IOException; abstract void flush(final SegmentWriteState state) throws IOException; abstract void abort(); abstract boolean freeRAM(); Index: lucene/core/src/java/org/apache/lucene/index/DocFieldConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldConsumer.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldConsumer.java (working copy) @@ -20,9 +20,12 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.store.Directory; + abstract class DocFieldConsumer { /** Called when DocumentsWriterPerThread decides to create a new - * segment */ + * segment + */ abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; /** Called when an aborting exception is hit */ @@ -37,6 +40,6 @@ abstract DocFieldConsumerPerField addField(FieldInfo fi); - abstract void finishDocument() throws IOException; + abstract void finishDocument(Directory directory, SegmentInfo info) throws IOException; } Index: lucene/core/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (working copy) @@ -19,9 +19,11 @@ import java.io.IOException; +import 
org.apache.lucene.store.Directory; + abstract class DocFieldConsumerPerField { /** Processes all occurrences of a single field */ - abstract void processFields(IndexableField[] fields, int count) throws IOException; + abstract void processFields(IndexableField[] fields, int count, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException; abstract void abort(); abstract FieldInfo getFieldInfo(); } Index: lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (working copy) @@ -30,6 +30,7 @@ import org.apache.lucene.codecs.PerDocConsumer; import org.apache.lucene.index.DocumentsWriterPerThread.DocState; import org.apache.lucene.index.TypePromoter.TypeCompatibility; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.IOUtils; @@ -203,7 +204,7 @@ } @Override - public void processDocument(FieldInfos.Builder fieldInfos) throws IOException { + public void processDocument(FieldInfos.Builder fieldInfos, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException { consumer.startDocument(); fieldsWriter.startDocument(); @@ -237,7 +238,7 @@ final DocValues.Type dvType = ft.docValueType(); if (dvType != null) { DocValuesConsumerHolder docValuesConsumer = docValuesConsumer(dvType, - docState, fp.fieldInfo); + docState, fp.fieldInfo, segmentInfo, trackingDirectory); DocValuesConsumer consumer = docValuesConsumer.docValuesConsumer; if (docValuesConsumer.compatibility == null) { consumer.add(docState.docID, field); @@ -268,7 +269,7 @@ ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); for(int i=0;i docValues = new HashMap(); private PerDocConsumer perDocConsumer; - DocValuesConsumerHolder docValuesConsumer(DocValues.Type valueType, DocState docState, FieldInfo fieldInfo) + DocValuesConsumerHolder docValuesConsumer(DocValues.Type valueType, DocState docState, FieldInfo fieldInfo, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException { DocValuesConsumerHolder docValuesConsumerAndDocID = docValues.get(fieldInfo.name); if (docValuesConsumerAndDocID != null) { @@ -369,7 +370,7 @@ } if (perDocConsumer == null) { - PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(); + PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(segmentInfo, trackingDirectory); perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState); if (perDocConsumer == null) { throw new IllegalStateException("codec=" + docState.docWriter.codec + " does not support docValues: from docValuesFormat().docsConsumer(...) returned null; field=" + fieldInfo.name); Index: lucene/core/src/java/org/apache/lucene/index/DocInverter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocInverter.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocInverter.java (working copy) @@ -21,6 +21,8 @@ import java.util.HashMap; import java.util.Map; +import org.apache.lucene.store.Directory; + /** This is a DocFieldConsumer that inverts each field, * separately, from a Document, and accepts a * InvertedTermsConsumer to process those terms. 
*/ @@ -61,11 +63,11 @@ } @Override - public void finishDocument() throws IOException { + public void finishDocument(Directory directory, SegmentInfo info) throws IOException { // TODO: allow endConsumer.finishDocument to also return // a DocWriter endConsumer.finishDocument(); - consumer.finishDocument(); + consumer.finishDocument(directory, info); } @Override Index: lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; /** @@ -60,8 +61,8 @@ } @Override - public void processFields(final IndexableField[] fields, - final int count) throws IOException { + public void processFields(final IndexableField[] fields, final int count, + SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException { fieldState.reset(); @@ -199,7 +200,7 @@ } consumer.finish(); - endConsumer.finish(); + endConsumer.finish(segmentInfo, trackingDirectory); } @Override Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -26,17 +27,19 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.GenerationReplacementsFormat; import org.apache.lucene.index.DocumentsWriterFlushQueue.SegmentFlushTicket; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; import org.apache.lucene.index.FieldInfos.FieldNumbers; +import org.apache.lucene.index.FieldsUpdate.Operation; import org.apache.lucene.search.Query; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.MergeInfo; +import org.apache.lucene.store.TrackingDirectoryWrapper; import org.apache.lucene.util.InfoStream; /** @@ -395,7 +398,8 @@ return postUpdate(flushingDWPT, maybeMerge); } - boolean updateFields(Term term, FieldsUpdate fieldsUpdate) throws IOException { + boolean updateFields(Term term, Operation operation, IndexDocument fields, + Analyzer analyzer, FieldNumbers globalFieldNumberMap) throws IOException { boolean maybeMerge = preUpdate(); final ThreadState perThread = flushControl.obtainAndLock(); @@ -412,13 +416,17 @@ final DocumentsWriterPerThread dwpt = perThread.dwpt; try { + FieldsUpdate fieldsUpdate = new FieldsUpdate(operation, fields, + analyzer); + // invert the given fields and store in RAMDirectory + dwpt.invertFieldsUpdate(fieldsUpdate, globalFieldNumberMap); 
dwpt.updateFields(term, fieldsUpdate); } finally { if (dwpt.checkAndResetHasAborted()) { flushControl.doOnAbort(perThread); } } - final boolean isUpdate = term != null; + final boolean isUpdate = term != null; flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); } finally { perThread.unlock(); @@ -426,8 +434,27 @@ return postUpdate(flushingDWPT, maybeMerge); } - void writeUpdatedSegment(UpdatedSegmentData liveUpdates, - SegmentInfoPerCommit info, IndexFileDeleter deleter) throws IOException { + /** + * Write updates to a segment. + * + * @param infoPerCommit + * The segment to which updates are being written + * @param updates + * The updates to write + * @param interval + * Term index interval to be used when merging + * @param globalFieldNumberMap + * Map of field numbers for merging + * @param deleter + * Used to delete files if merging fails. + * @return If all updates were added return {@code null}, otherwise return a + * new {@link UpdatedSegmentData} with the remaining updates. + * @throws IOException + * If writing failed. + */ + UpdatedSegmentData writeUpdatedSegment(SegmentInfoPerCommit infoPerCommit, + UpdatedSegmentData updates, int interval, + FieldNumbers globalFieldNumberMap, IndexFileDeleter deleter) throws IOException { final ThreadState perThread = flushControl.obtainAndLock(); try { @@ -437,35 +464,64 @@ "perThread is not active but we are still open"); } + SegmentInfo info = new SegmentInfo(infoPerCommit.info, + infoPerCommit.getNextUpdateGen()); + final DocumentsWriterPerThread dwpt = perThread.dwpt; try { - // start new segment, with update generation in name - dwpt.initSegmentInfo(info.info, info.getNextUpdateGen()); + final IOContext context = new IOContext(new MergeInfo( + -1, -1, true, -1)); - // push documents, including empty ones where needed - liveUpdates.startWriting(info.getNextUpdateGen(), - info.info.getDocCount()); - IndexDocument doc; - while ((doc = liveUpdates.nextDocument()) != null) { - dwpt.updateDocument(doc, liveUpdates.getAnalyzer(), null); + // TODO: somehow we should fix this merge so it's + // abortable so that IW.close(false) is able to stop it + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper( + directory); + + SegmentMerger merger = new SegmentMerger(info, infoStream, trackingDir, + interval, MergeState.CheckAbort.NONE, globalFieldNumberMap, context); + + updates.startWriting(infoPerCommit.getNextUpdateGen(), + infoPerCommit.info.getDocCount(), indexWriter.getConfig() + .getReaderTermsIndexDivisor()); + + AtomicReader reader; + while ((reader = updates.nextReader()) != null) { // add new indexes + merger.add(reader); } + boolean success = false; + try { + merger.merge(); // merge 'em + success = true; + } finally { + if (!success) { + synchronized (this) { + deleter.refresh(info.name); + } + } + } + // add field generation replacements - final Map generationReplacments = liveUpdates + final Map generationReplacments = updates .getFieldGenerationReplacments(); if (generationReplacments != null) { for (Entry field : generationReplacments .entrySet()) { - final GenerationReplacementsFormat repsFormat = codec - .generationReplacementsFormat(); - repsFormat.writeGenerationReplacement(field.getKey(), - field.getValue(), directory, info, IOContext.DEFAULT); + codec.generationReplacementsFormat().writeGenerationReplacement( + field.getKey(), field.getValue(), trackingDir, infoPerCommit, + IOContext.DEFAULT); } } // flush directly dwpt.clearDeleteSlice(); - dwpt.flush(info.getNextUpdateGen()); + info.setFiles(new 
HashSet(trackingDir.getCreatedFiles())); + dwpt.sealUpdatedSegment(info, trackingDir, infoPerCommit.getNextUpdateGen()); + + // add the final list of new files to infoPerCommit, must perform here + // since the list could change in sealUpdatedSegment + infoPerCommit.info.addFiles(info.files()); + } finally { if (dwpt.checkAndResetHasAborted()) { flushControl.doOnAbort(perThread); @@ -474,6 +530,8 @@ } finally { perThread.unlock(); } + + return null; } private boolean doFlush(DocumentsWriterPerThread flushingDWPT) @@ -510,7 +568,7 @@ ticket = ticketQueue.addFlushTicket(flushingDWPT); // flush concurrently without locking - final FlushedSegment newSegment = flushingDWPT.flush(-1); + final FlushedSegment newSegment = flushingDWPT.flush(); if (newSegment == null) { actualFlushes--; } else { Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -26,10 +26,13 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; +import org.apache.lucene.index.FieldInfos.Builder; +import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.TrackingDirectoryWrapper; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; @@ -184,7 +187,6 @@ BufferedDeletes pendingDeletes; BufferedUpdates pendingUpdates; SegmentInfo segmentInfo; // Current segment we are working on - SegmentInfo baseSegmentInfo; // name of the base segment for segmentInfo boolean aborting = false; // True if an abort is pending boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting @@ -217,6 +219,9 @@ intBlockAllocator = new IntBlockAllocator(bytesUsed); initialize(); this.indexingChain = indexingChain; + // this should be the last call in the ctor + // it really sucks that we need to pull this within the ctor and pass this ref to the chain! 
+ consumer = indexingChain.getChain(this); } public DocumentsWriterPerThread(DocumentsWriterPerThread other, FieldInfos.Builder fieldInfos) { @@ -247,7 +252,7 @@ docState.analyzer = analyzer; docState.docID = numDocsInRAM; if (segmentInfo == null) { - initSegmentInfo(null, -1); + initSegmentInfo(); } if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name); @@ -255,7 +260,7 @@ boolean success = false; try { try { - consumer.processDocument(fieldInfos); + consumer.processDocument(fieldInfos, segmentInfo, directory); } finally { docState.clear(); } @@ -273,7 +278,7 @@ } success = false; try { - consumer.finishDocument(); + consumer.finishDocument(directory, segmentInfo); success = true; } finally { if (!success) { @@ -283,24 +288,14 @@ finishDocument(delTerm, null); } - void initSegmentInfo(SegmentInfo info, long updateGen) { - if (info == null) { - String segment = writer.newSegmentName(); - segmentInfo = new SegmentInfo(directoryOrig, - Constants.LUCENE_MAIN_VERSION, segment, -1, false, codec, null, null); - baseSegmentInfo = null; - } else { - baseSegmentInfo = info; - segmentInfo = new SegmentInfo(directoryOrig, - Constants.LUCENE_MAIN_VERSION, IndexFileNames.fileNameFromGeneration( - info.name, "", updateGen, true), -1, false, codec, null, null); - } + private void initSegmentInfo() { + String segment = writer.newSegmentName(); + segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segment, -1, + false, codec, null, null); assert numDocsInRAM == 0; if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { - infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentInfo.name + " delQueue=" + deleteQueue); + infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segment + " delQueue=" + deleteQueue); } - // reset consumer, may have previous segment name as inner state - consumer = indexingChain.getChain(this); } public int updateDocuments(Iterable docs, Analyzer analyzer, Term delTerm) throws IOException { @@ -308,7 +303,7 @@ assert deleteQueue != null; docState.analyzer = analyzer; if (segmentInfo == null) { - initSegmentInfo(null, -1); + initSegmentInfo(); } if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name); @@ -324,7 +319,7 @@ boolean success = false; try { - consumer.processDocument(fieldInfos); + consumer.processDocument(fieldInfos, segmentInfo, directory); success = true; } finally { if (!success) { @@ -340,7 +335,7 @@ } success = false; try { - consumer.finishDocument(); + consumer.finishDocument(directory, segmentInfo); success = true; } finally { if (!success) { @@ -407,6 +402,73 @@ } } + /** + * Create a directory and invert the given update fields into it, to be later + * used for adding field updates to stacked segments. + * + * @param fieldsUpdate + * Fields of the update. + * @param globalFieldNumbers + * Field numbers to use. + * @throws IOException + * If inversion goes wrong. 
+ */ + public void invertFieldsUpdate(FieldsUpdate fieldsUpdate, + FieldNumbers globalFieldNumbers) throws IOException { + assert writer.testPoint("DocumentsWriterPerThread addDocument start"); + assert deleteQueue != null; + docState.doc = fieldsUpdate.fields; + docState.analyzer = fieldsUpdate.analyzer; + docState.docID = 0; + Directory updateDir = new RAMDirectory(); + DocConsumer updateConsumer = indexingChain.getChain(this); + Builder updateFieldInfos = new FieldInfos.Builder(globalFieldNumbers); + if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { + infoStream.message("DWPT", Thread.currentThread().getName() + " getUpdateDirectory"); + } + boolean success = false; + try { + try { + updateConsumer.processDocument(updateFieldInfos, new SegmentInfo( + updateDir, Constants.LUCENE_MAIN_VERSION, "_0", -1, false, codec, + null, null), updateDir); + } finally { + docState.clear(); + } + success = true; + } finally { + if (!success) { + if (aborting) { + abort(); + } + } + } + success = false; + try { + SegmentInfo updateSegment = new SegmentInfo(updateDir, + Constants.LUCENE_MAIN_VERSION, FIRST_SEGMENT_NAME, 1, false, codec, + null, null); + updateConsumer.finishDocument(updateDir, updateSegment); + + SegmentWriteState updateFlushState = new SegmentWriteState(infoStream, + updateDir, updateSegment, 0, updateFieldInfos.finish(), writer + .getConfig().getTermIndexInterval(), null, null, + IOContext.DEFAULT); + + updateConsumer.flush(updateFlushState); + + fieldsUpdate.directory = updateDir; + fieldsUpdate.segmentInfo = updateSegment; + fieldsUpdate.fields = null; + + success = true; + } finally { + if (!success) { + abort(); + } + } + } + // Buffer a specific docID for deletion. Currently only // used when we hit a exception when adding a document void deleteDocID(int docIDUpto) { @@ -441,9 +503,7 @@ /** Reset after a flush */ private void doAfterFlush() { segmentInfo = null; - if (consumer != null) { - consumer.doAfterFlush(); - } + consumer.doAfterFlush(); directory.getCreatedFiles().clear(); fieldInfos = new FieldInfos.Builder(fieldInfos.globalFieldNumbers); parent.subtractFlushedNumDocs(numDocsInRAM); @@ -470,17 +530,16 @@ } /** Flush all pending docs to a new segment */ - FlushedSegment flush(long updateGen) throws IOException { + FlushedSegment flush() throws IOException { assert numDocsInRAM > 0; assert deleteSlice == null : "all deletes must be applied in prepareFlush"; if (segmentInfo == null) { return null; } segmentInfo.setDocCount(numDocsInRAM); - IOContext context = new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())); flushState = new SegmentWriteState(infoStream, directory, segmentInfo, 0, fieldInfos.finish(), writer.getConfig().getTermIndexInterval(), - pendingDeletes, pendingUpdates, context); + pendingDeletes, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed()))); final double startMBUsed = parent.flushControl.netBytes() / 1024. 
/ 1024.; // Apply delete-by-docID now (delete-byDocID only @@ -512,14 +571,9 @@ try { consumer.flush(flushState); pendingDeletes.terms.clear(); - if (updateGen < 0) { - segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); - } else { - segmentInfo = baseSegmentInfo; - segmentInfo.addFiles(new HashSet(directory.getCreatedFiles())); - } + segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); - final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L, updateGen); + final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L, -1L); if (infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : (flushState.segmentInfo.getDocCount() - flushState.delCountOnFlush)) + " deleted docs"); infoStream.message("DWPT", "new segment has " + @@ -556,7 +610,7 @@ FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush, pendingUpdates, flushState.liveUpdates); - sealFlushedSegment(fs, updateGen); + sealFlushedSegment(fs); doAfterFlush(); success = true; @@ -575,7 +629,7 @@ * Seals the {@link SegmentInfo} for the new flushed segment and persists * the deleted documents {@link MutableBits}. */ - void sealFlushedSegment(FlushedSegment flushedSegment, long updateGen) throws IOException { + void sealFlushedSegment(FlushedSegment flushedSegment) throws IOException { assert flushedSegment != null; SegmentInfoPerCommit newSegment = flushedSegment.segmentInfo; @@ -589,7 +643,9 @@ if (writer.useCompoundFile(newSegment)) { // Now build compound file - Collection oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context, updateGen); + Collection oldFiles = IndexWriter.createCompoundFile( + infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, + context, -1L); newSegment.info.setUseCompoundFile(true); writer.deleteNewFiles(oldFiles); } @@ -598,11 +654,7 @@ // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: - if (updateGen < 0) { - codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context); - } else { - codec.segmentInfoFormat().getSegmentInfoWriter().writeFilesList(directory, newSegment.info, updateGen, context); - } + codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context); // TODO: ideally we would freeze newSegment here!! // because any changes after writing the .si will be @@ -645,6 +697,39 @@ } } + void sealUpdatedSegment(SegmentInfo info, Directory directory, long updateGen) throws IOException { + assert updateGen > 0; + boolean success = false; + try { + IOContext context = new IOContext(new FlushInfo(info.getDocCount(), + info.sizeInBytes())); + if (info.getUseCompoundFile()) { + + // Now build compound file + Collection oldFiles = IndexWriter.createCompoundFile( + infoStream, directory, MergeState.CheckAbort.NONE, info, context, + -1L); + writer.deleteNewFiles(oldFiles); + } + + // Have codec write files of updated segment. 
Must do this after creating + // CFS so that 1) .sif isn't slurped into CFS, and 2) .sif reflects + // useCompoundFile=true change above: + codec.segmentInfoFormat().getSegmentInfoWriter() + .writeUpdatedSegmentFiles(directory, info, updateGen, context); + + success = true; + } finally { + if (!success) { + if (infoStream.isEnabled("DWPT")) { + infoStream.message("DWPT", "hit exception " + + "writing compound file for updated segment " + info.name); + } + writer.flushFailed(info); + } + } + } + /** Get current segment info we are writing. */ SegmentInfo getSegmentInfo() { return segmentInfo; @@ -661,8 +746,11 @@ /* if you increase this, you must fix field cache impl for * getTerms/getTermsIndex requires <= 32768 */ final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; + + /* A segment name to be used when inverting field updates into temporary + * directories, using the convention in IndexWriter.newSegmentName(). */ + final static String FIRST_SEGMENT_NAME = "_" + Integer.toString(0, Character.MAX_RADIX); - private static class IntBlockAllocator extends IntBlockPool.Allocator { private final Counter bytesUsed; @@ -686,9 +774,9 @@ } } - PerDocWriteState newPerDocWriteState() { + PerDocWriteState newPerDocWriteState(SegmentInfo segmentInfo, Directory trackingDirectory) { assert segmentInfo != null; - return new PerDocWriteState(infoStream, directory, segmentInfo, bytesUsed, "", IOContext.DEFAULT); + return new PerDocWriteState(infoStream, trackingDirectory, segmentInfo, bytesUsed, "", IOContext.DEFAULT); } @Override Index: lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (working copy) @@ -1,6 +1,10 @@ package org.apache.lucene.index; +import java.util.HashSet; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.store.Directory; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -39,9 +43,13 @@ } final Operation operation; - final IndexDocument fields; + final Set replacedFields; final Analyzer analyzer; final int docIDUpto; + + IndexDocument fields; + Directory directory; + SegmentInfo segmentInfo; /** * An update of fields which is not assigned to a specific live segment. @@ -49,14 +57,25 @@ * @param operation * The type of update operation. * @param fields - * The fields to use in the update. + * The fields to use in the update operation. * @param analyzer * The analyzer to use in the update. 
*/ public FieldsUpdate(Operation operation, IndexDocument fields, Analyzer analyzer) { + this.fields = fields; this.operation = operation; - this.fields = fields; + if (operation == Operation.ADD_FIELDS) { + replacedFields = null; + } else { + replacedFields = new HashSet(); + for (IndexableField field : fields.indexableFields()) { + replacedFields.add(field.name()); + } + for (StorableField field : fields.storableFields()) { + replacedFields.add(field.name()); + } + } this.analyzer = analyzer; this.docIDUpto = -1; } @@ -72,11 +91,13 @@ */ public FieldsUpdate(FieldsUpdate other, int docIDUpto) { this.operation = other.operation; - this.fields = other.fields; + this.replacedFields = other.replacedFields; this.analyzer = other.analyzer; this.docIDUpto = docIDUpto; + this.directory = other.directory; + this.segmentInfo = other.segmentInfo; } - + /* Order FieldsUpdate by increasing docIDUpto */ @Override public int compareTo(FieldsUpdate other) { Index: lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.IOUtils; @@ -114,7 +115,7 @@ } @Override - void finishDocument(TermsHash termsHash) { + void finishDocument(TermsHash termsHash, Directory directory, SegmentInfo info) { } @Override Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -1179,7 +1179,7 @@ */ public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException { - updateDocument(null, doc, analyzer); + replaceDocument(null, doc, analyzer); } /** @@ -1398,8 +1398,8 @@ boolean success = false; boolean anySegmentFlushed = false; try { - anySegmentFlushed = docWriter.updateFields(term, new FieldsUpdate( - operation, fields, analyzer)); + anySegmentFlushed = docWriter.updateFields(term, operation, fields, + analyzer, globalFieldNumberMap); success = true; updatesPending = true; } finally { @@ -2395,11 +2395,16 @@ deleter.checkpoint(segmentInfos, false); } - void writeSegmentUpdates(SegmentInfoPerCommit segment, + void writeSegmentUpdates(SegmentInfoPerCommit info, UpdatedSegmentData updates, IOContext context) throws IOException { - docWriter.writeUpdatedSegment(updates, segment, this.deleter); - - segment.advanceUpdateGen(); + // add updates, single update per document in each round, until all updates + // were added + while (updates != null) { + updates = docWriter.writeUpdatedSegment(info, updates, + config.getTermIndexInterval(), globalFieldNumberMap, deleter); + info.advanceUpdateGen(); + } + deleter.checkpoint(segmentInfos, false); } synchronized void publishFrozenDeletes(FrozenBufferedDeletes packet) { @@ -3954,7 +3959,7 @@ assert delCount > reader.numDeletedDocs(); // beware of zombies SegmentReader newReader = new SegmentReader(info, context, - reader.core, liveDocs, info.info.getDocCount() - delCount); + reader.core, reader.updates, liveDocs,
info.info.getDocCount() - delCount); boolean released = false; try { rld.release(reader); @@ -4569,8 +4574,12 @@ } } } - final String cfeFileName = IndexFileNames.segmentFileName(prefix, "", - IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); + String cfeFileName = IndexFileNames.fileNameFromGeneration(info.name, + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION, updateGen, true); + if (cfeFileName == null) { + cfeFileName = IndexFileNames.segmentFileName(prefix, "", + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); + } CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); IOException prior = null; Index: lucene/core/src/java/org/apache/lucene/index/InvertedDocConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/InvertedDocConsumer.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/InvertedDocConsumer.java (working copy) @@ -20,6 +20,8 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.store.Directory; + abstract class InvertedDocConsumer { /** Abort (called after hitting AbortException) */ @@ -32,7 +34,7 @@ abstract void startDocument() throws IOException; - abstract void finishDocument() throws IOException; + abstract void finishDocument(Directory directory, SegmentInfo info) throws IOException; /** Attempt to free RAM, returning true if any RAM was * freed */ Index: lucene/core/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java (working copy) @@ -2,6 +2,8 @@ import java.io.IOException; +import org.apache.lucene.store.Directory; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -20,6 +22,6 @@ */ abstract class InvertedDocEndConsumerPerField { - abstract void finish() throws IOException; + abstract void finish(SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException; abstract void abort(); } Index: lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.Directory; final class NormsConsumerPerField extends InvertedDocEndConsumerPerField implements Comparable { private final FieldInfo fieldInfo; @@ -46,14 +47,14 @@ } @Override - void finish() throws IOException { + void finish(SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException { if (fieldInfo.isIndexed() && !fieldInfo.omitsNorms()) { similarity.computeNorm(fieldState, norm); if (norm.type() != null) { StorableField field = norm.field(); // some similarity might not compute any norms - DocValuesConsumer consumer = getConsumer(norm.type()); + DocValuesConsumer consumer = getConsumer(norm.type(), segmentInfo, trackingDirectory); consumer.add(docState.docID, field); } } @@ -67,11 +68,11 @@ return initType; } - private DocValuesConsumer getConsumer(Type type) throws IOException { + private DocValuesConsumer getConsumer(Type type, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException { if (consumer == null) { assert fieldInfo.getNormType() == null || fieldInfo.getNormType() == type; fieldInfo.setNormValueType(type); - consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(), fieldInfo, type); + consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(segmentInfo, trackingDirectory), fieldInfo, type); this.initType = type; } if (initType != type) { Index: lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (working copy) @@ -211,7 +211,9 @@ } shared = true; if (liveDocs != null) { - return new SegmentReader(reader.getSegmentInfo(), context, reader.core, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount); + return new SegmentReader(reader.getSegmentInfo(), context, reader.core, + reader.updates, liveDocs, info.info.getDocCount() + - info.getDelCount() - pendingDeleteCount); } else { assert reader.getLiveDocs() == liveDocs; reader.incRef(); Index: lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (working copy) @@ -80,7 +80,7 @@ private final Set coreClosedListeners = Collections.synchronizedSet(new LinkedHashSet()); - SegmentCoreReaders(SegmentReader owner, SegmentInfoPerCommit si, long updageGen, IOContext context, int termsIndexDivisor) throws IOException { + SegmentCoreReaders(SegmentReader owner, SegmentInfo si, 
long updageGen, IOContext context, int termsIndexDivisor) throws IOException { if (termsIndexDivisor == 0) { throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); @@ -89,11 +89,12 @@ final SegmentInfo info; final String infoName; if (updageGen == -1) { - info = si.info; + info = si; infoName = info.name; } else { - info = new SegmentInfo(si.info, updageGen); - infoName = IndexFileNames.fileNameFromGeneration(si.info.name, "", updageGen, true); + info = new SegmentInfo(si, updageGen); + info.setDocCount(si.getDocCount()); + infoName = IndexFileNames.fileNameFromGeneration(si.name, "", updageGen, true); } Directory dir = info.dir; Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -104,7 +104,8 @@ this.dir = info.dir; this.version = info.version; this.name = IndexFileNames.updatedSegmentFileNameFromGeneration(info.name, gen); - this.docCount = info.docCount; + //this.docCount = info.docCount; + this.docCount = -1; this.isCompoundFile = info.isCompoundFile; this.codec = info.codec; this.diagnostics = info.diagnostics; Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (working copy) @@ -49,7 +49,8 @@ * @param info {@link SegmentInfo} that we wrap * @param delCount number of deleted documents in this segment * @param delGen deletion generation number (used to name - deletion files) + * deletion files) + * @param updateGen update generation number **/ public SegmentInfoPerCommit(SegmentInfo info, int delCount, long delGen, long updateGen) { this.info = info; Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -77,7 +77,7 @@ } } - final void add(SegmentReader reader) { + final void add(AtomicReader reader) { mergeState.readers.add(reader); } Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -52,8 +52,8 @@ private final int numDocs; final SegmentCoreReaders core; + final SegmentCoreReaders[] updates; - private SegmentCoreReaders[] updates; private final IOContext context; private Fields fields; private FieldInfos fieldInfos; @@ -74,8 +74,8 @@ IOContext context) throws IOException { this.si = si; this.context = context; - core = new SegmentCoreReaders(this, si, -1, context, termInfosIndexDivisor); - initUpdates(si, termInfosIndexDivisor, context); + core = new SegmentCoreReaders(this, si.info, -1, context, termInfosIndexDivisor); + updates = initUpdates(si, termInfosIndexDivisor, context); boolean success = false; try { if (si.hasDeletions()) { @@ -110,8 +110,8 @@ * loading new live docs from a new deletes file. 
Used by openIfChanged. */ SegmentReader(SegmentInfoPerCommit si, SegmentCoreReaders core, - IOContext context) throws IOException { - this(si, context, core, si.info.getCodec().liveDocsFormat() + SegmentCoreReaders[] updates, IOContext context) throws IOException { + this(si, context, core, updates, si.info.getCodec().liveDocsFormat() .readLiveDocs(si.info.dir, si, context), si.info.getDocCount() - si.getDelCount()); } @@ -122,12 +122,12 @@ * NRT reader */ SegmentReader(SegmentInfoPerCommit si, IOContext context, - SegmentCoreReaders core, Bits liveDocs, int numDocs) { + SegmentCoreReaders core, SegmentCoreReaders[] updates, Bits liveDocs, int numDocs) { this.si = si; this.context = context; this.core = core; core.incRef(); - this.updates = null; + this.updates = updates; // TODO : handle NRT updates, add field liveUpdates assert liveDocs != null; @@ -136,17 +136,18 @@ this.numDocs = numDocs; } - private void initUpdates(SegmentInfoPerCommit si, int termInfosIndexDivisor, + private SegmentCoreReaders[] initUpdates(SegmentInfoPerCommit si, int termInfosIndexDivisor, IOContext context) throws IOException { if (si.hasUpdates()) { - updates = new SegmentCoreReaders[(int) si.getUpdateGen()]; - for (int i = 0; i < updates.length; i++) { - updates[i] = new SegmentCoreReaders(this, si, i + 1, context, + SegmentCoreReaders[] newUpdates = new SegmentCoreReaders[(int) si + .getUpdateGen()]; + for (int i = 0; i < newUpdates.length; i++) { + newUpdates[i] = new SegmentCoreReaders(this, si.info, i + 1, context, termInfosIndexDivisor); } - return; + return newUpdates; } - updates = null; + return null; } @Override Index: lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java (working copy) @@ -172,7 +172,8 @@ // Steal the ref returned by SegmentReader ctor: assert infos.info(i).info.dir == newReaders[i].getSegmentInfo().info.dir; assert infos.info(i).hasDeletions(); - newReaders[i] = new SegmentReader(infos.info(i), newReaders[i].core, IOContext.READ); + newReaders[i] = new SegmentReader(infos.info(i), + newReaders[i].core, newReaders[i].updates, IOContext.READ); } } success = true; Index: lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -64,7 +65,7 @@ // It's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the FieldsWriter: - initFieldsWriter(state.context); + initFieldsWriter(state.context, state.directory, state.segmentInfo); fill(numDocs); } @@ -79,9 +80,10 @@ } } - private synchronized void initFieldsWriter(IOContext context) throws IOException { + private synchronized void initFieldsWriter(IOContext context, + Directory directory, SegmentInfo info) throws IOException { if (fieldsWriter == null) { - fieldsWriter = 
codec.storedFieldsFormat().fieldsWriter(docWriter.directory, docWriter.getSegmentInfo(), context); + fieldsWriter = codec.storedFieldsFormat().fieldsWriter(directory, info, context); lastDocID = 0; } } @@ -108,10 +110,10 @@ } } - void finishDocument() throws IOException { + void finishDocument(Directory directory, SegmentInfo info) throws IOException { assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start"); - initFieldsWriter(IOContext.DEFAULT); + initFieldsWriter(IOContext.DEFAULT, directory, info); fill(docState.docID); if (fieldsWriter != null && numStoredFields > 0) { Index: lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (working copy) @@ -21,6 +21,7 @@ import java.util.Map; import org.apache.lucene.codecs.TermVectorsWriter; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; @@ -82,16 +83,16 @@ } } - private final void initTermVectorsWriter() throws IOException { + private final void initTermVectorsWriter(Directory directory, SegmentInfo info) throws IOException { if (writer == null) { IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed())); - writer = docWriter.codec.termVectorsFormat().vectorsWriter(docWriter.directory, docWriter.getSegmentInfo(), context); + writer = docWriter.codec.termVectorsFormat().vectorsWriter(directory, info, context); lastDocID = 0; } } @Override - void finishDocument(TermsHash termsHash) throws IOException { + void finishDocument(TermsHash termsHash, Directory directory, SegmentInfo info) throws IOException { assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start"); @@ -99,7 +100,7 @@ return; } - initTermVectorsWriter(); + initTermVectorsWriter(directory, info); fill(docState.docID); Index: lucene/core/src/java/org/apache/lucene/index/TermsHash.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TermsHash.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/TermsHash.java (working copy) @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.Map; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; @@ -131,10 +132,10 @@ } @Override - void finishDocument() throws IOException { - consumer.finishDocument(this); + void finishDocument(Directory directory, SegmentInfo info) throws IOException { + consumer.finishDocument(this, directory, info); if (nextTermsHash != null) { - nextTermsHash.consumer.finishDocument(nextTermsHash); + nextTermsHash.consumer.finishDocument(nextTermsHash, directory, info); } } Index: lucene/core/src/java/org/apache/lucene/index/TermsHashConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TermsHashConsumer.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/TermsHashConsumer.java (working copy) @@ -20,10 +20,12 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.store.Directory; + abstract class TermsHashConsumer { abstract void flush(Map 
fieldsToFlush, final SegmentWriteState state) throws IOException; abstract void abort(); abstract void startDocument() throws IOException; - abstract void finishDocument(TermsHash termsHash) throws IOException; + abstract void finishDocument(TermsHash termsHash, Directory directory, SegmentInfo info) throws IOException; abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); } Index: lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (revision 1425451) +++ lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (working copy) @@ -1,20 +1,17 @@ package org.apache.lucene.index; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.SortedSet; +import java.util.PriorityQueue; import java.util.TreeMap; -import java.util.TreeSet; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.FieldsUpdate.Operation; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ @@ -38,37 +35,44 @@ */ class UpdatedSegmentData { + static final FieldInfos EMPTY_FIELD_INFOS = new FieldInfos(new FieldInfo[0]); + /** Updates mapped by doc ID, for each do sorted list of updates. */ - private TreeMap> updatesMap; + private TreeMap> updatesMap; - public long generation; + /** The updates generation. */ + private long generation; - private Map fieldGenerationReplacments; + private Map fieldGenerationReplacments = new HashMap(); - private Iterator>> updatesIterator; + private Iterator>> updatesIterator; private int currDocID; private int nextDocID; private int numDocs; - private SortedSet nextUpdate; + private PriorityQueue nextUpdate; private Analyzer analyzer; + private int termsIndexDivisor; + UpdatedSegmentData() { - updatesMap = new TreeMap>(); + updatesMap = new TreeMap>(); } void addUpdate(int docID, FieldsUpdate update) { - SortedSet prevUpdates = updatesMap.get(docID); + PriorityQueue prevUpdates = updatesMap.get(docID); if (prevUpdates == null) { - prevUpdates = new TreeSet(); + prevUpdates = new PriorityQueue(); updatesMap.put(docID, prevUpdates); } prevUpdates.add(update); } void addUpdates(int docID, FieldsUpdate[] updatesArray) { - SortedSet prevUpdates = updatesMap.get(docID); + PriorityQueue prevUpdates = updatesMap.get(docID); if (prevUpdates == null) { - prevUpdates = new TreeSet(); + prevUpdates = new PriorityQueue(); updatesMap.put(docID, prevUpdates); } for (int i = 0; i < updatesArray.length; i++) { @@ -87,12 +91,16 @@ * The updates generation. 
* @param numDocs * number of documents in the base segment + * @param termsIndexDivisor + * Terms index divisor to use in temporary segments */ - void startWriting(long generation, int numDocs) { + void startWriting(long generation, int numDocs, int termsIndexDivisor) { this.generation = generation; this.numDocs = numDocs; + this.termsIndexDivisor = termsIndexDivisor; updatesIterator = updatesMap.entrySet().iterator(); currDocID = 0; + fieldGenerationReplacments.clear(); // fetch the first actual updates document if exists nextDocUpdate(); } @@ -102,7 +110,7 @@ */ private void nextDocUpdate() { if (updatesIterator.hasNext()) { - Entry> docUpdates = updatesIterator + Entry> docUpdates = updatesIterator .next(); nextDocID = docUpdates.getKey(); nextUpdate = docUpdates.getValue(); @@ -112,39 +120,6 @@ } } - /** - * Get the next document to put in the updates index, could be an empty - * document. Updates the analyzer. - * - * @throws IOException - * If different analyzers were assigned to field updates affecting - * the next document. - */ - IndexDocument nextDocument() throws IOException { - IndexDocument toReturn = null; - if (currDocID < nextDocID) { - // empty document required - if (currDocID == numDocs - 1) { - // add document with stored field for getting right size of segment when - // reading stored documents - toReturn = STORED_FIELD_DOCUMENT; - } else { - toReturn = EMPTY_DOCUMENT; - } - } else if (currDocID < numDocs) { - // return an actual updates document... - toReturn = new UpdatesIndexDocument(nextUpdate); - // ... and fetch the next one if exists - nextDocUpdate(); - } else { - // no more documents required - return null; - } - - currDocID++; - return toReturn; - } - Analyzer getAnalyzer() { return analyzer; } @@ -153,136 +128,146 @@ return fieldGenerationReplacments; } - /** - * An {@link IndexDocument} containing all the updates to a certain document - * in a stacked segment, taking into account replacements. - *

- * Constructing an {@link UpdatesIndexDocument} also updates the containing - * {@link UpdatedSegmentData}'s analyzer and its - * {@link FieldGenerationReplacements} vectors for the relevant fields. - */ - private class UpdatesIndexDocument implements IndexDocument { - - Map> indexablesByField = new HashMap>(); - Map> storablesByField = new HashMap>(); - - public UpdatesIndexDocument(SortedSet fieldsUpdates) - throws IOException { - boolean setAnalyzer = true; - analyzer = null; - for (FieldsUpdate fieldsUpdate : fieldsUpdates) { - // set analyzer and check for analyzer conflict - if (setAnalyzer) { - analyzer = fieldsUpdate.analyzer; - setAnalyzer = false; - } else if (analyzer != fieldsUpdate.analyzer) { - throw new IOException( - "two analyzers assigned to one updated document"); - } - - if (fieldsUpdate.operation == Operation.REPLACE_FIELDS) { - // handle fields replacement - for (IndexableField field : fieldsUpdate.fields.indexableFields()) { - replaceField(field.name()); + AtomicReader nextReader() throws IOException { + AtomicReader toReturn = null; + if (currDocID < nextDocID) { + // empty documents reader required + toReturn = new UpdateAtomicReader(nextDocID - currDocID); + currDocID = nextDocID; + } else if (currDocID < numDocs) { + // get an actual updates reader... + FieldsUpdate update = nextUpdate.poll(); + toReturn = new UpdateAtomicReader(update.directory, update.segmentInfo, + IOContext.DEFAULT); + + // ... and if done for this document remove from updates map + if (nextUpdate.isEmpty()) { + updatesIterator.remove(); + } + + // add generation replacements if they exist + if (update.replacedFields != null) { + for (String fieldName : update.replacedFields) { + FieldGenerationReplacements fieldReplacement = fieldGenerationReplacments + .get(fieldName); + if (fieldReplacement == null) { + fieldReplacement = new FieldGenerationReplacements(); + fieldGenerationReplacments.put(fieldName, fieldReplacement); } - for (StorableField field : fieldsUpdate.fields.storableFields()) { - replaceField(field.name()); - } + fieldReplacement.set(currDocID, generation); } - - // add new fields - for (IndexableField field : fieldsUpdate.fields.indexableFields()) { - List fieldList = indexablesByField.get(field.name()); - if (fieldList == null) { - fieldList = new ArrayList(); - indexablesByField.put(field.name(), fieldList); - } - fieldList.add(field); - } - for (StorableField field : fieldsUpdate.fields.storableFields()) { - List fieldList = storablesByField.get(field.name()); - if (fieldList == null) { - fieldList = new ArrayList(); - storablesByField.put(field.name(), fieldList); - } - fieldList.add(field); - } } + // move to next doc id + nextDocUpdate(); + currDocID++; } - private void replaceField(String fieldName) { - // remove previous fields - indexablesByField.remove(fieldName); - storablesByField.remove(fieldName); - - // update field generation replacement vector - if (fieldGenerationReplacments == null) { - fieldGenerationReplacments = new HashMap(); + return toReturn; + } + + private class UpdateAtomicReader extends AtomicReader { + + final private SegmentCoreReaders core; + final private int numDocs; + + /** + * Constructor with fields directory, for actual updates. + * + * @param fieldsDir + * Directory with inverted fields. + * @param segmentInfo + * Info of the inverted fields segment. + * @param context + * IOContext to use. + * @throws IOException + * If the reader cannot be created. 
+ */ + UpdateAtomicReader(Directory fieldsDir, SegmentInfo segmentInfo, + IOContext context) throws IOException { + core = new SegmentCoreReaders(null, segmentInfo, -1, context, + termsIndexDivisor); + numDocs = 1; + } + + /** + * Constructor without a fields directory, for a run of empty filler documents. + */ + UpdateAtomicReader(int numDocs) { + core = null; + this.numDocs = numDocs; + } + + @Override + public Fields fields() throws IOException { + if (core == null) { + return null; } - FieldGenerationReplacements fieldReplacement = fieldGenerationReplacments - .get(fieldName); - if (fieldReplacement == null) { - fieldReplacement = new FieldGenerationReplacements(); - fieldGenerationReplacments.put(fieldName, fieldReplacement); + return core.fields; + } + + @Override + public DocValues docValues(String field) throws IOException { + if (core == null) { + return null; } - fieldReplacement.set(currDocID, generation); + return core.perDocProducer.docValues(field); } @Override - public Iterable indexableFields() { - List indexableFields = new ArrayList(); - for (List byField : indexablesByField.values()) { - indexableFields.addAll(byField); + public DocValues normValues(String field) throws IOException { + if (core == null) { + return null; } - return indexableFields; + return core.norms.docValues(field); } @Override - public Iterable storableFields() { - List storableFields = new ArrayList(); - for (List byField : storablesByField.values()) { - storableFields.addAll(byField); + public FieldInfos getFieldInfos() { + if (core == null) { + return EMPTY_FIELD_INFOS; } - return storableFields; + return core.fieldInfos; } - } - - /** - * An empty document to be used as filler to maintain doc IDs in stacked - * segments. - */ - private static final IndexDocument EMPTY_DOCUMENT = new IndexDocument() { @Override - public Iterable storableFields() { - return Collections.emptyList(); + public Bits getLiveDocs() { + return null; } @Override - public Iterable indexableFields() { - return Collections.emptyList(); + public Fields getTermVectors(int docID) throws IOException { + if (core == null) { + return null; + } + return core.termVectorsLocal.get().get(docID); } - }; - - private static final ArrayList STORED_FIELD_LIST = new ArrayList( - 1); - static { - STORED_FIELD_LIST.add(new StoredField("dummy", "")); - } - - /** - * A document containing only one stored field to be used as the last document - * in stacked segments. 
- */ - private static final IndexDocument STORED_FIELD_DOCUMENT = new IndexDocument() { + @Override - public Iterable storableFields() { - return STORED_FIELD_LIST; + public int numDocs() { + return numDocs; } @Override - public Iterable indexableFields() { - return Collections.emptyList(); + public int maxDoc() { + return numDocs; } - }; + + @Override + public void document(int docID, StoredFieldVisitor visitor) + throws IOException { + if (core == null) { + return; + } + core.fieldsReaderLocal.get().visitDocument(docID, visitor, null); + } + + @Override + public boolean hasDeletions() { + return false; + } + + @Override + protected void doClose() throws IOException {} + + } } Index: lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java =================================================================== --- lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java (revision 1425451) +++ lucene/core/src/test/org/apache/lucene/codecs/lucene40/values/TestDocValues.java (working copy) @@ -22,19 +22,12 @@ import java.util.Comparator; import java.util.Random; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.lucene40.values.Bytes; -import org.apache.lucene.codecs.lucene40.values.Floats; -import org.apache.lucene.codecs.lucene40.values.Ints; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.index.StorableField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; Index: lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (revision 1425451) +++ lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (working copy) @@ -21,8 +21,10 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; +import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FieldType; @@ -31,8 +33,10 @@ import org.apache.lucene.index.FieldsUpdate.Operation; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.UnorderedIndexData; public class TestFieldReplacements extends LuceneTestCase { private Directory dir; @@ -54,29 +58,14 @@ + "formas humanitatis per seacula quarta decima et quinta decima. 
Eodem modo typi, qui nunc " + "nobis videntur parum clari, fiant sollemnes in futurum."; - private final static boolean VERBOSE_FIELD_REPLACEMENTS = false; + private final static boolean VERBOSE_FIELD_REPLACEMENTS = true; @Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); - - // init fields data structures - int numFields = 4 + random().nextInt(4); - fieldNames = new String[numFields]; - fieldTokens = new String[numFields][]; - for (int i = 0; i < numFields; i++) { - fieldNames[i] = "f" + i; - ArrayList tokens = new ArrayList(); - final String[] allTokens = loremIpsum.split("\\s"); - for (int index = random().nextInt(2 + i); index < allTokens.length; index += 1 + random() - .nextInt(2 + i)) { - tokens.add(allTokens[index].toLowerCase()); - } - fieldTokens[i] = tokens.toArray(new String[tokens.size()]); - } } - + @Override public void tearDown() throws Exception { dir.close(); @@ -84,19 +73,21 @@ } public void testEmptyIndex() throws IOException { - // test performing fields addition and replace on an empty index + init(random()); + + // test performing fields addition and replacement on an empty index IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))); HashSet usedTerms = new HashSet(); Operation operation = Operation.REPLACE_FIELDS; - writer.updateFields(operation, getOperationTerm(usedTerms), - getFields(usedTerms)); + writer.updateFields(operation, getOperationTerm(usedTerms, random()), + getFields(usedTerms, random())); operation = Operation.ADD_FIELDS; - writer.updateFields(operation, getOperationTerm(usedTerms), - getFields(usedTerms)); + writer.updateFields(operation, getOperationTerm(usedTerms, random()), + getFields(usedTerms, random())); writer.close(); @@ -105,23 +96,36 @@ directoryReader.close(); } - private void addDocuments() throws IOException { - + private void init(Random localRandom) { + int numFields = 4 + localRandom.nextInt(4); + fieldNames = new String[numFields]; + fieldTokens = new String[numFields][]; + for (int i = 0; i < numFields; i++) { + fieldNames[i] = "f" + i; + ArrayList tokens = new ArrayList(); + final String[] allTokens = loremIpsum.split("\\s"); + for (int index = localRandom.nextInt(2 + i); index < allTokens.length; index += 1 + localRandom + .nextInt(2 + i)) { + tokens.add(allTokens[index].toLowerCase()); + } + fieldTokens[i] = tokens.toArray(new String[tokens.size()]); + } + } + + private void addDocuments(Random localRandom) throws IOException { + init(localRandom); HashSet usedTerms = new HashSet(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); // add random documents - int numDocs = 10 + random().nextInt(50); + int numDocs = 10 + localRandom.nextInt(50); int nCommits = 0; for (int i = 0; i < numDocs; i++) { - // create fields - Document fields = getFields(usedTerms); - // select operation - int opIndex = random().nextInt(10); + int opIndex = localRandom.nextInt(10); Operation operation; if (opIndex <= 1) { if (opIndex == 0) { @@ -134,26 +138,29 @@ } // create term if needed - Term term = getOperationTerm(usedTerms); + Term term = getOperationTerm(usedTerms, localRandom); - writer.updateFields(operation, term, fields); + // create fields and update + writer.updateFields(operation, term, getFields(usedTerms, localRandom)); } else { if (opIndex 
== 2) { if (VERBOSE_FIELD_REPLACEMENTS) { System.out.println("REPLACE_DOCUMENTS"); } - Term term = getOperationTerm(usedTerms); - writer.replaceDocument(term, fields); + Term term = getOperationTerm(usedTerms, localRandom); + // create document and replace + writer.replaceDocument(term, getFields(usedTerms, localRandom)); } else { if (VERBOSE_FIELD_REPLACEMENTS) { System.out.println("ADD_DOCUMENT"); } - writer.addDocument(fields); + // create document and add + writer.addDocument(getFields(usedTerms, localRandom)); } } // commit about once every 10 docs - int interCommit = random().nextInt(10); + int interCommit = localRandom.nextInt(10); if (interCommit == 0) { if (VERBOSE_FIELD_REPLACEMENTS) { System.out.println("commit " + (++nCommits)); @@ -167,16 +174,16 @@ writer.close(); } - public Document getFields(HashSet usedTerms) { + public Document getFields(HashSet usedTerms, Random loaclRandom) { Document fields = new Document(); - int nFields = 1 + random().nextInt(5); + int nFields = 1 + loaclRandom.nextInt(5); for (int j = 0; j < nFields; j++) { - boolean indexed = random().nextInt(8) > 0; - int index = random().nextInt(fieldNames.length); + boolean indexed = loaclRandom.nextInt(8) > 0; + int index = loaclRandom.nextInt(fieldNames.length); String fieldName = fieldNames[index]; String value = createFieldValue(fieldTokens[index], fieldName, indexed, - usedTerms); + usedTerms, loaclRandom); if (indexed) { fields.add(new TextField(fieldName, value, Store.NO)); @@ -196,20 +203,20 @@ return fields; } - public Term getOperationTerm(HashSet usedTerms) { + public Term getOperationTerm(HashSet usedTerms, Random loaclRandom) { Term term = null; - boolean used = random().nextInt(5) < 4; + boolean used = loaclRandom.nextInt(5) < 4; if (used && !usedTerms.isEmpty()) { final Iterator iterator = usedTerms.iterator(); - int usedIndex = random().nextInt(usedTerms.size()); + int usedIndex = loaclRandom.nextInt(usedTerms.size()); for (int j = 0; j < usedIndex; j++) { iterator.next(); } term = iterator.next(); } else { // select term - int fieldIndex = random().nextInt(fieldNames.length); - int textIndex = random().nextInt(fieldTokens[fieldIndex].length / 10); + int fieldIndex = loaclRandom.nextInt(fieldNames.length); + int textIndex = loaclRandom.nextInt(fieldTokens[fieldIndex].length / 10); term = new Term(fieldNames[fieldIndex], fieldTokens[fieldIndex][textIndex]); } @@ -220,10 +227,10 @@ } private String createFieldValue(String[] tokens, String fieldName, - boolean indexed, HashSet usedTerms) { + boolean indexed, HashSet usedTerms, Random loaclRandom) { StringBuilder builder = new StringBuilder(); - int index = random().nextInt(Math.min(10, tokens.length)); + int index = loaclRandom.nextInt(Math.min(10, tokens.length)); while (index < tokens.length) { builder.append(tokens[index]); @@ -231,18 +238,184 @@ if (indexed) { usedTerms.add(new Term(fieldName, tokens[index])); } - index += 1 + random().nextInt(10); + index += 1 + loaclRandom.nextInt(10); } return builder.toString(); } public void testRandomIndexGeneration() throws IOException { - addDocuments(); + addDocuments(random()); DirectoryReader directoryReader = DirectoryReader.open(dir); directoryReader.close(); } + public void testIndexEquality() throws IOException { + // create index through updates + final Random localRandom = new Random(3); + addDocuments(localRandom); + RAMDirectory dir1 = new RAMDirectory(); + IndexWriter writer1 = new IndexWriter(dir1, new IndexWriterConfig( + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); 
+ writer1.addIndexes(dir); + writer1.close(); + DirectoryReader reader1 = DirectoryReader.open(dir1); + + // create the same index directly + RAMDirectory dir2 = new RAMDirectory(); + IndexWriter writer2 = new IndexWriter(dir2, + new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + + Document doc = new Document(); + doc.add(new StoredField("f0","elit, magna volutpat. tation ea dolor consequat, facilisis odio te soluta doming facer qui me consuetudium littera per nunc ")); + doc.add(new TextField("f4","consectetuer tincidunt erat nostrud hendrerit dignissim claritatem me etiam quam claram, ", Store.NO)); + doc.add(new TextField("f3","nibh iriure qui liber claritatem. claram, seacula videntur sollemnes ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f0","dolore quis duis iriure illum accumsan blandit tempor nihil facer assum. qui lectores dynamicus, claram, quinta qui sollemnes ")); + doc.add(new TextField("f3","wisi vel accumsan liber qui nunc qui ", Store.NO)); + doc.add(new TextField("f4","adipiscing ea dolore claritatem. est litterarum qui fiant ", Store.NO)); + doc.add(new StoredField("f4","diam hendrerit illum cum claritatem. quam claram, litterarum fiant ")); + doc.add(new TextField("f1","volutpat. nostrud lobortis dolore nulla odio blandit eleifend quod eorum qui formas nunc nobis ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","magna dolore luptatum claritatem investigationes quod per ", Store.NO)); + doc.add(new TextField("f2","elit, sed dolore aliquip commodo eum dignissim feugait doming habent insitam; legunt est qui quarta parum ", Store.NO)); + doc.add(new StoredField("f3","nibh volutpat. in facilisis accumsan luptatum mazim lectores sequitur anteposuerit sollemnes ")); + doc.add(new TextField("f2","euismod suscipit eum dolor molestie at qui duis doming in lius qui notare nunc ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f4","tincidunt velit facilisis dignissim cum iis claram, ", Store.NO)); + doc.add(new StoredField("f4","ullamcorper accumsan delenit dolore nihil claritatem. mutationem clari, ")); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f3","exerci ea esse consequat, facilisis praesent placerat dynamicus, seacula qui ", Store.NO)); + doc.add(new TextField("f2","sed nonummy erat duis eum iriure dignissim duis nam assum. insitam; qui quam nunc futurum. ", Store.NO)); + doc.add(new TextField("f5","velit luptatum augue placerat quam ", Store.NO)); + doc.add(new TextField("f3","minim commodo facilisis qui imperdiet ii claritas seacula ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","tincidunt suscipit dolor eu dignissim delenit congue possim lius anteposuerit in ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","consectetuer illum eleifend processus fiant ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f0","nibh ut ut minim exerci ea duis esse et blandit luptatum facilisi. 
soluta doming quod typi usus quod dynamicus, consuetudium mirum quam quarta clari, in ", Store.NO)); + doc.add(new TextField("f4","wisi facilisis claritatem iis lius mutationem qui ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f4","nibh ullamcorper ea dignissim usus mutationem quarta ")); + doc.add(new StoredField("f4","consectetuer wisi ea illum facilisis assum. mutationem quarta clari, ")); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","ipsum ad duis dolor eu at nam doming habent eorum me consuetudium decima futurum. ", Store.NO)); + doc.add(new StoredField("f5","velit tempor processus putamus et typi, ")); + doc.add(new TextField("f4","adipiscing nibh wisi velit nulla nihil claritatem etiam quarta fiant ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f3","adipiscing wisi nisl consequat, dignissim nobis qui mirum fiant sollemnes ", Store.NO)); + doc.add(new TextField("f0","euismod ut ad nisl dolor eu blandit te eleifend nihil typi qui lectores claritas consuetudium gothica, claram, decima sollemnes ", Store.NO)); + doc.add(new TextField("f0","ut erat ut nisl ea dolor velit vel eros odio qui feugait facilisi. nihil assum. usus ii legunt littera decima nobis sollemnes ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f4","nostrud velit accumsan quod assum. claritatem. etiam et in ")); + doc.add(new TextField("f4","adipiscing ea facilisis nihil usus lius etiam qui ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","dolore tation duis in eu delenit nam placerat in qui quarta ", Store.NO)); + doc.add(new TextField("f2","ut suscipit duis at dignissim delenit soluta insitam; me quam qui futurum. ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f0","diam euismod quis autem consequat, eros iusto delenit feugait option quod habent claritatem claritatem. lectores consuetudium nunc per qui sollemnes ", Store.NO)); + doc.add(new TextField("f4","nibh tincidunt hendrerit nulla usus est quam qui ", Store.NO)); + doc.add(new TextField("f1","adipiscing diam nostrud duis at zzril te nobis congue est demonstraverunt lius consuetudium est claram, qui in ", Store.NO)); + doc.add(new StoredField("f0","nibh euismod magna erat suscipit duis dolor esse et delenit tempor quod typi in legunt littera nunc decima. in ")); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f4","diam ullamcorper dignissim assum. claritatem. me etiam qui clari, ", Store.NO)); + doc.add(new TextField("f0","ipsum ut volutpat. minim autem dolor vulputate vel dolore odio blandit cum nobis mazim placerat facer possim est lectores sequitur consuetudium claram, modo qui in ", Store.NO)); + doc.add(new TextField("f2","tincidunt nisl duis in zzril placerat habent qui parum litterarum qui ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f3","nibh ea consequat, accumsan tempor est dynamicus, seacula typi, videntur sollemnes ")); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f1","volutpat. 
duis dolor esse at iusto delenit doming est facit est consuetudium humanitatis sollemnes ", Store.NO)); + doc.add(new TextField("f2","ipsum ut nisl dolor dignissim nam placerat investigationes processus notare nunc in ", Store.NO)); + doc.add(new TextField("f1","ipsum tincidunt nostrud lobortis in vel nulla dolore placerat facit ii quam littera formas nunc clari, ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f3","adipiscing ea consequat, qui nobis ii mirum et sollemnes ", Store.NO)); + doc.add(new TextField("f5","velit te legere typi, ", Store.NO)); + doc.add(new TextField("f3","nisl in dignissim delenit placerat est claritatem. notare anteposuerit et videntur ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f1","dolore nostrud suscipit lobortis duis vel et delenit liber cum habent usus claritatem. qui formas nobis sollemnes futurum. ", Store.NO)); + doc.add(new TextField("f2","erat tation duis in molestie dignissim liber congue possim me qui litterarum eodem in ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f0","amet, elit, ut minim duis eum esse vel eu iusto blandit nam eleifend nihil typi usus facit legunt notare litterarum per decima clari, ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","euismod suscipit in velit delenit facer legunt quam formas parum ", Store.NO)); + doc.add(new StoredField("f4","ullamcorper accumsan delenit insitam; lius mutationem quarta decima. ")); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f3","ipsum adipiscing wisi nisl consequat, praesent placerat qui saepius. dynamicus, seacula videntur ")); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f0","consectetuer erat minim suscipit ea esse consequat, feugiat accumsan duis cum nihil typi claritatem facit etiam quam claram, quinta modo futurum. ")); + doc.add(new TextField("f4","tincidunt illum cum claritatem. mutationem litterarum ", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","tincidunt erat ad aliquip duis velit dignissim delenit facer insitam; processus qui litterarum formas quarta ", Store.NO)); + doc.add(new StoredField("f2","erat ut nisl duis at feugait congue in lius anteposuerit nunc ")); + doc.add(new TextField("f3","adipiscing minim esse luptatum tempor imperdiet est saepius. seacula fiant ", Store.NO)); + doc.add(new TextField("f0","ipsum elit, magna suscipit dolor eu iusto feugait eleifend quod assum. non est investigationes claritas nunc seacula videntur ", Store.NO)); + doc.add(new TextField("f4","amet, nibh ullamcorper velit nulla dignissim quod insitam; lius decima. 
", Store.NO)); + writer2.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","quis dolore eleifend investigationes mirum per eodem typi, ", Store.NO)); + writer2.addDocument(doc); + + writer2.close(); + DirectoryReader reader2 = DirectoryReader.open(dir2); + + UnorderedIndexData indexData1 = new UnorderedIndexData(reader1); + UnorderedIndexData indexData2 = new UnorderedIndexData(reader2); + + reader1.close(); + reader2.close(); + dir2.close(); + + assertEquals("indexes differ", indexData2, indexData1); + } + public void testStatisticsAfterFieldUpdates() throws IOException { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))); @@ -311,7 +484,7 @@ final StorableField[] f1_0 = stored0.getFields("f1"); assertEquals("wrong numeber of stored fields", 1, f1_0.length); assertEquals("wrong field value", "d", f1_0[0].stringValue()); - + final StoredDocument stored1 = atomicReader.document(1); final StorableField[] f1_1 = stored1.getFields("f1"); assertEquals("wrong numeber of stored fields", 2, f1_1.length); @@ -322,7 +495,7 @@ final StorableField[] f1_2 = stored2.getFields("f1"); assertEquals("wrong numeber of stored fields", 1, f1_2.length); assertEquals("wrong field value", "d", f1_2[0].stringValue()); - + directoryReader.close(); } Index: lucene/core/src/test/org/apache/lucene/util/UnorderedIndexData.java =================================================================== --- lucene/core/src/test/org/apache/lucene/util/UnorderedIndexData.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/util/UnorderedIndexData.java (working copy) @@ -0,0 +1,210 @@ +package org.apache.lucene.util; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.StorableField; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Contains terms and stored fields extracted from a reader, without keeping doc + * IDs order. To be used to compare indexes which contain same documents in + * different order. 
+ */ +public class UnorderedIndexData { + + ArrayList nonEmpty; + + public UnorderedIndexData(IndexReader reader) throws IOException { + Bits liveDocs = MultiFields.getLiveDocs(reader); + + ArrayList docs = new ArrayList(); + for (int i = 0; i < reader.maxDoc(); i++) { + // a null liveDocs means the reader has no deletions, i.e. every doc is live + if (liveDocs == null || liveDocs.get(i)) { + docs.add(new DocumentData(reader.document(i))); + } else { + docs.add(new DocumentData(new StoredDocument())); + } + } + + Fields fields = MultiFields.getFields(reader); + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + Terms terms = fields.terms(field); + TermsEnum termsIterator = terms.iterator(null); + BytesRef term; + while ((term = termsIterator.next()) != null) { + DocsEnum termDocs = MultiFields.getTermDocsEnum(reader, liveDocs, + field, term); + while (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + docs.get(termDocs.docID()).addTerm(field, term.toString(), termDocs.freq()); + } + } + } + + nonEmpty = new ArrayList(); + for (DocumentData doc : docs) { + if (!doc.isEmpty()) { + nonEmpty.add(doc); + } + } + + Collections.sort(nonEmpty, new DocumentDataComparator()); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((nonEmpty == null) ? 0 : nonEmpty.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + UnorderedIndexData other = (UnorderedIndexData) obj; + if (nonEmpty == null) { + if (other.nonEmpty != null) return false; + } else if (!nonEmpty.equals(other.nonEmpty)) { + return false; + } + return true; + } + + private class DocumentData { + + List storedFields; + + Map termsMap = new HashMap(); + + public DocumentData(StoredDocument document) { + storedFields = document.getFields(); + } + + public boolean isEmpty() { + return storedFields.isEmpty() && termsMap.isEmpty(); + } + + public void addTerm(String field, String term, int freq) { + termsMap.put(field + ":" + term, freq); + } + } + + private class DocumentDataComparator implements Comparator { + + @Override + public int compare(DocumentData doc1, DocumentData doc2) { + // start with stored fields + List stored1 = doc1.storedFields; + List stored2 = doc2.storedFields; + if (stored1.isEmpty() && !stored2.isEmpty()) { + return -1; + } else if (!stored1.isEmpty() && stored2.isEmpty()) { + return 1; + } else { + Iterator iter1 = stored1.iterator(); + Iterator iter2 = stored2.iterator(); + while (iter1.hasNext()) { + if (!iter2.hasNext()) { + return -1; + } else { + StorableField f1 = iter1.next(); + StorableField f2 = iter2.next(); + int compFieldName = f1.name().compareTo(f2.name()); + if (compFieldName != 0) { + return compFieldName; + } + compFieldName = f1.stringValue().compareTo(f2.stringValue()); + if (compFieldName != 0) { + return compFieldName; + } + } + } + if (iter2.hasNext()) { + return 1; + } + } + + // now check index terms + Map terms1 = doc1.termsMap; + Map terms2 = doc2.termsMap; + if (terms1.isEmpty() && !terms2.isEmpty()) { + return -1; + } else if (!terms1.isEmpty() && terms2.isEmpty()) { + return 1; + } else { + Iterator iter1 = 
terms1.keySet().iterator(); + Iterator iter2 = terms2.keySet().iterator(); + while (iter1.hasNext()) { + if (!iter2.hasNext()) { + return -1; + } else { + String t1 = iter1.next(); + String t2 = iter2.next(); + int compTerm = t1.compareTo(t2); + if (compTerm != 0) { + return compTerm; + } + int compFreq = terms1.get(t1) - terms2.get(t2); + if (compFreq != 0) { + return compFreq; + } + } + } + if (iter2.hasNext()) { + return 1; + } + } + return 0; + + } + } +}
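
Usage sketch (not part of the patch): the snippet below condenses the flow that TestFieldReplacements.testIndexEquality exercises -- write a base segment, stack a field update onto it through the new IndexWriter.updateFields API, and take an order-insensitive snapshot with the UnorderedIndexData test utility so it can be compared against an index built directly. IndexWriter.updateFields, FieldsUpdate.Operation and UnorderedIndexData are introduced by this patch series and are not stock Lucene APIs; the field names, values and Version constant are illustrative placeholders only.

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldsUpdate.Operation; // added by this patch series
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.UnorderedIndexData;     // test utility added above
import org.apache.lucene.util.Version;

public class FieldUpdateUsageSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40)));

    // Base document, flushed into its own segment.
    Document doc = new Document();
    doc.add(new TextField("f0", "aaa bbb", Store.NO));
    writer.addDocument(doc);
    writer.commit();

    // Stacked update: add a field to every document matching the term f0:aaa.
    // Operation.REPLACE_FIELDS would instead drop earlier values of the updated fields.
    Document update = new Document();
    update.add(new TextField("f1", "ccc", Store.NO));
    writer.updateFields(Operation.ADD_FIELDS, new Term("f0", "aaa"), update);
    writer.close();

    // Order-insensitive snapshot of the resulting index.
    DirectoryReader reader = DirectoryReader.open(dir);
    UnorderedIndexData snapshot = new UnorderedIndexData(reader);
    reader.close();
    dir.close();
  }
}

A second UnorderedIndexData built from an equivalent index written with plain addDocument calls is expected to compare equal to the snapshot above, which is exactly the assertion testIndexEquality makes.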