Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (revision 1454908) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (working copy) @@ -102,17 +102,8 @@ isCompoundFile, null, diagnostics, Collections.unmodifiableMap(attributes)); info.setFiles(files); - int updatesIndex = 1; - while (updatesIndex > 0) { - files = readFilesList(directory, segmentName, updatesIndex, context); - if (files == null) { - updatesIndex = -1; - } else { - info.addFiles(files); - updatesIndex++; - } - } - + addUpdateSegmentsFiles(info, directory, segmentName, context); + success = true; return info; } finally { @@ -124,8 +115,10 @@ } } - private Set readFilesList(Directory dir, String segment, long generation, IOContext context) throws IOException { - final String segFileName = IndexFileNames.fileNameFromGeneration(segment, Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + protected Set readFilesList(Directory dir, String segment, + long generation, IOContext context) throws IOException { + final String segFileName = IndexFileNames.fileNameFromGeneration(segment, + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); if (!dir.fileExists(segFileName)) { return null; } Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (revision 1454908) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (working copy) @@ -125,9 +125,10 @@ } @Override - public void writeFilesList(Directory dir, SegmentInfo si, long generation, IOContext ioContext) throws IOException { - final String segFileName = IndexFileNames.fileNameFromGeneration(si.name, - Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + public void writeUpdatedSegmentFiles(Directory dir, SegmentInfo si, + long generation, IOContext ioContext) throws IOException { + final String segFileName = IndexFileNames.segmentFileName(si.name, "", + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION); si.addFile(segFileName); boolean success = false; Index: lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.Set; import java.util.regex.Pattern; import org.apache.lucene.index.FieldGenerationReplacements; @@ -52,12 +53,24 @@ */ public FieldGenerationReplacements readGenerationReplacements(String field, SegmentInfoPerCommit info, IOContext context) throws IOException { - String fileName = getLastGenerationFileName(field, info.info.dir, info); - if (fileName == null) { - return null; + FieldGenerationReplacements reps = null; + + for (long gen = 1; gen <= info.getUpdateGen(); gen++) { + final String fileName = IndexFileNames.segmentFileName( + IndexFileNames.fileNameFromGeneration(info.info.name, "", gen, true), + field, 
FIELD_GENERATION_REPLACEMENT_EXTENSION); + if (info.info.dir.fileExists(fileName)) { + final FieldGenerationReplacements + newGeneration = internalReadGeneration(info.info.dir, fileName, context); + if (reps == null) { + reps = newGeneration; + } else { + reps.merge(newGeneration); + } + } } - - return internalReadGeneration(info.info.dir, fileName, context); + + return reps; } private FieldGenerationReplacements internalReadGeneration(Directory dir, @@ -78,19 +91,6 @@ } } - private String getLastGenerationFileName(String field, Directory dir, - SegmentInfoPerCommit info) throws IOException { - for (long i = info.getUpdateGen(); i > 0; i--) { - final String fileName = IndexFileNames.segmentFileName( - IndexFileNames.fileNameFromGeneration(info.info.name, "", i, false), - field, FIELD_GENERATION_REPLACEMENT_EXTENSION); - if (dir.fileExists(fileName)) { - return fileName; - } - } - return null; - } - /** * Read persisted field generation replacements from a given input. */ @@ -104,24 +104,15 @@ */ public void writeGenerationReplacement(String field, FieldGenerationReplacements reps, Directory dir, - SegmentInfoPerCommit info, IOContext context) throws IOException { + SegmentInfoPerCommit info, IOContext context, + Set generationReplacementFilenames) throws IOException { if (reps == null) { // nothing new to write return; } - // load replacements from previous file - String prevFileName = getLastGenerationFileName(field, dir, info); - final FieldGenerationReplacements existing; - if (prevFileName != null) { - existing = internalReadGeneration(dir, prevFileName, context); - existing.merge(reps); - } else { - existing = reps; - } - final String nameWithGeneration = IndexFileNames.fileNameFromGeneration( - info.info.name, "", info.getNextUpdateGen(), false); + info.info.name, "", info.getNextUpdateGen(), true); final String fileName = IndexFileNames.segmentFileName(nameWithGeneration, field, FIELD_GENERATION_REPLACEMENT_EXTENSION); @@ -133,13 +124,10 @@ } finally { if (!success) { IOUtils.closeWhileHandlingException(output); - info.info.dir.deleteFile(fileName); + dir.deleteFile(fileName); } else { + generationReplacementFilenames.add(fileName); output.close(); - if (prevFileName != null) { - // remove previous file - info.info.dir.deleteFile(prevFileName); - } } } } Index: lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Set; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; @@ -44,4 +45,47 @@ * @throws IOException If an I/O error occurs */ public abstract SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException; + + /** + * Add files of update segments to the segment info. 
+ * @param info The segment info to update + * @param dir The containing directory + * @param segmentName The name of the handled segment + * @param context The IOContext + * @throws IOException If an I/O error occurs while reading the files lists + */ + protected void addUpdateSegmentsFiles(final SegmentInfo info, Directory dir, + String segmentName, IOContext context) + throws IOException { + int generation = 1; + while (generation > 0) { + Set files = readFilesList(dir, segmentName, generation, context); + if (files == null) { + generation = -1; + } else { + info.addFiles(files); + generation++; + } + } + } + + /** + * Read the list of files related to a certain generation in an updated segment. + * + * @param dir + * The containing directory + * @param segmentName + * The name of the handled segment + * @param generation + * The update generation + * @param context + * The IOContext + * @return A list of the files corresponding to the update generation. + * @throws IOException + * If an I/O error occurs while reading the files list + */ + protected abstract Set readFilesList(Directory dir, + String segmentName, long generation, IOContext context) + throws IOException; + } Index: lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java (working copy) @@ -44,7 +44,8 @@ * @param ioContext IO context to use * @throws IOException If an I/O error occurs */ - public abstract void write(Directory dir, SegmentInfo info, FieldInfos fis, IOContext ioContext) throws IOException; + public abstract void write(Directory dir, SegmentInfo info, FieldInfos fis, + IOContext ioContext) throws IOException; /** * Write the list of files belonging to an updates segment of the segment with @@ -57,5 +58,7 @@ * @throws IOException * If an I/O error occurs */ - public abstract void writeFilesList(Directory dir, SegmentInfo info, long generation, IOContext ioContext) throws IOException; + public abstract void writeUpdatedSegmentFiles(Directory dir, + SegmentInfo info, long generation, IOContext ioContext) + throws IOException; } Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (working copy) @@ -71,16 +71,7 @@ null, diagnostics, Collections.unmodifiableMap(attributes)); si.setFiles(files); - int updatesIndex = 1; - while (updatesIndex > 0) { - files = readFilesList(dir, segment, updatesIndex, context); - if (files == null) { - updatesIndex = -1; - } else { - si.addFiles(files); - updatesIndex++; - } - } + addUpdateSegmentsFiles(si, dir, segment, context); success = true; @@ -95,7 +86,7 @@ } } - private Set readFilesList(Directory dir, String segment, + protected Set readFilesList(Directory dir, String segment, long generation, IOContext context) throws IOException { final String fileName = IndexFileNames.fileNameFromGeneration(segment, Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java =================================================================== ---
lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (working copy) @@ -65,7 +65,7 @@ } finally { if (!success) { IOUtils.closeWhileHandlingException(output); - si.dir.deleteFile(fileName); + dir.deleteFile(fileName); } else { output.close(); } @@ -73,10 +73,10 @@ } @Override - public void writeFilesList(Directory dir, SegmentInfo si, + public void writeUpdatedSegmentFiles(Directory dir, SegmentInfo si, long generation, IOContext ioContext) throws IOException { - final String fileName = IndexFileNames.fileNameFromGeneration(si.name, - Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + final String fileName = IndexFileNames.segmentFileName(si.name, "", + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION); si.addFile(fileName); final IndexOutput output = dir.createOutput(fileName, ioContext); @@ -89,7 +89,7 @@ } finally { if (!success) { IOUtils.closeWhileHandlingException(output); - si.dir.deleteFile(fileName); + dir.deleteFile(fileName); } else { output.close(); } Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (working copy) @@ -181,6 +181,7 @@ int delIDX = deletes.size()-1; List allDeleted = null; + List advanced = null; while (infosIDX >= 0) { //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); @@ -257,9 +258,12 @@ */ delIDX--; infosIDX--; - info.setBufferedDeletesGen(gen); + if (advanced == null) { + advanced = new ArrayList(); + } + advanced.add(info); - } else { + } else if (packet != null && packet.anyDeletes()){ //System.out.println(" gt"); if (coalescedDeletes != null) { @@ -291,10 +295,17 @@ if (infoStream.isEnabled("BD")) { infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] newDelCount=" + delCount + (segAllDeletes ? 
" 100% deleted" : "")); } + if (advanced == null) { + advanced = new ArrayList(); } - info.setBufferedDeletesGen(gen); + advanced.add(info); + } infosIDX--; + } else if (packet != null) { + delIDX--; + } else { + infosIDX--; } } @@ -303,20 +314,31 @@ final long updateSegGen = updateInfo.getBufferedDeletesGen(); for (FrozenBufferedDeletes updatePacket : deletes) { - if (updatePacket.anyUpdates() && updatePacket.delGen() <= updateSegGen) { + if (updatePacket.anyUpdates() && updateSegGen <= updatePacket.delGen()) { assert readerPool.infoIsLive(updateInfo); final ReadersAndLiveDocs rld = readerPool.get(updateInfo, true); final SegmentReader reader = rld.getReader(IOContext.READ); try { - anyNewDeletes |= applyTermUpdates(updatePacket.updateTerms, updatePacket.updateArrays, rld, reader); + anyNewDeletes |= applyTermUpdates(updatePacket.updateTerms, + updatePacket.updateArrays, rld, reader); } finally { rld.release(reader); readerPool.release(rld); } } + if (advanced == null) { + advanced = new ArrayList(); + } + advanced.add(updateInfo); } } + if (advanced != null) { + for (SegmentInfoPerCommit info : advanced) { + info.setBufferedDeletesGen(gen); + } + } + assert checkDeleteStats(); if (infoStream.isEnabled("BD")) { infoStream.message("BD", "applyDeletes took " + (System.currentTimeMillis()-t0) + " msec"); Index: lucene/core/src/java/org/apache/lucene/index/DocConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocConsumer.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/DocConsumer.java (working copy) @@ -19,9 +19,11 @@ import java.io.IOException; +import org.apache.lucene.store.Directory; + abstract class DocConsumer { - abstract void processDocument(FieldInfos.Builder fieldInfos) throws IOException; - abstract void finishDocument() throws IOException; + abstract void processDocument(FieldInfos.Builder fieldInfos, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException; + abstract void finishDocument(Directory directory, SegmentInfo segmentInfo) throws IOException; abstract void flush(final SegmentWriteState state) throws IOException; abstract void abort(); abstract void doAfterFlush(); Index: lucene/core/src/java/org/apache/lucene/index/DocFieldConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldConsumer.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldConsumer.java (working copy) @@ -20,9 +20,12 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.store.Directory; + abstract class DocFieldConsumer { /** Called when DocumentsWriterPerThread decides to create a new - * segment */ + * segment + */ abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; /** Called when an aborting exception is hit */ @@ -32,5 +35,5 @@ abstract DocFieldConsumerPerField addField(FieldInfo fi); - abstract void finishDocument() throws IOException; + abstract void finishDocument(Directory directory, SegmentInfo info) throws IOException; } Index: lucene/core/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (working copy) @@ -19,9 +19,11 @@ import java.io.IOException; +import 
org.apache.lucene.store.Directory; + abstract class DocFieldConsumerPerField { /** Processes all occurrences of a single field */ - abstract void processFields(IndexableField[] fields, int count) throws IOException; + abstract void processFields(IndexableField[] fields, int count, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException; abstract void abort(); abstract FieldInfo getFieldInfo(); } Index: lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Counter; @@ -177,7 +178,7 @@ } @Override - public void processDocument(FieldInfos.Builder fieldInfos) throws IOException { + public void processDocument(FieldInfos.Builder fieldInfos, SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException { consumer.startDocument(); storedConsumer.startDocument(); @@ -216,7 +217,7 @@ ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); for(int i=0;i generationReplacments = liveUpdates - .getFieldGenerationReplacments(); - if (generationReplacments != null) { - for (Entry field : generationReplacments - .entrySet()) { - final GenerationReplacementsFormat repsFormat = codec - .generationReplacementsFormat(); - repsFormat.writeGenerationReplacement(field.getKey(), - field.getValue(), directory, info, IOContext.DEFAULT); + Set generationReplacementFilenames = null; + boolean success = false; + try { + merger.merge(); // merge 'em + + // write field generation replacements + final Map generationReplacments = updates + .getFieldGenerationReplacments(); + if (generationReplacments != null) { + generationReplacementFilenames = new HashSet(); + for (Entry field : generationReplacments + .entrySet()) { + codec.generationReplacementsFormat().writeGenerationReplacement( + field.getKey(), field.getValue(), trackingDir, infoPerCommit, + IOContext.DEFAULT, generationReplacementFilenames); + } + if (generationReplacementFilenames.isEmpty()) { + generationReplacementFilenames = null; + } } + + success = true; + } finally { + if (!success) { + synchronized (this) { + deleter.refresh(info.name); + } + } } - + // flush directly dwpt.clearDeleteSlice(); - dwpt.flush(info.getNextUpdateGen()); + info.setFiles(new HashSet(trackingDir.getCreatedFiles())); + dwpt.sealUpdatedSegment(info, trackingDir, + infoPerCommit.getNextUpdateGen(), generationReplacementFilenames); + + // add the final list of new files to infoPerCommit, must perform here + // since the list could change in sealUpdatedSegment + infoPerCommit.info.addFiles(info.files()); + } finally { if (dwpt.checkAndResetHasAborted()) { flushControl.doOnAbort(perThread); @@ -474,6 +538,8 @@ } finally { perThread.unlock(); } + + return null; } private boolean doFlush(DocumentsWriterPerThread flushingDWPT) @@ -510,7 +576,7 @@ ticket = ticketQueue.addFlushTicket(flushingDWPT); // flush concurrently without locking - final FlushedSegment newSegment = flushingDWPT.flush(-1); + final FlushedSegment newSegment = flushingDWPT.flush(); if (newSegment == null) { actualFlushes--; } else { Index: 
lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -22,14 +22,18 @@ import java.util.Collection; import java.util.HashSet; import java.util.Locale; +import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; +import org.apache.lucene.index.FieldInfos.Builder; +import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.TrackingDirectoryWrapper; import org.apache.lucene.util.ByteBlockPool.Allocator; import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; @@ -85,9 +89,9 @@ final TermsHashConsumer termVectorsWriter = new TermVectorsConsumer(documentsWriterPerThread); final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); - final InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true, - new TermsHash(documentsWriterPerThread, termVectorsWriter, false, null)); - final NormsConsumer normsWriter = new NormsConsumer(); + final InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true, + new TermsHash(documentsWriterPerThread, termVectorsWriter, false, null)); + final NormsConsumer normsWriter = new NormsConsumer(documentsWriterPerThread); final DocInverter docInverter = new DocInverter(documentsWriterPerThread.docState, termsHash, normsWriter); final StoredFieldsConsumer storedFields = new TwoStoredFieldsConsumers( new StoredFieldsProcessor(documentsWriterPerThread), @@ -127,7 +131,6 @@ final SegmentInfoPerCommit segmentInfo; final FieldInfos fieldInfos; final FrozenBufferedDeletes segmentDeletes; - final BufferedUpdates segmentUpdates; final MutableBits liveDocs; final UpdatedSegmentData liveUpdates; final int delCount; @@ -138,8 +141,13 @@ UpdatedSegmentData liveUpdates) { this.segmentInfo = segmentInfo; this.fieldInfos = fieldInfos; - this.segmentDeletes = segmentDeletes != null && segmentDeletes.any() ? 
new FrozenBufferedDeletes(segmentDeletes, segmentUpdates, true) : null; - this.segmentUpdates = segmentUpdates; + if ((segmentDeletes != null && segmentDeletes.any()) + || (segmentUpdates != null && segmentUpdates.any())) { + this.segmentDeletes = new FrozenBufferedDeletes(segmentDeletes, + segmentUpdates, true); + } else { + this.segmentDeletes = null; + } this.liveDocs = liveDocs; this.liveUpdates = liveUpdates; this.delCount = delCount; @@ -190,7 +198,6 @@ BufferedDeletes pendingDeletes; BufferedUpdates pendingUpdates; SegmentInfo segmentInfo; // Current segment we are working on - SegmentInfo baseSegmentInfo; // name of the base segment for segmentInfo boolean aborting = false; // True if an abort is pending boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting @@ -223,6 +230,9 @@ intBlockAllocator = new IntBlockAllocator(bytesUsed); initialize(); this.indexingChain = indexingChain; + // this should be the last call in the ctor + // it really sucks that we need to pull this within the ctor and pass this ref to the chain! + consumer = indexingChain.getChain(this); } public DocumentsWriterPerThread(DocumentsWriterPerThread other, FieldInfos.Builder fieldInfos) { @@ -253,7 +263,7 @@ docState.analyzer = analyzer; docState.docID = numDocsInRAM; if (segmentInfo == null) { - initSegmentInfo(null, -1); + initSegmentInfo(); } if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name); @@ -261,7 +271,7 @@ boolean success = false; try { try { - consumer.processDocument(fieldInfos); + consumer.processDocument(fieldInfos, segmentInfo, directory); } finally { docState.clear(); } @@ -279,7 +289,7 @@ } success = false; try { - consumer.finishDocument(); + consumer.finishDocument(directory, segmentInfo); success = true; } finally { if (!success) { @@ -289,24 +299,14 @@ finishDocument(delTerm, null); } - void initSegmentInfo(SegmentInfo info, long updateGen) { - if (info == null) { - String segment = writer.newSegmentName(); - segmentInfo = new SegmentInfo(directoryOrig, - Constants.LUCENE_MAIN_VERSION, segment, -1, false, codec, null, null); - baseSegmentInfo = null; - } else { - baseSegmentInfo = info; - segmentInfo = new SegmentInfo(directoryOrig, - Constants.LUCENE_MAIN_VERSION, IndexFileNames.fileNameFromGeneration( - info.name, "", updateGen, true), -1, false, codec, null, null); - } + private void initSegmentInfo() { + String segment = writer.newSegmentName(); + segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segment, -1, + false, codec, null, null); assert numDocsInRAM == 0; if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { - infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentInfo.name + " delQueue=" + deleteQueue); + infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segment + " delQueue=" + deleteQueue); } - // reset consumer, may have previous segment name as inner state - consumer = indexingChain.getChain(this); } public int updateDocuments(Iterable docs, Analyzer analyzer, Term delTerm) throws IOException { @@ -314,7 +314,7 @@ assert deleteQueue != null; docState.analyzer = analyzer; if (segmentInfo == null) { - initSegmentInfo(null, -1); + initSegmentInfo(); } if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + 
docState.docID + " seg=" + segmentInfo.name); @@ -330,7 +330,7 @@ boolean success = false; try { - consumer.processDocument(fieldInfos); + consumer.processDocument(fieldInfos, segmentInfo, directory); success = true; } finally { if (!success) { @@ -346,7 +346,7 @@ } success = false; try { - consumer.finishDocument(); + consumer.finishDocument(directory, segmentInfo); success = true; } finally { if (!success) { @@ -413,6 +413,74 @@ } } + /** + * Create a directory and invert the given update fields into it, to be later + * used for adding field updates to stacked segments. + * + * @param fieldsUpdate + * Fields of the update. + * @param globalFieldNumbers + * Field numbers to use. + * @throws IOException + * If inversion goes wrong. + */ + public void invertFieldsUpdate(FieldsUpdate fieldsUpdate, + FieldNumbers globalFieldNumbers) throws IOException { + assert writer.testPoint("DocumentsWriterPerThread addDocument start"); + assert deleteQueue != null; + docState.doc = fieldsUpdate.fields; + docState.analyzer = fieldsUpdate.analyzer; + docState.docID = 0; + Directory updateDir = new RAMDirectory(); + DocConsumer updateConsumer = indexingChain.getChain(this); + Builder updateFieldInfos = new FieldInfos.Builder(globalFieldNumbers); + if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { + infoStream.message("DWPT", Thread.currentThread().getName() + " getUpdateDirectory"); + } + boolean success = false; + try { + try { + updateConsumer.processDocument(updateFieldInfos, new SegmentInfo( + updateDir, Constants.LUCENE_MAIN_VERSION, "_0", -1, false, codec, + null, null), updateDir); + } finally { + docState.clear(); + } + success = true; + } finally { + if (!success) { + if (aborting) { + abort(); + } + } + } + success = false; + try { + SegmentInfo updateSegment = new SegmentInfo(updateDir, + Constants.LUCENE_MAIN_VERSION, FIRST_SEGMENT_NAME, 1, false, codec, + null, null); + updateConsumer.finishDocument(updateDir, updateSegment); + + SegmentWriteState updateFlushState = new SegmentWriteState(infoStream, + updateDir, updateSegment, 0, updateFieldInfos.finish(), writer + .getConfig().getTermIndexInterval(), null, null, + IOContext.DEFAULT); + + updateConsumer.flush(updateFlushState); + + fieldsUpdate.directory = updateDir; + fieldsUpdate.segmentInfo = updateSegment; + fieldsUpdate.fields = null; + //fieldsUpdate.docIDUpto = + + success = true; + } finally { + if (!success) { + abort(); + } + } + } + // Buffer a specific docID for deletion. 
Currently only // used when we hit a exception when adding a document void deleteDocID(int docIDUpto) { @@ -447,9 +515,7 @@ /** Reset after a flush */ private void doAfterFlush() { segmentInfo = null; - if (consumer != null) { - consumer.doAfterFlush(); - } + consumer.doAfterFlush(); directory.getCreatedFiles().clear(); fieldInfos = new FieldInfos.Builder(fieldInfos.globalFieldNumbers); parent.subtractFlushedNumDocs(numDocsInRAM); @@ -476,17 +542,16 @@ } /** Flush all pending docs to a new segment */ - FlushedSegment flush(long updateGen) throws IOException { + FlushedSegment flush() throws IOException { assert numDocsInRAM > 0; assert deleteSlice == null : "all deletes must be applied in prepareFlush"; if (segmentInfo == null) { return null; } segmentInfo.setDocCount(numDocsInRAM); - IOContext context = new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())); flushState = new SegmentWriteState(infoStream, directory, segmentInfo, 0, fieldInfos.finish(), writer.getConfig().getTermIndexInterval(), - pendingDeletes, pendingUpdates, context); + pendingDeletes, pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed()))); final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.; // Apply delete-by-docID now (delete-byDocID only @@ -518,14 +583,9 @@ try { consumer.flush(flushState); pendingDeletes.terms.clear(); - if (updateGen < 0) { - segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); - } else { - segmentInfo = baseSegmentInfo; - segmentInfo.addFiles(new HashSet(directory.getCreatedFiles())); - } + segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); - final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L, updateGen); + final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L, -1L); if (infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : (flushState.segmentInfo.getDocCount() - flushState.delCountOnFlush)) + " deleted docs"); infoStream.message("DWPT", "new segment has " + @@ -562,7 +622,7 @@ FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush, pendingUpdates, flushState.liveUpdates); - sealFlushedSegment(fs, updateGen); + sealFlushedSegment(fs); doAfterFlush(); success = true; @@ -581,7 +641,7 @@ * Seals the {@link SegmentInfo} for the new flushed segment and persists * the deleted documents {@link MutableBits}. 
*/ - void sealFlushedSegment(FlushedSegment flushedSegment, long updateGen) throws IOException { + void sealFlushedSegment(FlushedSegment flushedSegment) throws IOException { assert flushedSegment != null; SegmentInfoPerCommit newSegment = flushedSegment.segmentInfo; @@ -595,7 +655,9 @@ if (writer.useCompoundFile(newSegment)) { // Now build compound file - Collection oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context, updateGen); + Collection oldFiles = IndexWriter.createCompoundFile( + infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, + context, -1L); newSegment.info.setUseCompoundFile(true); writer.deleteNewFiles(oldFiles); } @@ -604,11 +666,7 @@ // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: - if (updateGen < 0) { - codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context); - } else { - codec.segmentInfoFormat().getSegmentInfoWriter().writeFilesList(directory, newSegment.info, updateGen, context); - } + codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context); // TODO: ideally we would freeze newSegment here!! // because any changes after writing the .si will be @@ -651,6 +709,40 @@ } } + void sealUpdatedSegment(SegmentInfo info, Directory directory, + long updateGen, Set generationReplacementFilenames) + throws IOException { + assert updateGen > 0; + boolean success = false; + try { + // TODO : SY : size in bytes! + IOContext context = new IOContext(new FlushInfo(info.getDocCount(), 0)); + if (info.getUseCompoundFile()) { + // Now build compound file + Collection oldFiles = IndexWriter.createCompoundFile( + infoStream, directory, MergeState.CheckAbort.NONE, info, context, + -1L, generationReplacementFilenames); + writer.deleteNewFiles(oldFiles); + } + + // Have codec write files of updated segment. Must do this after creating + // CFS so that 1) .sif isn't slurped into CFS, and 2) .sif reflects + // useCompoundFile=true change above: + codec.segmentInfoFormat().getSegmentInfoWriter() + .writeUpdatedSegmentFiles(directory, info, updateGen, context); + + success = true; + } finally { + if (!success) { + if (infoStream.isEnabled("DWPT")) { + infoStream.message("DWPT", "hit exception " + + "writing compound file for updated segment " + info.name); + } + writer.flushFailed(info); + } + } + } + /** Get current segment info we are writing. */ SegmentInfo getSegmentInfo() { return segmentInfo; @@ -667,8 +759,11 @@ /* if you increase this, you must fix field cache impl for * getTerms/getTermsIndex requires <= 32768 */ final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; + + /* A segment name to be used when inverting field updates into temporary + * directories, using the convention in IndexWriter.newSegmentName(). 
*/ + final static String FIRST_SEGMENT_NAME = "_" + Integer.toString(0, Character.MAX_RADIX); - private static class IntBlockAllocator extends IntBlockPool.Allocator { private final Counter bytesUsed; Index: lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (working copy) @@ -1,6 +1,10 @@ package org.apache.lucene.index; +import java.util.HashSet; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.store.Directory; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -24,7 +28,7 @@ /** * Specifies the operation to perform when updating fields. */ - public enum Operation { + enum Operation { /** * Add the given fields to all existing documents matching the update * criterion. @@ -38,10 +42,14 @@ REPLACE_FIELDS } - public final Operation operation; - public final IndexDocument fields; - public final Analyzer analyzer; - public final int docIDUpto; + final Operation operation; + final Set replacedFields; + final Analyzer analyzer; + final int docIDUpto; + + IndexDocument fields; + Directory directory; + SegmentInfo segmentInfo; /** * An update of fields which is not assigned to a specific live segment. * @param operation * The type of update operation. * @param fields - * The fields to use in the update. + * The fields to use in the update operation. * @param analyzer * The analyzer to use in the update. + * @param docIDUpto + * Document ID of the last document added before this field update. */ public FieldsUpdate(Operation operation, IndexDocument fields, - Analyzer analyzer) { + Analyzer analyzer, int docIDUpto) { + this.fields = fields; this.operation = operation; - this.fields = fields; + if (operation == Operation.ADD_FIELDS) { + replacedFields = null; + } else { + replacedFields = new HashSet(); + for (IndexableField field : fields.indexableFields()) { + replacedFields.add(field.name()); + } + for (StorableField field : fields.storableFields()) { + replacedFields.add(field.name()); + } + } this.analyzer = analyzer; - this.docIDUpto = -1; + this.docIDUpto = docIDUpto; } /** @@ -72,11 +93,13 @@ */ public FieldsUpdate(FieldsUpdate other, int docIDUpto) { this.operation = other.operation; - this.fields = other.fields; + this.replacedFields = other.replacedFields; this.analyzer = other.analyzer; this.docIDUpto = docIDUpto; + this.directory = other.directory; + this.segmentInfo = other.segmentInfo; } - + /* Order FieldsUpdate by increasing docIDUpto */ @Override public int compareTo(FieldsUpdate other) { Index: lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -23,6 +23,7 @@ import java.util.Map; import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.IOUtils; @@ -114,7 +115,7 @@ } @Override - void finishDocument(TermsHash termsHash) { + void finishDocument(TermsHash termsHash, Directory directory, SegmentInfo info) { } @Override Index:
lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -180,8 +180,14 @@ /** locates the boundary of the segment name, or -1 */ private static int indexOfSegmentName(String filename) { + int offset = filename.startsWith("__") ? 2 : 1; + return indexOfSegmentName(filename, offset); + } + + /** locates the boundary of the segment name starting from given offset, or -1 */ + private static int indexOfSegmentName(String filename, int offset) { // If it is a .del file, there's an '_' after the first character - int idx = filename.indexOf('_', 1); + int idx = filename.indexOf('_', offset); if (idx == -1) { // If it's not, strip everything that's before the '.' idx = filename.indexOf('.'); @@ -235,4 +241,8 @@ // All files created by codecs much match this pattern (we // check this in SegmentInfo.java): static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[_]?[a-z0-9]+(_.*)?\\..*"); + + public static boolean isUpdatedSegmentFile(String file) { + return file.startsWith("__"); + } } Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -891,11 +891,6 @@ */ public void close(boolean waitForMerges) throws IOException { - // commit pending updates - if (updatesPending) { - commitInternal(); - } - // Ensure that only one thread actually gets to do the // closing, and make sure no commit is also in progress: synchronized (commitLock) { @@ -948,8 +943,6 @@ + waitForMerges); } - docWriter.close(); - try { // Only allow a new merge to be triggered if we are // going to wait for merges: @@ -959,6 +952,8 @@ docWriter.abort(); // already closed } + docWriter.close(); + } finally { try { // clean up merge scheduler in all cases, although flushing may have @@ -1188,7 +1183,7 @@ */ public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException { - updateDocument(null, doc, analyzer); + replaceDocument(null, doc, analyzer); } /** @@ -1407,8 +1402,8 @@ boolean success = false; boolean anySegmentFlushed = false; try { - anySegmentFlushed = docWriter.updateFields(term, new FieldsUpdate( - operation, fields, analyzer)); + anySegmentFlushed = docWriter.updateFields(term, operation, fields, + analyzer, globalFieldNumberMap); success = true; updatesPending = true; } finally { @@ -2407,11 +2402,16 @@ deleter.checkpoint(segmentInfos, false); } - void writeSegmentUpdates(SegmentInfoPerCommit segment, + void writeSegmentUpdates(SegmentInfoPerCommit info, UpdatedSegmentData updates, IOContext context) throws IOException { - docWriter.writeUpdatedSegment(updates, segment, this.deleter); - - segment.advanceUpdateGen(); + // add updates, single update per document in each round, until all updates + // were added + while (updates != null) { + updates = docWriter.writeUpdatedSegment(info, updates, + config.getTermIndexInterval(), globalFieldNumberMap, deleter); + info.advanceUpdateGen(); + } + deleter.checkpoint(segmentInfos, false); } synchronized void publishFrozenDeletes(FrozenBufferedDeletes packet) { @@ -2441,7 +2441,7 @@ // Publishing the segment must be synched on IW -> BDS to make the sure // that no 
merge prunes away the seg. private delete packet final long nextGen; - if (packet != null && (packet.anyDeletes())) { + if (packet != null && (packet.anyDeletes() || packet.anyUpdates())) { nextGen = bufferedDeletesStream.push(packet); } else { // Since we don't have a delete packet to apply we can get a new @@ -2787,15 +2787,14 @@ segName, info.info.getDocCount(), info.info.getUseCompoundFile(), info.info.getCodec(), info.info.getDiagnostics(), attributes); SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, - info.getDelCount(), info.getDelGen(), -1L); + info.getDelCount(), info.getDelGen(), info.getUpdateGen()); Set segFiles = new HashSet(); // Build up new segment's file names. Must do this // before writing SegmentInfo: for (String file : info.files()) { - final String newFileName; - newFileName = segName + IndexFileNames.stripSegmentName(file); + final String newFileName = getNewFileName(file, segName); segFiles.add(newFileName); } newInfo.setFiles(segFiles); @@ -2817,10 +2816,7 @@ // Copy the segment's files for (String file : info.files()) { - - final String newFileName = segName - + IndexFileNames.stripSegmentName(file); - + final String newFileName = getNewFileName(file, segName); if (siFiles.contains(newFileName)) { // We already rewrote this above continue; @@ -2844,6 +2840,19 @@ return newInfoPerCommit; } + + /** + * Get a new name for a given file to be located in given segment. + */ + public String getNewFileName(String file, String segName) { + final String newFileName; + if (IndexFileNames.isUpdatedSegmentFile(file)) { + newFileName = "_" + segName + IndexFileNames.stripSegmentName(file); + } else { + newFileName = segName + IndexFileNames.stripSegmentName(file); + } + return newFileName; + } /** * A hook for extending classes to execute operations after pending added and @@ -3964,7 +3973,7 @@ assert delCount > reader.numDeletedDocs(); // beware of zombies SegmentReader newReader = new SegmentReader(info, context, - reader.core, liveDocs, info.info.getDocCount() - delCount); + reader.core, reader.updates, liveDocs, info.info.getDocCount() - delCount); boolean released = false; try { rld.release(reader); @@ -4552,6 +4561,23 @@ static final Collection createCompoundFile(InfoStream infoStream, Directory directory, CheckAbort checkAbort, final SegmentInfo info, IOContext context, long updateGen) throws IOException { + return createCompoundFile(infoStream, directory, checkAbort, info, context, + updateGen, null); + } + + /** + * NOTE: this method creates a compound file for all files returned by + * info.files(). While, generally, this may include separate norms and + * deletion files, this SegmentInfo must not reference such files when this + * method is called, because they are not allowed within a compound file. The + * value of updateGen for a base segment must be negative. + * This version allows excluding files given by file names from the compound + * file. 
+ */ + static final Collection createCompoundFile(InfoStream infoStream, + Directory directory, CheckAbort checkAbort, final SegmentInfo info, + IOContext context, long updateGen, + Set excludedFiles) throws IOException { String fileName = IndexFileNames.fileNameFromGeneration(info.name, IndexFileNames.COMPOUND_FILE_EXTENSION, updateGen, true); @@ -4579,8 +4605,16 @@ } } } - final String cfeFileName = IndexFileNames.segmentFileName(prefix, "", - IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); + if (excludedFiles != null) { + files = new HashSet(files); + files.removeAll(excludedFiles); + } + String cfeFileName = IndexFileNames.fileNameFromGeneration(info.name, + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION, updateGen, true); + if (cfeFileName == null) { + cfeFileName = IndexFileNames.segmentFileName(prefix, "", + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); + } CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); IOException prior = null; Index: lucene/core/src/java/org/apache/lucene/index/InvertedDocConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/InvertedDocConsumer.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/InvertedDocConsumer.java (working copy) @@ -20,6 +20,8 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.store.Directory; + abstract class InvertedDocConsumer { /** Abort (called after hitting AbortException) */ @@ -32,5 +34,5 @@ abstract void startDocument() throws IOException; - abstract void finishDocument() throws IOException; + abstract void finishDocument(Directory directory, SegmentInfo info) throws IOException; } Index: lucene/core/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java (working copy) @@ -2,6 +2,8 @@ import java.io.IOException; +import org.apache.lucene.store.Directory; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -20,6 +22,6 @@ */ abstract class InvertedDocEndConsumerPerField { - abstract void finish() throws IOException; + abstract void finish(SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException; abstract void abort(); } Index: lucene/core/src/java/org/apache/lucene/index/NormsConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/NormsConsumer.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/NormsConsumer.java (working copy) @@ -34,6 +34,8 @@ final class NormsConsumer extends InvertedDocEndConsumer { + public NormsConsumer(DocumentsWriterPerThread documentsWriterPerThread) {} + @Override void abort() {} Index: lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.Directory; final class NormsConsumerPerField extends InvertedDocEndConsumerPerField implements Comparable { private final FieldInfo fieldInfo; @@ -40,25 +41,25 @@ } @Override - void finish() throws IOException { + void finish(SegmentInfo segmentInfo, Directory trackingDirectory) throws IOException { if (fieldInfo.isIndexed() && !fieldInfo.omitsNorms()) { - if (consumer == null) { - fieldInfo.setNormValueType(FieldInfo.DocValuesType.NUMERIC); - consumer = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed); + if (consumer == null) { + fieldInfo.setNormValueType(FieldInfo.DocValuesType.NUMERIC); + consumer = new NumericDocValuesWriter(fieldInfo, docState.docWriter.bytesUsed); } consumer.addValue(docState.docID, similarity.computeNorm(fieldState)); - } + } } void flush(SegmentWriteState state, DocValuesConsumer normsWriter) throws IOException { - int docCount = state.segmentInfo.getDocCount(); - if (consumer == null) { - return; // null type - not omitted but not written - - // meaning the only docs that had - // norms hit exceptions (but indexed=true is set...) - } - consumer.finish(docCount); - consumer.flush(state, normsWriter); + int docCount = state.segmentInfo.getDocCount(); + if (consumer == null) { + return; // null type - not omitted but not written - + // meaning the only docs that had + // norms hit exceptions (but indexed=true is set...) 
+ } + consumer.finish(docCount); + consumer.flush(state, normsWriter); } boolean isEmpty() { Index: lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (working copy) @@ -224,7 +224,9 @@ } shared = true; if (liveDocs != null) { - return new SegmentReader(reader.getSegmentInfo(), context, reader.core, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount); + return new SegmentReader(reader.getSegmentInfo(), context, reader.core, + reader.updates, liveDocs, info.info.getDocCount() + - info.getDelCount() - pendingDeleteCount); } else { assert reader.getLiveDocs() == liveDocs; reader.incRef(); Index: lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (working copy) @@ -100,7 +100,7 @@ private final Set coreClosedListeners = Collections.synchronizedSet(new LinkedHashSet()); - SegmentCoreReaders(SegmentReader owner, SegmentInfoPerCommit si, long updageGen, IOContext context, int termsIndexDivisor) throws IOException { + SegmentCoreReaders(SegmentReader owner, SegmentInfo si, long updageGen, IOContext context, int termsIndexDivisor) throws IOException { if (termsIndexDivisor == 0) { throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); @@ -109,11 +109,12 @@ final SegmentInfo info; final String infoName; if (updageGen == -1) { - info = si.info; + info = si; infoName = info.name; } else { - info = new SegmentInfo(si.info, updageGen); - infoName = IndexFileNames.fileNameFromGeneration(si.info.name, "", updageGen, true); + info = new SegmentInfo(si, updageGen); + info.setDocCount(si.getDocCount()); + infoName = IndexFileNames.fileNameFromGeneration(si.name, "", updageGen, true); } Directory dir = info.dir; Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -101,7 +101,8 @@ this.dir = info.dir; this.version = info.version; this.name = IndexFileNames.updatedSegmentFileNameFromGeneration(info.name, gen); - this.docCount = info.docCount; + //this.docCount = info.docCount; + this.docCount = -1; this.isCompoundFile = info.isCompoundFile; this.codec = info.codec; this.diagnostics = info.diagnostics; Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (working copy) @@ -53,7 +53,8 @@ * @param info {@link SegmentInfo} that we wrap * @param delCount number of deleted documents in this segment * @param delGen deletion generation number (used to name - deletion files) + * deletion files) + * @param updateGen update generation number **/ public SegmentInfoPerCommit(SegmentInfo info, int delCount, long delGen, long updateGen) { 
this.info = info; Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -76,7 +76,7 @@ } } - void add(SegmentReader reader) { + void add(AtomicReader reader) { mergeState.readers.add(reader); } Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -51,8 +51,8 @@ private final int numDocs; final SegmentCoreReaders core; + final SegmentCoreReaders[] updates; - private SegmentCoreReaders[] updates; private final IOContext context; private Fields fields; private FieldInfos fieldInfos; @@ -73,8 +73,8 @@ IOContext context) throws IOException { this.si = si; this.context = context; - core = new SegmentCoreReaders(this, si, -1, context, termInfosIndexDivisor); - initUpdates(si, termInfosIndexDivisor, context); + core = new SegmentCoreReaders(this, si.info, -1, context, termInfosIndexDivisor); + updates = initUpdates(si, termInfosIndexDivisor, context); boolean success = false; try { if (si.hasDeletions()) { @@ -109,8 +109,8 @@ * loading new live docs from a new deletes file. Used by openIfChanged. */ SegmentReader(SegmentInfoPerCommit si, SegmentCoreReaders core, - IOContext context) throws IOException { - this(si, context, core, si.info.getCodec().liveDocsFormat() + SegmentCoreReaders[] updates, IOContext context) throws IOException { + this(si, context, core, updates, si.info.getCodec().liveDocsFormat() .readLiveDocs(si.info.dir, si, context), si.info.getDocCount() - si.getDelCount()); } @@ -121,12 +121,12 @@ * NRT reader */ SegmentReader(SegmentInfoPerCommit si, IOContext context, - SegmentCoreReaders core, Bits liveDocs, int numDocs) { + SegmentCoreReaders core, SegmentCoreReaders[] updates, Bits liveDocs, int numDocs) { this.si = si; this.context = context; this.core = core; core.incRef(); - this.updates = null; + this.updates = updates; // TODO : handle NRT updates, add field liveUpdates assert liveDocs != null; @@ -135,17 +135,18 @@ this.numDocs = numDocs; } - private void initUpdates(SegmentInfoPerCommit si, int termInfosIndexDivisor, + private SegmentCoreReaders[] initUpdates(SegmentInfoPerCommit si, int termInfosIndexDivisor, IOContext context) throws IOException { if (si.hasUpdates()) { - updates = new SegmentCoreReaders[(int) si.getUpdateGen()]; - for (int i = 0; i < updates.length; i++) { - updates[i] = new SegmentCoreReaders(this, si, i + 1, context, + SegmentCoreReaders[] newUpdates = new SegmentCoreReaders[(int) si + .getUpdateGen()]; + for (int i = 0; i < newUpdates.length; i++) { + newUpdates[i] = new SegmentCoreReaders(this, si.info, i + 1, context, termInfosIndexDivisor); } - return; + return newUpdates; } - updates = null; + return null; } @Override @@ -453,7 +454,16 @@ @Override public NumericDocValues getNormValues(String field) throws IOException { ensureOpen(); - return core.getNormValues(field); + NumericDocValues normValues = core.getNormValues(field); + if (updates != null) { + for (final SegmentCoreReaders updateReader : updates) { + NumericDocValues updateNormValues = updateReader.getNormValues(field); + if (updateNormValues != null) { + normValues = 
updateNormValues; + } + } + } + return normValues; } /** Index: lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java (working copy) @@ -87,7 +87,7 @@ } private void init() throws IOException { - activeEnums = new TreeSet(); + activeEnums = new TreeSet(new InnerTermsEnumFullComparator()); for (int i = 0; i < subTerms.length; i++) { if (subTerms[i] != null) { final TermsEnum termsEnum = subTerms[i].iterator(null); @@ -188,6 +188,14 @@ } } + if (activeMap.isEmpty()) { + return null; + } + + if (replacements == null && activeMap.size() == 1) { + return activeMap.keySet().iterator().next(); + } + return new StackedDocsEnum(activeMap, replacements); } @@ -208,6 +216,10 @@ return null; } + if (replacements == null && activeMap.size() == 1) { + return (DocsAndPositionsEnum) activeMap.keySet().iterator().next(); + } + return new StackedDocsEnum(activeMap, replacements); } @@ -238,9 +250,29 @@ @Override public int compareTo(InnerTermsEnum o) { - return comparator.compare(this.term, o.term); + int diff = comparator.compare(this.term, o.term); + if (diff != 0) { + return diff; + } + return this.index - o.index; + } } - + + /** + * A comparator which orders {@link InnerTermsEnum} instances by term, breaking ties by sub-enum index. + */ + private class InnerTermsEnumFullComparator implements Comparator { + + @Override + public int compare(InnerTermsEnum arg0, InnerTermsEnum arg1) { + int diff = comparator.compare(arg0.term, arg1.term); + if (diff != 0) { + return diff; + } + return arg0.index - arg1.index; + } + + } } Index: lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java (working copy) @@ -172,7 +172,8 @@ // Steal the ref returned by SegmentReader ctor: assert infos.info(i).info.dir == newReaders[i].getSegmentInfo().info.dir; assert infos.info(i).hasDeletions(); - newReaders[i] = new SegmentReader(infos.info(i), newReaders[i].core, IOContext.READ); + newReaders[i] = new SegmentReader(infos.info(i), + newReaders[i].core, newReaders[i].updates, IOContext.READ); } } success = true; Index: lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java (working copy) @@ -19,10 +19,12 @@ import java.io.IOException; +import org.apache.lucene.store.Directory; + abstract class StoredFieldsConsumer { - abstract void addField(int docID, StorableField field, FieldInfo fieldInfo) throws IOException; - abstract void flush(SegmentWriteState state) throws IOException; - abstract void abort() throws IOException; - abstract void startDocument() throws IOException; - abstract void finishDocument() throws IOException; + abstract void addField(int docID, StorableField field, FieldInfo fieldInfo) throws IOException; + abstract void flush(SegmentWriteState state) throws IOException; + abstract void abort() throws IOException; + abstract void startDocument() throws IOException; + abstract void finishDocument(Directory directory, SegmentInfo info) throws
IOException; } Index: lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -66,7 +67,7 @@ // It's possible that all documents seen in this segment // hit non-aborting exceptions, in which case we will // not have yet init'd the FieldsWriter: - initFieldsWriter(state.context); + initFieldsWriter(state.context, state.directory, state.segmentInfo); fill(numDocs); } @@ -81,9 +82,9 @@ } } - private synchronized void initFieldsWriter(IOContext context) throws IOException { + private synchronized void initFieldsWriter(IOContext context, Directory directory, SegmentInfo info) throws IOException { if (fieldsWriter == null) { - fieldsWriter = codec.storedFieldsFormat().fieldsWriter(docWriter.directory, docWriter.getSegmentInfo(), context); + fieldsWriter = codec.storedFieldsFormat().fieldsWriter(directory, info, context); lastDocID = 0; } } @@ -113,10 +114,10 @@ } @Override - void finishDocument() throws IOException { + void finishDocument(Directory directory, SegmentInfo info) throws IOException { assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start"); - initFieldsWriter(IOContext.DEFAULT); + initFieldsWriter(IOContext.DEFAULT, directory, info); fill(docState.docID); if (fieldsWriter != null && numStoredFields > 0) { Index: lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (working copy) @@ -21,6 +21,7 @@ import java.util.Map; import org.apache.lucene.codecs.TermVectorsWriter; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; @@ -83,16 +84,16 @@ } } - private final void initTermVectorsWriter() throws IOException { + private final void initTermVectorsWriter(Directory directory, SegmentInfo info) throws IOException { if (writer == null) { IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed())); - writer = docWriter.codec.termVectorsFormat().vectorsWriter(docWriter.directory, docWriter.getSegmentInfo(), context); + writer = docWriter.codec.termVectorsFormat().vectorsWriter(directory, info, context); lastDocID = 0; } } @Override - void finishDocument(TermsHash termsHash) throws IOException { + void finishDocument(TermsHash termsHash, Directory directory, SegmentInfo info) throws IOException { assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start"); @@ -100,7 +101,7 @@ return; } - initTermVectorsWriter(); + initTermVectorsWriter(directory, info); fill(docState.docID); Index: lucene/core/src/java/org/apache/lucene/index/TermsHash.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TermsHash.java (revision 1454908) +++ 
lucene/core/src/java/org/apache/lucene/index/TermsHash.java (working copy) @@ -21,6 +21,7 @@ import java.util.HashMap; import java.util.Map; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; @@ -126,10 +127,10 @@ } @Override - void finishDocument() throws IOException { - consumer.finishDocument(this); + void finishDocument(Directory directory, SegmentInfo info) throws IOException { + consumer.finishDocument(this, directory, info); if (nextTermsHash != null) { - nextTermsHash.consumer.finishDocument(nextTermsHash); + nextTermsHash.consumer.finishDocument(nextTermsHash, directory, info); } } Index: lucene/core/src/java/org/apache/lucene/index/TermsHashConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TermsHashConsumer.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/TermsHashConsumer.java (working copy) @@ -20,10 +20,12 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.store.Directory; + abstract class TermsHashConsumer { abstract void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException; abstract void abort(); abstract void startDocument() throws IOException; - abstract void finishDocument(TermsHash termsHash) throws IOException; + abstract void finishDocument(TermsHash termsHash, Directory directory, SegmentInfo info) throws IOException; abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); } Index: lucene/core/src/java/org/apache/lucene/index/TwoStoredFieldsConsumers.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TwoStoredFieldsConsumers.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/TwoStoredFieldsConsumers.java (working copy) @@ -19,6 +19,8 @@ import java.io.IOException; +import org.apache.lucene.store.Directory; + /** Just switches between two {@link DocFieldConsumer}s. 
*/ class TwoStoredFieldsConsumers extends StoredFieldsConsumer { @@ -61,8 +63,8 @@ } @Override - void finishDocument() throws IOException { - first.finishDocument(); - second.finishDocument(); + void finishDocument(Directory directory, SegmentInfo info) throws IOException { + first.finishDocument(directory, info); + second.finishDocument(directory, info); } } Index: lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (revision 1454908) +++ lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (working copy) @@ -1,20 +1,17 @@ package org.apache.lucene.index; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.SortedSet; +import java.util.PriorityQueue; import java.util.TreeMap; -import java.util.TreeSet; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.index.FieldsUpdate.Operation; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ @@ -38,42 +35,57 @@ */ class UpdatedSegmentData { + static final FieldInfos EMPTY_FIELD_INFOS = new FieldInfos(new FieldInfo[0]); + /** Updates mapped by doc ID, for each do sorted list of updates. */ - private TreeMap> updatesMap; + private TreeMap> updatesMap; - public long generation; + /** The updates generation currently being written. */ + private long generation; - private Map fieldGenerationReplacments; + private Map fieldGenerationReplacments = new HashMap(); - private Iterator>> updatesIterator; + private Iterator>> updatesIterator; private int currDocID; private int nextDocID; private int numDocs; - private SortedSet nextUpdate; + private PriorityQueue nextUpdate; private Analyzer analyzer; + private int termsIndexDivisor; + UpdatedSegmentData() { - updatesMap = new TreeMap>(); + updatesMap = new TreeMap>(); } - void addUpdate(int docID, FieldsUpdate update) { - SortedSet prevUpdates = updatesMap.get(docID); + void addUpdate(int docID, FieldsUpdate fieldsUpdate) { + if (docID >= fieldsUpdate.docIDUpto) { + return; + } + PriorityQueue prevUpdates = updatesMap.get(docID); if (prevUpdates == null) { - prevUpdates = new TreeSet(); + prevUpdates = new PriorityQueue(); updatesMap.put(docID, prevUpdates); } - prevUpdates.add(update); + prevUpdates.add(fieldsUpdate); } void addUpdates(int docID, FieldsUpdate[] updatesArray) { - SortedSet prevUpdates = updatesMap.get(docID); + PriorityQueue prevUpdates = updatesMap.get(docID); if (prevUpdates == null) { - prevUpdates = new TreeSet(); - updatesMap.put(docID, prevUpdates); + prevUpdates = new PriorityQueue(); } for (int i = 0; i < updatesArray.length; i++) { - prevUpdates.add(updatesArray[i]); + FieldsUpdate fieldsUpdate = updatesArray[i]; + if (docID < fieldsUpdate.docIDUpto) { + prevUpdates.add(fieldsUpdate); + } } + if (!prevUpdates.isEmpty()) { + updatesMap.put(docID, prevUpdates); + } } boolean hasUpdates() { @@ -87,12 +99,16 @@ * The updates generation. 
* @param numDocs * number of documents in the base segment + * @param termsIndexDivisor + * Terms index divisor to use in temporary segments */ - void startWriting(long generation, int numDocs) { + void startWriting(long generation, int numDocs, int termsIndexDivisor) { this.generation = generation; this.numDocs = numDocs; + this.termsIndexDivisor = termsIndexDivisor; updatesIterator = updatesMap.entrySet().iterator(); currDocID = 0; + fieldGenerationReplacments.clear(); // fetch the first actual updates document if exists nextDocUpdate(); } @@ -102,7 +118,7 @@ */ private void nextDocUpdate() { if (updatesIterator.hasNext()) { - Entry> docUpdates = updatesIterator + Entry> docUpdates = updatesIterator .next(); nextDocID = docUpdates.getKey(); nextUpdate = docUpdates.getValue(); @@ -112,39 +128,6 @@ } } - /** - * Get the next document to put in the updates index, could be an empty - * document. Updates the analyzer. - * - * @throws IOException - * If different analyzers were assigned to field updates affecting - * the next document. - */ - IndexDocument nextDocument() throws IOException { - IndexDocument toReturn = null; - if (currDocID < nextDocID) { - // empty document required - if (currDocID == numDocs - 1) { - // add document with stored field for getting right size of segment when - // reading stored documents - toReturn = STORED_FIELD_DOCUMENT; - } else { - toReturn = EMPTY_DOCUMENT; - } - } else if (currDocID < numDocs) { - // return an actual updates document... - toReturn = new UpdatesIndexDocument(nextUpdate); - // ... and fetch the next one if exists - nextDocUpdate(); - } else { - // no more documents required - return null; - } - - currDocID++; - return toReturn; - } - Analyzer getAnalyzer() { return analyzer; } @@ -153,136 +136,172 @@ return fieldGenerationReplacments; } - /** - * An {@link IndexDocument} containing all the updates to a certain document - * in a stacked segment, taking into account replacements. - *
- * Constructing an {@link UpdatesIndexDocument} also updates the containing - * {@link UpdatedSegmentData}'s analyzer and its - * {@link FieldGenerationReplacements} vectors for the relevant fields. - */ - private class UpdatesIndexDocument implements IndexDocument { - - Map> indexablesByField = new HashMap>(); - Map> storablesByField = new HashMap>(); - - public UpdatesIndexDocument(SortedSet fieldsUpdates) - throws IOException { - boolean setAnalyzer = true; - analyzer = null; - for (FieldsUpdate fieldsUpdate : fieldsUpdates) { - // set analyzer and check for analyzer conflict - if (setAnalyzer) { - analyzer = fieldsUpdate.analyzer; - setAnalyzer = false; - } else if (analyzer != fieldsUpdate.analyzer) { - throw new IOException( - "two analyzers assigned to one updated document"); - } - - if (fieldsUpdate.operation == Operation.REPLACE_FIELDS) { - // handle fields replacement - for (IndexableField field : fieldsUpdate.fields.indexableFields()) { - replaceField(field.name()); + AtomicReader nextReader() throws IOException { + AtomicReader toReturn = null; + if (currDocID < nextDocID) { + // empty documents reader required + toReturn = new UpdateAtomicReader(nextDocID - currDocID); + currDocID = nextDocID; + } else if (currDocID < numDocs) { + // get the an actual updates reader... + FieldsUpdate update = nextUpdate.poll(); + toReturn = new UpdateAtomicReader(update.directory, update.segmentInfo, + IOContext.DEFAULT); + + // ... and if done for this document remove from updates map + if (nextUpdate.isEmpty()) { + updatesIterator.remove(); + } + + // add generation replacements if exist + if (update.replacedFields != null) { + for (String fieldName : update.replacedFields) { + FieldGenerationReplacements fieldReplacement = fieldGenerationReplacments + .get(fieldName); + if (fieldReplacement == null) { + fieldReplacement = new FieldGenerationReplacements(); + fieldGenerationReplacments.put(fieldName, fieldReplacement); } - for (StorableField field : fieldsUpdate.fields.storableFields()) { - replaceField(field.name()); - } + fieldReplacement.set(currDocID, generation); } - - // add new fields - for (IndexableField field : fieldsUpdate.fields.indexableFields()) { - List fieldList = indexablesByField.get(field.name()); - if (fieldList == null) { - fieldList = new ArrayList(); - indexablesByField.put(field.name(), fieldList); - } - fieldList.add(field); - } - for (StorableField field : fieldsUpdate.fields.storableFields()) { - List fieldList = storablesByField.get(field.name()); - if (fieldList == null) { - fieldList = new ArrayList(); - storablesByField.put(field.name(), fieldList); - } - fieldList.add(field); - } } + // move to next doc id + nextDocUpdate(); + currDocID++; } - private void replaceField(String fieldName) { - // remove previous fields - indexablesByField.remove(fieldName); - storablesByField.remove(fieldName); - - // update field generation replacement vector - if (fieldGenerationReplacments == null) { - fieldGenerationReplacments = new HashMap(); + return toReturn; + } + + private class UpdateAtomicReader extends AtomicReader { + + final private SegmentCoreReaders core; + final private int numDocs; + + /** + * Constructor with fields directory, for actual updates. + * + * @param fieldsDir + * Directory with inverted fields. + * @param segmentInfo + * Info of the inverted fields segment. + * @param context + * IOContext to use. + * @throws IOException + * If cannot create the reader. 
+ */ + UpdateAtomicReader(Directory fieldsDir, SegmentInfo segmentInfo, + IOContext context) throws IOException { + core = new SegmentCoreReaders(null, segmentInfo, -1, context, + termsIndexDivisor); + numDocs = 1; + } + + /** + * Constructor without a fields directory, for a reader of empty filler documents that only keeps doc IDs aligned. + */ + UpdateAtomicReader(int numDocs) { + core = null; + this.numDocs = numDocs; + } + + @Override + public Fields fields() throws IOException { + if (core == null) { + return null; } - FieldGenerationReplacements fieldReplacement = fieldGenerationReplacments - .get(fieldName); - if (fieldReplacement == null) { - fieldReplacement = new FieldGenerationReplacements(); - fieldGenerationReplacments.put(fieldName, fieldReplacement); - } - fieldReplacement.set(currDocID, generation); + return core.fields; } @Override - public Iterable indexableFields() { - List indexableFields = new ArrayList(); - for (List byField : indexablesByField.values()) { - indexableFields.addAll(byField); + public FieldInfos getFieldInfos() { + if (core == null) { + return EMPTY_FIELD_INFOS; } - return indexableFields; + return core.fieldInfos; } @Override - public Iterable storableFields() { - List storableFields = new ArrayList(); - for (List byField : storablesByField.values()) { - storableFields.addAll(byField); + public Bits getLiveDocs() { + return null; + } + + @Override + public Fields getTermVectors(int docID) throws IOException { + if (core == null) { + return null; } - return storableFields; + return core.termVectorsLocal.get().get(docID); } - } - - /** - * An empty document to be used as filler to maintain doc IDs in stacked - * segments. - */ - private static final IndexDocument EMPTY_DOCUMENT = new IndexDocument() { @Override - public Iterable storableFields() { - return Collections.emptyList(); + public int numDocs() { + return numDocs; } @Override - public Iterable indexableFields() { - return Collections.emptyList(); + public int maxDoc() { + return numDocs; } - }; - - private static final ArrayList STORED_FIELD_LIST = new ArrayList( - 1); - static { - STORED_FIELD_LIST.add(new StoredField("dummy", "")); - } - - /** - * A document containing only one stored field to be used as the last document - * in stacked segments. 
- */ - private static final IndexDocument STORED_FIELD_DOCUMENT = new IndexDocument() { + @Override - public Iterable storableFields() { - return STORED_FIELD_LIST; + public void document(int docID, StoredFieldVisitor visitor) + throws IOException { + if (core == null) { + return; + } + core.fieldsReaderLocal.get().visitDocument(docID, visitor, null); } @Override - public Iterable indexableFields() { - return Collections.emptyList(); + public boolean hasDeletions() { + return false; } - }; + + @Override + protected void doClose() throws IOException {} + + @Override + public NumericDocValues getNumericDocValues(String field) + throws IOException { + if (core == null) { + return null; + } + return core.getNumericDocValues(field); + } + + @Override + public BinaryDocValues getBinaryDocValues(String field) throws IOException { + if (core == null) { + return null; + } + return core.getBinaryDocValues(field); + } + + @Override + public SortedDocValues getSortedDocValues(String field) throws IOException { + if (core == null) { + return null; + } + return core.getSortedDocValues(field); + } + + @Override + public SortedSetDocValues getSortedSetDocValues(String field) + throws IOException { + if (core == null) { + return null; + } + return core.getSortedSetDocValues(field); + } + + @Override + public NumericDocValues getNormValues(String field) throws IOException { + if (core == null) { + return null; + } + return core.getNormValues(field); + } + + } } Index: lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (revision 1454908) +++ lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (working copy) @@ -17,13 +17,17 @@ * limitations under the License. 
*/ +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.Locale; +import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FieldType; @@ -32,7 +36,10 @@ import org.apache.lucene.index.FieldsUpdate.Operation; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IndexData; import org.apache.lucene.util.LuceneTestCase; public class TestFieldReplacements extends LuceneTestCase { @@ -61,6 +68,7 @@ public void setUp() throws Exception { super.setUp(); dir = newDirectory(); + // init fields data structures int numFields = 4 + random().nextInt(4); @@ -77,7 +85,7 @@ fieldTokens[i] = tokens.toArray(new String[tokens.size()]); } } - + @Override public void tearDown() throws Exception { dir.close(); @@ -85,19 +93,21 @@ } public void testEmptyIndex() throws IOException { - // test performing fields addition and replace on an empty index + init(random()); + + // test performing fields addition and replacement on an empty index IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))); HashSet usedTerms = new HashSet(); Operation operation = Operation.REPLACE_FIELDS; - writer.updateFields(operation, getOperationTerm(usedTerms), - getFields(usedTerms)); + writer.updateFields(operation, getOperationTerm(usedTerms, random()), + getFields(usedTerms, random())); operation = Operation.ADD_FIELDS; - writer.updateFields(operation, getOperationTerm(usedTerms), - getFields(usedTerms)); + writer.updateFields(operation, getOperationTerm(usedTerms, random()), + getFields(usedTerms, random())); writer.close(); @@ -106,23 +116,39 @@ directoryReader.close(); } - private void addDocuments() throws IOException { - + private static void init(Random localRandom) { + int numFields = 4 + localRandom.nextInt(4); + fieldNames = new String[numFields]; + fieldTokens = new String[numFields][]; + for (int i = 0; i < numFields; i++) { + fieldNames[i] = "f" + i; + ArrayList tokens = new ArrayList(); + final String[] allTokens = loremIpsum.split("\\s"); + for (int index = localRandom.nextInt(2 + i); index < allTokens.length; index += 1 + localRandom + .nextInt(2 + i)) { + tokens.add(allTokens[index].toLowerCase()); + } + fieldTokens[i] = tokens.toArray(new String[tokens.size()]); + } + } + + private static void addDocuments(Directory directory, Random localRandom, + int maxDocs) throws IOException { + init(localRandom); HashSet usedTerms = new HashSet(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random())); + config.setCodec(new SimpleTextCodec()); + IndexWriter writer = new IndexWriter(directory, config); // add random documents - int numDocs = 10 + random().nextInt(50); + int numOps = 10 + localRandom.nextInt(50); int nCommits = 0; - for (int i = 0; i < numDocs; i++) { + for (int i = 0; i < Math.min(maxDocs, numOps); i++) { - // create fields - Document fields = getFields(usedTerms); - // select 
operation - int opIndex = random().nextInt(10); + int opIndex = localRandom.nextInt(10); Operation operation; if (opIndex <= 1) { if (opIndex == 0) { @@ -135,26 +161,29 @@ } // create term if needed - Term term = getOperationTerm(usedTerms); + Term term = getOperationTerm(usedTerms, localRandom); - writer.updateFields(operation, term, fields); + // create fields and update + writer.updateFields(operation, term, getFields(usedTerms, localRandom)); } else { if (opIndex == 2) { if (VERBOSE_FIELD_REPLACEMENTS) { System.out.println("REPLACE_DOCUMENTS"); } - Term term = getOperationTerm(usedTerms); - writer.replaceDocument(term, fields); + Term term = getOperationTerm(usedTerms, localRandom); + // create document and replace + writer.replaceDocument(term, getFields(usedTerms, localRandom)); } else { if (VERBOSE_FIELD_REPLACEMENTS) { System.out.println("ADD_DOCUMENT"); } - writer.addDocument(fields); + // create document and add + writer.addDocument(getFields(usedTerms, localRandom)); } } // commit about once every 10 docs - int interCommit = random().nextInt(10); + int interCommit = localRandom.nextInt(10); if (interCommit == 0) { if (VERBOSE_FIELD_REPLACEMENTS) { System.out.println("commit " + (++nCommits)); @@ -168,16 +197,16 @@ writer.close(); } - public Document getFields(HashSet usedTerms) { + public static Document getFields(HashSet usedTerms, Random loaclRandom) { Document fields = new Document(); - int nFields = 1 + random().nextInt(5); + int nFields = 1 + loaclRandom.nextInt(5); for (int j = 0; j < nFields; j++) { - boolean indexed = random().nextInt(8) > 0; - int index = random().nextInt(fieldNames.length); + boolean indexed = loaclRandom.nextInt(8) > 0; + int index = loaclRandom.nextInt(fieldNames.length); String fieldName = fieldNames[index]; String value = createFieldValue(fieldTokens[index], fieldName, indexed, - usedTerms); + usedTerms, loaclRandom); if (indexed) { fields.add(new TextField(fieldName, value, Store.NO)); @@ -197,20 +226,20 @@ return fields; } - public Term getOperationTerm(HashSet usedTerms) { + public static Term getOperationTerm(HashSet usedTerms, Random loaclRandom) { Term term = null; - boolean used = random().nextInt(5) < 4; + boolean used = loaclRandom.nextInt(5) < 4; if (used && !usedTerms.isEmpty()) { final Iterator iterator = usedTerms.iterator(); - int usedIndex = random().nextInt(usedTerms.size()); + int usedIndex = loaclRandom.nextInt(usedTerms.size()); for (int j = 0; j < usedIndex; j++) { iterator.next(); } term = iterator.next(); } else { // select term - int fieldIndex = random().nextInt(fieldNames.length); - int textIndex = random().nextInt(fieldTokens[fieldIndex].length / 10); + int fieldIndex = loaclRandom.nextInt(fieldNames.length); + int textIndex = loaclRandom.nextInt(fieldTokens[fieldIndex].length / 10); term = new Term(fieldNames[fieldIndex], fieldTokens[fieldIndex][textIndex]); } @@ -220,11 +249,11 @@ return term; } - private String createFieldValue(String[] tokens, String fieldName, - boolean indexed, HashSet usedTerms) { + private static String createFieldValue(String[] tokens, String fieldName, + boolean indexed, HashSet usedTerms, Random loaclRandom) { StringBuilder builder = new StringBuilder(); - int index = random().nextInt(Math.min(10, tokens.length)); + int index = loaclRandom.nextInt(Math.min(10, tokens.length)); while (index < tokens.length) { builder.append(tokens[index]); @@ -232,19 +261,217 @@ if (indexed) { usedTerms.add(new Term(fieldName, tokens[index])); } - index += 1 + random().nextInt(10); + index += 1 + 
loaclRandom.nextInt(10); } return builder.toString(); } public void testRandomIndexGeneration() throws IOException { - addDocuments(); + addDocuments(dir, random(), Integer.MAX_VALUE); DirectoryReader directoryReader = DirectoryReader.open(dir); directoryReader.close(); } - public void testStatisticsAfterFieldUpdates() throws IOException { + public void testAddIndexes() throws IOException { + addDocuments(dir, random(), Integer.MAX_VALUE); + RAMDirectory addedDir = new RAMDirectory(); + IndexWriter addedIndexWriter = new IndexWriter(addedDir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + addedIndexWriter.addIndexes(dir); + addedIndexWriter.close(); + + DirectoryReader updatesReader = DirectoryReader.open(dir); + IndexData updatesIndexData = new IndexData(updatesReader); + updatesReader.close(); + + DirectoryReader addedReader = DirectoryReader.open(addedDir); + IndexData addedIndexData = new IndexData(addedReader); + addedReader.close(); + addedDir.close(); + + assertEquals("Difference in addIndexes ", updatesIndexData, addedIndexData); + } + + + public void testIndexEquality() throws IOException { + // create index through updates + addDocuments(dir, new Random(3), Integer.MAX_VALUE); + + DirectoryReader updatesReader = DirectoryReader.open(dir); + IndexData updatesIndexData = new IndexData(updatesReader); + System.out.println("Updates index data"); + System.out.println(updatesIndexData.toString(true)); + System.out.println(); + updatesReader.close(); + + // create the same index directly + RAMDirectory directDir = new RAMDirectory(); + IndexWriter directWriter = new IndexWriter(directDir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + + Document doc = new Document(); + doc.add(new StoredField("f0","elit, magna volutpat. tation ea dolor consequat, facilisis odio te soluta doming facer qui me consuetudium littera per nunc ")); + doc.add(new TextField("f4","consectetuer tincidunt erat nostrud hendrerit dignissim claritatem me etiam quam claram, ", Store.NO)); + doc.add(new TextField("f3","nibh iriure qui liber claritatem. claram, seacula videntur sollemnes ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f4","diam eum nulla nulla claritatem. mutationem claram, ", Store.NO)); + doc.add(new TextField("f4","amet, erat eum delenit iis claritatem. claram, et fiant ", Store.NO)); + doc.add(new StoredField("f0","dolore quis duis iriure illum accumsan blandit tempor nihil facer assum. qui lectores dynamicus, claram, quinta qui sollemnes ")); + doc.add(new StoredField("f1","elit, dolore aliquip dolore et facilisi. nobis placerat demonstraverunt processus qui littera eodem clari, ")); + doc.add(new TextField("f1","nonummy ad commodo at te eleifend congue doming in demonstraverunt consuetudium est eodem ", Store.NO)); + doc.add(new TextField("f3","dolore volutpat. exerci nisl consequat, delenit liber nobis qui lectores saepius. et fiant ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","magna dolore luptatum claritatem investigationes quod per ", Store.NO)); + doc.add(new TextField("f2","elit, sed dolore aliquip commodo eum dignissim feugait doming habent insitam; legunt est qui quarta parum ", Store.NO)); + doc.add(new StoredField("f3","nibh volutpat. 
in facilisis accumsan luptatum mazim lectores sequitur anteposuerit sollemnes ")); + doc.add(new TextField("f2","euismod suscipit eum dolor molestie at qui duis doming in lius qui notare nunc ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f4","tincidunt velit facilisis dignissim cum iis claram, ", Store.NO)); + doc.add(new StoredField("f4","ullamcorper accumsan delenit dolore nihil claritatem. mutationem clari, ")); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f3","exerci ea esse consequat, facilisis praesent placerat dynamicus, seacula qui ", Store.NO)); + doc.add(new TextField("f2","sed nonummy erat duis eum iriure dignissim duis nam assum. insitam; qui quam nunc futurum. ", Store.NO)); + doc.add(new TextField("f5","velit luptatum augue placerat quam ", Store.NO)); + doc.add(new TextField("f3","minim commodo facilisis qui imperdiet ii claritas seacula ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","tincidunt suscipit dolor eu dignissim delenit congue possim lius anteposuerit in ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","consectetuer illum eleifend processus fiant ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f1","volutpat. minim aliquip duis dolore zzril congue in saepius. dynamicus, qui est eodem qui futurum. ", Store.NO)); + doc.add(new TextField("f0","ut quis duis eum hendrerit dolore odio feugait option doming mazim possim usus claritatem. legunt mirum litterarum qui sollemnes futurum. ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f0","nibh ut ut minim exerci ea duis esse et blandit luptatum facilisi. soluta doming quod typi usus quod dynamicus, consuetudium mirum quam quarta clari, in ", Store.NO)); + doc.add(new TextField("f4","wisi facilisis claritatem iis lius mutationem qui ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f4","nibh ullamcorper ea dignissim usus mutationem quarta ")); + doc.add(new StoredField("f4","consectetuer wisi ea illum facilisis assum. mutationem quarta clari, ")); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","velit tempor processus putamus et typi, ", Store.NO)); + doc.add(new TextField("f0","nibh dolore exerci eum esse feugiat facilisis iusto dolore cum quod non facit legunt quam claram, litterarum nunc ", Store.NO)); + doc.add(new TextField("f4","consectetuer ullamcorper eum dignissim dolore assum. est littera in ", Store.NO)); + doc.add(new TextField("f2","ipsum ad duis dolor eu at nam doming habent eorum me consuetudium decima futurum. ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f3","adipiscing wisi nisl consequat, dignissim nobis qui mirum fiant sollemnes ", Store.NO)); + doc.add(new TextField("f0","euismod ut ad nisl dolor eu blandit te eleifend nihil typi qui lectores claritas consuetudium gothica, claram, decima sollemnes ", Store.NO)); + doc.add(new TextField("f0","ut erat ut nisl ea dolor velit vel eros odio qui feugait facilisi. nihil assum. 
usus ii legunt littera decima nobis sollemnes ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","velit tempor legentis mirum fiant ", Store.NO)); + doc.add(new TextField("f0","nibh dolore exerci eum esse feugiat facilisis iusto dolore cum quod non facit legunt quam claram, litterarum nunc ", Store.NO)); + doc.add(new TextField("f4","consectetuer ullamcorper eum dignissim dolore assum. est littera in ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","dolore tation duis in eu delenit nam placerat in qui quarta ", Store.NO)); + doc.add(new TextField("f2","ut suscipit duis at dignissim delenit soluta insitam; me quam qui futurum. ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f0","diam euismod quis autem consequat, eros iusto delenit feugait option quod habent claritatem claritatem. lectores consuetudium nunc per qui sollemnes ", Store.NO)); + doc.add(new TextField("f4","nibh tincidunt hendrerit nulla usus est quam qui ", Store.NO)); + doc.add(new TextField("f1","adipiscing diam nostrud duis at zzril te nobis congue est demonstraverunt lius consuetudium est claram, qui in ", Store.NO)); + doc.add(new StoredField("f0","nibh euismod magna erat suscipit duis dolor esse et delenit tempor quod typi in legunt littera nunc decima. in ")); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f1","diam nonummy tincidunt lobortis dolor et luptatum liber cum doming quod assum. habent insitam; est ii littera per et qui ", Store.NO)); + doc.add(new TextField("f4","diam ullamcorper dignissim assum. claritatem. me etiam qui clari, ", Store.NO)); + doc.add(new TextField("f0","ipsum ut volutpat. minim autem dolor vulputate vel dolore odio blandit cum nobis mazim placerat facer possim est lectores sequitur consuetudium claram, modo qui in ", Store.NO)); + doc.add(new TextField("f2","tincidunt nisl duis in zzril placerat habent qui parum litterarum qui ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f3","nibh ea consequat, accumsan tempor est dynamicus, seacula typi, videntur sollemnes ")); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f1","volutpat. duis dolor esse at iusto delenit doming est facit est consuetudium humanitatis sollemnes ", Store.NO)); + doc.add(new TextField("f2","ipsum ut nisl dolor dignissim nam placerat investigationes processus notare nunc in ", Store.NO)); + doc.add(new TextField("f1","ipsum tincidunt nostrud lobortis in vel nulla dolore placerat facit ii quam littera formas nunc clari, ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f3","adipiscing ea consequat, qui nobis ii mirum et sollemnes ", Store.NO)); + doc.add(new TextField("f5","velit te legere typi, ", Store.NO)); + doc.add(new TextField("f3","nisl in dignissim delenit placerat est claritatem. notare anteposuerit et videntur ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f1","dolore nostrud suscipit lobortis duis vel et delenit liber cum habent usus claritatem. qui formas nobis sollemnes futurum. 
", Store.NO)); + doc.add(new TextField("f2","erat tation duis in molestie dignissim liber congue possim me qui litterarum eodem in ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f0","amet, elit, ut minim duis eum esse vel eu iusto blandit nam eleifend nihil typi usus facit legunt notare litterarum per decima clari, ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","euismod suscipit in velit delenit facer legunt quam formas parum ", Store.NO)); + doc.add(new StoredField("f4","ullamcorper accumsan delenit insitam; lius mutationem quarta decima. ")); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f3","ipsum adipiscing wisi nisl consequat, praesent placerat qui saepius. dynamicus, seacula videntur ")); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new StoredField("f0","consectetuer erat minim suscipit ea esse consequat, feugiat accumsan duis cum nihil typi claritatem facit etiam quam claram, quinta modo futurum. ")); + doc.add(new TextField("f4","tincidunt illum cum claritatem. mutationem litterarum ", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f2","tincidunt erat ad aliquip duis velit dignissim delenit facer insitam; processus qui litterarum formas quarta ", Store.NO)); + doc.add(new StoredField("f2","erat ut nisl duis at feugait congue in lius anteposuerit nunc ")); + doc.add(new TextField("f3","adipiscing minim esse luptatum tempor imperdiet est saepius. seacula fiant ", Store.NO)); + doc.add(new TextField("f0","ipsum elit, magna suscipit dolor eu iusto feugait eleifend quod assum. non est investigationes claritas nunc seacula videntur ", Store.NO)); + doc.add(new TextField("f4","amet, nibh ullamcorper velit nulla dignissim quod insitam; lius decima. 
", Store.NO)); + directWriter.addDocument(doc); + + doc = new Document(); + doc.add(new TextField("f5","quis dolore eleifend investigationes mirum per eodem typi, ", Store.NO)); + directWriter.addDocument(doc); + + directWriter.close(); + DirectoryReader directReader = DirectoryReader.open(directDir); + + IndexData directIndexData = new IndexData(directReader); + System.out.println("Direct index data"); + System.out.println(directIndexData.toString(true)); + System.out.println(); + directReader.close(); + directDir.close(); + + boolean equalsNoOrder = IndexData.equalsNoOrder(directIndexData, + updatesIndexData); + assertTrue("indexes differ", equalsNoOrder); + } + + public void testReplaceAndAddAgain() throws IOException { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random()))); @@ -264,14 +491,14 @@ doc1.add(new StoredField("f1", "c", fieldType)); writer.addDocument(doc1); + Document doc3 = new Document(); + doc3.add(new StoredField("f1", "d", fieldType)); + writer.updateFields(Operation.REPLACE_FIELDS, new Term("f1", "b"), doc3); + Document doc2 = new Document(); doc2.add(new StoredField("f1", "b", fieldType)); writer.addDocument(doc2); - Document doc3 = new Document(); - doc3.add(new StoredField("f1", "d", fieldType)); - writer.updateFields(Operation.REPLACE_FIELDS, new Term("f1", "b"), doc3); - writer.close(); DirectoryReader directoryReader = DirectoryReader.open(dir); @@ -288,6 +515,8 @@ final DocsAndPositionsEnum termPositionsB = atomicReader .termPositionsEnum(new Term("f1", "b")); + assertEquals("wrong doc id", 2, termPositionsB.nextDoc()); + assertEquals("wrong position", 0, termPositionsB.nextPosition()); assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, termPositionsB.nextDoc()); @@ -302,8 +531,6 @@ .termPositionsEnum(new Term("f1", "d")); assertEquals("wrong doc id", 0, termPositionsD.nextDoc()); assertEquals("wrong position", 0, termPositionsD.nextPosition()); - assertEquals("wrong doc id", 2, termPositionsD.nextDoc()); - assertEquals("wrong position", 0, termPositionsD.nextPosition()); assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, termPositionsD.nextDoc()); @@ -312,7 +539,7 @@ final StorableField[] f1_0 = stored0.getFields("f1"); assertEquals("wrong numeber of stored fields", 1, f1_0.length); assertEquals("wrong field value", "d", f1_0[0].stringValue()); - + final StoredDocument stored1 = atomicReader.document(1); final StorableField[] f1_1 = stored1.getFields("f1"); assertEquals("wrong numeber of stored fields", 2, f1_1.length); @@ -322,8 +549,8 @@ final StoredDocument stored2 = atomicReader.document(2); final StorableField[] f1_2 = stored2.getFields("f1"); assertEquals("wrong numeber of stored fields", 1, f1_2.length); - assertEquals("wrong field value", "d", f1_2[0].stringValue()); - + assertEquals("wrong field value", "b", f1_2[0].stringValue()); + directoryReader.close(); } @@ -351,4 +578,31 @@ } } } + + public void testprintIndexes() throws IOException { + File outDir = new File("D:/temp/ifu/compare/scenario/c"); + outDir.mkdirs(); + + for (int i = 0; i < 42; i++) { + //Directory directory = new RAMDirectory(); + File fsDirFile = new File(outDir, "" + i); + fsDirFile.mkdirs(); + Directory directory = FSDirectory.open(fsDirFile); + for (String filename : directory.listAll()) { + new File(fsDirFile, filename).delete(); + } + addDocuments(directory, new Random(3), i); + DirectoryReader updatesReader = DirectoryReader.open(directory); + IndexData updatesIndexData = new IndexData( + 
updatesReader); + updatesReader.close(); + + File out = new File(outDir, (i < 10 ? "0" : "") + i + ".txt"); + FileWriter fileWriter = new FileWriter(out); + fileWriter.append(updatesIndexData.toString()); + fileWriter.close(); + } + } + + } Index: lucene/core/src/test/org/apache/lucene/index/TestFieldUpdates.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestFieldUpdates.java (revision 1454908) +++ lucene/core/src/test/org/apache/lucene/index/TestFieldUpdates.java (working copy) @@ -115,7 +115,8 @@ // test that the norms are not present in the segment if // omitNorms is true - for (FieldInfo fi : reader.getFieldInfos()) { + FieldInfos fieldInfos = reader.getFieldInfos(); + for (FieldInfo fi : fieldInfos) { if (fi.isIndexed()) { assertTrue("omitNorms=" + fi.omitsNorms() + " normsValue=" + reader.getNormValues(fi.name), fi.omitsNorms() == (reader.getNormValues(fi.name) == null)); } Index: lucene/core/src/test/org/apache/lucene/util/IndexData.java =================================================================== --- lucene/core/src/test/org/apache/lucene/util/IndexData.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/util/IndexData.java (working copy) @@ -0,0 +1,377 @@ +package org.apache.lucene.util; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.StorableField; +import org.apache.lucene.index.StoredDocument; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Contains terms and stored fields extracted from a reader, to be used to + * compare indexes which contain similar documents. 
+ */ +public class IndexData { + + ArrayList nonEmpty; + + public IndexData(IndexReader reader) throws IOException { + Bits liveDocs = MultiFields.getLiveDocs(reader); + + ArrayList docs = new ArrayList(); + for (int i = 0; i < reader.maxDoc(); i++) { + if (liveDocs == null || liveDocs.get(i)) { + docs.add(new DocumentData(reader.document(i))); + } else { + docs.add(new DocumentData(new StoredDocument())); + } + } + + Fields fields = MultiFields.getFields(reader); + if (fields != null) { + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + Terms terms = fields.terms(field); + TermsEnum termsIterator = terms.iterator(null); + BytesRef term; + while ((term = termsIterator.next()) != null) { + DocsEnum termDocs = MultiFields.getTermDocsEnum(reader, liveDocs, + field, term); + while (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + docs.get(termDocs.docID()).addTerm(field, term.toString(), + termDocs.freq()); + } + } + } + } + + nonEmpty = new ArrayList(); + for (DocumentData doc : docs) { + if (!doc.isEmpty()) { + nonEmpty.add(doc); + } + } + } + + @Override + public String toString() { + return toString(nonEmpty); + } + + public String toString(boolean ordered) { + if (!ordered) { + return toString(); + } + + ArrayList sorted = new ArrayList( + nonEmpty); + Collections.sort(sorted, new DocumentDataComparator()); + return toString(sorted); + } + + public String toString(ArrayList documentDatas) { + StringBuilder builder = new StringBuilder(this.getClass().getSimpleName()); + builder.append('\n'); + int counter = 0; + for (DocumentData doc : documentDatas) { + builder.append('\t'); + builder.append("document "); + builder.append(counter++); + builder.append('\n'); + + // print stored fields + List storedFields = doc.storedFields; + if (!storedFields.isEmpty()) { + builder.append('\t'); + builder.append('\t'); + builder.append("storedFields"); + builder.append('\n'); + for (StorableField storableField : storedFields) { + builder.append('\t'); + builder.append('\t'); + builder.append('\t'); + builder.append(storableField.name()); + builder.append('\t'); + builder.append(storableField.stringValue()); + builder.append('\n'); + } + } + + // print terms + Map termsMap = doc.termsMap; + if (!termsMap.isEmpty()) { + builder.append('\t'); + builder.append('\t'); + builder.append("termsMap"); + builder.append('\n'); + + List termsList = new ArrayList(termsMap.keySet()); + Collections.sort(termsList); + for (String term : termsList) { + builder.append('\t'); + builder.append('\t'); + builder.append('\t'); + builder.append(term); + builder.append('\t'); + builder.append(termsMap.get(term)); + builder.append('\n'); + } + } + } + return builder.toString(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((nonEmpty == null) ? 0 : nonEmpty.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + IndexData other = (IndexData) obj; + if (nonEmpty == null) { + if (other.nonEmpty != null) return false; + } else if (!nonEmpty.equals(other.nonEmpty)) { + return false; + } + return true; + } + + /** + * Test if two index datas contain the same documents, ignoring documents + * order. 
+ */ + public static boolean equalsNoOrder(IndexData id1, IndexData id2) { + if (id1.nonEmpty == null) { + if (id2.nonEmpty != null) return false; + } else if (id2.nonEmpty == null) { + return false; + } + + ArrayList sorted1 = new ArrayList( + id1.nonEmpty); + ArrayList sorted2 = new ArrayList( + id2.nonEmpty); + + Collections.sort(sorted1, new DocumentDataComparator()); + Collections.sort(sorted2, new DocumentDataComparator()); + + Iterator iterator1 = sorted1.iterator(); + Iterator iterator2 = sorted2.iterator(); + + while (iterator1.hasNext()) { + if (!iterator2.hasNext()) { + return false; + } + if (!iterator1.next().equals(iterator2.next())) { + return false; + } + } + + return true; + } + + public static boolean testEquality(ArrayList sorted1, + ArrayList sorted2) { + Iterator iterator1 = sorted1.iterator(); + Iterator iterator2 = sorted2.iterator(); + + while (iterator1.hasNext()) { + if (!iterator2.hasNext()) { + return false; + } + if (!iterator1.next().equals(iterator2.next())) { + return false; + } + } + + return true; + } + + private class DocumentData { + + List storedFields; + + TreeMap termsMap = new TreeMap(); + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((storedFields == null) ? 0 : storedFields.hashCode()); + result = prime * result + ((termsMap == null) ? 0 : termsMap.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + DocumentData other = (DocumentData) obj; + if (storedFields == null) { + if (other.storedFields != null) { + return false; + } + } else { + // compare stored fields on by one + if (storedFields.size() != other.storedFields.size()) { + return false; + } + Iterator fieldsThis = storedFields.iterator(); + Iterator fieldsOther = other.storedFields.iterator(); + while (fieldsThis.hasNext()) { + StorableField fieldThis = fieldsThis.next(); + StorableField fieldOther = fieldsOther.next(); + if (!fieldThis.name().equals(fieldOther.name())) { + return false; + } + if (!fieldThis.stringValue().equals(fieldOther.stringValue())) { + return false; + } + } + } + if (termsMap == null) { + if (other.termsMap != null) { + return false; + } + } else if (!termsMap.equals(other.termsMap)) { + return false; + } + return true; + } + + public DocumentData(StoredDocument document) { + storedFields = document.getFields(); + } + + public boolean isEmpty() { + return storedFields.isEmpty() && termsMap.isEmpty(); + } + + public void addTerm(String field, String term, int freq) { + termsMap.put(field + ":" + term, freq); + } + + } + + private static class DocumentDataComparator implements Comparator { + + @Override + public int compare(DocumentData doc1, DocumentData doc2) { + // start with index terms + Map terms1 = doc1.termsMap; + Map terms2 = doc2.termsMap; + if (terms1.isEmpty() && !terms2.isEmpty()) { + return -1; + } else if (!terms1.isEmpty() && terms2.isEmpty()) { + return 1; + } else { + Iterator iter1 = terms1.keySet().iterator(); + Iterator iter2 = terms2.keySet().iterator(); + while (iter1.hasNext()) { + if (!iter2.hasNext()) { + return -1; + } else { + String t1 = iter1.next(); + String t2 = iter2.next(); + int compTerm = t1.compareTo(t2); + if (compTerm != 0) { + return compTerm; + } + int compFreq = terms1.get(t1) - terms2.get(t2); + if (compFreq != 0) { + return compFreq; + } + } + } + if (iter2.hasNext()) { + return 1; + } + } + 
+ // now check stored fields + List stored1 = doc1.storedFields; + List stored2 = doc2.storedFields; + if (stored1.isEmpty() && !stored2.isEmpty()) { + return -1; + } else if (!stored1.isEmpty() && stored2.isEmpty()) { + return 1; + } else { + Iterator iter1 = stored1.iterator(); + Iterator iter2 = stored2.iterator(); + while (iter1.hasNext()) { + if (!iter2.hasNext()) { + return -1; + } else { + StorableField f1 = iter1.next(); + StorableField f2 = iter2.next(); + int compFieldName = f1.name().compareTo(f2.name()); + if (compFieldName != 0) { + return compFieldName; + } + compFieldName = f1.stringValue().compareTo(f2.stringValue()); + if (compFieldName != 0) { + return compFieldName; + } + } + } + if (iter2.hasNext()) { + return 1; + } + } + return 0; + + } + } +}
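Not part of the patch: the fragment below is a minimal sketch of how the IndexData helper added above could be driven from a test, mirroring the order-insensitive comparison performed in testAddIndexes and testIndexEquality. The helper name assertSameDocuments is hypothetical; IndexData, equalsNoOrder and assertTrue are the names used in the patch and in LuceneTestCase.

  // Hypothetical helper (illustration only): asserts that two already-built
  // indexes contain the same documents, regardless of doc ID order.
  static void assertSameDocuments(Directory expectedDir, Directory actualDir) throws IOException {
    DirectoryReader expectedReader = DirectoryReader.open(expectedDir);
    IndexData expectedData = new IndexData(expectedReader);
    expectedReader.close();

    DirectoryReader actualReader = DirectoryReader.open(actualDir);
    IndexData actualData = new IndexData(actualReader);
    actualReader.close();

    // order-insensitive: equivalent documents may sit at different doc IDs in the two indexes
    assertTrue("indexes differ", IndexData.equalsNoOrder(expectedData, actualData));
  }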