Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java (revision 1416361) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.GenerationReplacementsFormat; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; @@ -44,6 +45,7 @@ // TODO: need a plain-text impl (using the above) private final NormsFormat normsFormat = new SimpleTextNormsFormat(); private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat(); + private final GenerationReplacementsFormat generationReplacements = new SimpleTextGenerationReplacementsFormat(); public SimpleTextCodec() { super("SimpleText"); @@ -88,4 +90,9 @@ public LiveDocsFormat liveDocsFormat() { return liveDocs; } + + @Override + public GenerationReplacementsFormat generationReplacementsFormat() { + return generationReplacements; + } } Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextGenerationReplacementsFormat.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextGenerationReplacementsFormat.java (revision 0) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextGenerationReplacementsFormat.java (working copy) @@ -0,0 +1,86 @@ +package org.apache.lucene.codecs.simpletext; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map.Entry; + +import org.apache.lucene.codecs.GenerationReplacementsFormat; +import org.apache.lucene.index.FieldGenerationReplacements; +import org.apache.lucene.index.SegmentInfoPerCommit; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.StringHelper; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+public class SimpleTextGenerationReplacementsFormat extends
+    GenerationReplacementsFormat {
+  final static BytesRef FGR_DOCCOUNT = new BytesRef(" number of documents ");
+  final static BytesRef FGR_DOC = new BytesRef(" doc ");
+  final static BytesRef FGR_GENERATION = new BytesRef(" generation ");
+
+  @Override
+  protected FieldGenerationReplacements readPersistedGeneration(IndexInput input)
+      throws IOException {
+    FieldGenerationReplacements reps = new FieldGenerationReplacements();
+
+    BytesRef scratch = new BytesRef();
+    SimpleTextUtil.readLine(input, scratch);
+    assert StringHelper.startsWith(scratch, FGR_DOCCOUNT);
+    final int size = Integer.parseInt(readString(FGR_DOCCOUNT.length, scratch));
+
+    for (int i = 0; i < size; i++) {
+      SimpleTextUtil.readLine(input, scratch);
+      assert StringHelper.startsWith(scratch, FGR_DOC);
+      final int doc = Integer.parseInt(readString(FGR_DOC.length, scratch));
+
+      SimpleTextUtil.readLine(input, scratch);
+      assert StringHelper.startsWith(scratch, FGR_GENERATION);
+      final long generation = Long.parseLong(readString(FGR_GENERATION.length, scratch));
+
+      reps.set(doc, generation);
+    }
+
+    return reps;
+  }
+
+  private String readString(int offset, BytesRef scratch) {
+    return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
+  }
+
+  @Override
+  protected void persistGeneration(FieldGenerationReplacements reps,
+      IndexOutput output) throws IOException {
+    BytesRef scratch = new BytesRef();
+    SimpleTextUtil.write(output, FGR_DOCCOUNT);
+    SimpleTextUtil.write(output, Integer.toString(reps.size()), scratch);
+    SimpleTextUtil.writeNewline(output);
+
+    for (Entry<Integer,Long> entry : reps) {
+      SimpleTextUtil.write(output, FGR_DOC);
+      SimpleTextUtil.write(output, Integer.toString(entry.getKey()), scratch);
+      SimpleTextUtil.writeNewline(output);
+      SimpleTextUtil.write(output, FGR_GENERATION);
+      SimpleTextUtil.write(output, Long.toString(entry.getValue()), scratch);
+      SimpleTextUtil.writeNewline(output);
+    }
+  }
+
+}
Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
===================================================================
--- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java	(revision 1416361)
+++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java	(working copy)
@@ -68,7 +68,7 @@
     BytesRef scratch = new BytesRef();
     CharsRef scratchUTF16 = new CharsRef();
 
-    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
+    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen(), false);
     IndexInput in = null;
     boolean success = false;
     try {
@@ -110,7 +110,7 @@
     int size = bits.length();
     BytesRef scratch = new BytesRef();
 
-    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getNextDelGen());
+    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getNextDelGen(), false);
     IndexOutput out = null;
     boolean success = false;
     try {
@@ -140,7 +140,7 @@
   @Override
   public void files(SegmentInfoPerCommit info, Collection<String> files) throws IOException {
     if (info.hasDeletions()) {
-      files.add(IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen()));
+      files.add(IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen(), false));
     }
   }
Index:
lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java (revision 1416361) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java (working copy) @@ -32,6 +32,7 @@ private final SegmentInfoWriter writer = new SimpleTextSegmentInfoWriter(); public static final String SI_EXTENSION = "si"; + public static final String SI_FILES_LIST_EXTENSION = "sif"; @Override public SegmentInfoReader getSegmentInfoReader() { Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (revision 1416361) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java (working copy) @@ -25,6 +25,7 @@ import java.util.Set; import org.apache.lucene.codecs.SegmentInfoReader; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; @@ -95,21 +96,23 @@ attributes.put(key, value); } - SimpleTextUtil.readLine(input, scratch); - assert StringHelper.startsWith(scratch, SI_NUM_FILES); - int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch)); - Set files = new HashSet(); + Set files = actualReadFiles(input, scratch); - for (int i = 0; i < numFiles; i++) { - SimpleTextUtil.readLine(input, scratch); - assert StringHelper.startsWith(scratch, SI_FILE); - String fileName = readString(SI_FILE.length, scratch); - files.add(fileName); - } - SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics, Collections.unmodifiableMap(attributes)); info.setFiles(files); + + int updatesIndex = 1; + while (updatesIndex > 0) { + files = readFilesList(directory, segmentName, updatesIndex, context); + if (files == null) { + updatesIndex = -1; + } else { + info.addFiles(files); + updatesIndex++; + } + } + success = true; return info; } finally { @@ -121,6 +124,45 @@ } } + private Set readFilesList(Directory dir, String segment, long generation, IOContext context) throws IOException { + final String segFileName = IndexFileNames.fileNameFromGeneration(segment, Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + if (!dir.fileExists(segFileName)) { + return null; + } + + IndexInput input = dir.openInput(segFileName, context); + boolean success = false; + try { + BytesRef scratch = new BytesRef(); + Set files = actualReadFiles(input, scratch); + + success = true; + return files; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(input); + } else { + input.close(); + } + } + } + + private Set actualReadFiles(IndexInput input, BytesRef scratch) + throws IOException { + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch, SI_NUM_FILES); + int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch)); + Set files = new HashSet(); + + for (int i = 0; i < numFiles; i++) { + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch, SI_FILE); + String fileName = readString(SI_FILE.length, scratch); + files.add(fileName); + } + return files; + } + private String 
readString(int offset, BytesRef scratch) { return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8); } Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (revision 1416361) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java (working copy) @@ -22,6 +22,7 @@ import java.util.Set; import org.apache.lucene.codecs.SegmentInfoWriter; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; @@ -111,19 +112,32 @@ } } - Set files = si.files(); - int numFiles = files == null ? 0 : files.size(); - SimpleTextUtil.write(output, SI_NUM_FILES); - SimpleTextUtil.write(output, Integer.toString(numFiles), scratch); - SimpleTextUtil.writeNewline(output); + actualWriteFiles(si, output, scratch); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(output); + } else { + output.close(); + } + } + } + + @Override + public void writeFilesList(Directory dir, SegmentInfo si, long generation, IOContext ioContext) throws IOException { + final String segFileName = IndexFileNames.fileNameFromGeneration(si.name, + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + si.addFile(segFileName); - if (numFiles > 0) { - for(String fileName : files) { - SimpleTextUtil.write(output, SI_FILE); - SimpleTextUtil.write(output, fileName, scratch); - SimpleTextUtil.writeNewline(output); - } - } + boolean success = false; + IndexOutput output = dir.createOutput(segFileName, ioContext); + + try { + BytesRef scratch = new BytesRef(); + + actualWriteFiles(si, output, scratch); + success = true; } finally { if (!success) { @@ -137,4 +151,22 @@ } } } + + public void actualWriteFiles(SegmentInfo si, IndexOutput output, + BytesRef scratch) throws IOException { + Set files = si.files(); + int numFiles = files == null ? 
0 : files.size(); + SimpleTextUtil.write(output, SI_NUM_FILES); + SimpleTextUtil.write(output, Integer.toString(numFiles), scratch); + SimpleTextUtil.writeNewline(output); + + if (numFiles > 0) { + for(String fileName : files) { + SimpleTextUtil.write(output, SI_FILE); + SimpleTextUtil.write(output, fileName, scratch); + SimpleTextUtil.writeNewline(output); + } + } + } + } Index: lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java =================================================================== --- lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (revision 1416361) +++ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Set; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.index.FieldInfo; @@ -91,7 +92,7 @@ } @Override - public void visitDocument(int n, StoredFieldVisitor visitor) throws IOException { + public void visitDocument(int n, StoredFieldVisitor visitor, Set ignoreFields) throws IOException { in.seek(offsets[n]); readLine(); assert StringHelper.startsWith(scratch, NUM); @@ -124,15 +125,20 @@ throw new RuntimeException("unknown field type"); } - switch (visitor.needsField(fieldInfo)) { - case YES: - readField(type, fieldInfo, visitor); - break; - case NO: - readLine(); - assert StringHelper.startsWith(scratch, VALUE); - break; - case STOP: return; + if (ignoreFields != null && ignoreFields.contains(fieldInfo.name)) { + readLine(); + } else { + switch (visitor.needsField(fieldInfo)) { + case YES: + readField(type, fieldInfo, visitor); + break; + case NO: + readLine(); + assert StringHelper.startsWith(scratch, VALUE); + break; + case STOP: + return; + } } } } Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (revision 1420473) +++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (working copy) @@ -34,6 +34,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Set; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.StoredFieldsReader; @@ -191,7 +192,7 @@ } @Override - public void visitDocument(int docID, StoredFieldVisitor visitor) + public void visitDocument(int docID, StoredFieldVisitor visitor, Set ignoreFields) throws IOException { fieldsStream.seek(indexReader.getStartPointer(docID)); @@ -267,17 +268,22 @@ final int bits = (int) (infoAndBits & TYPE_MASK); assert bits <= NUMERIC_DOUBLE: "bits=" + Integer.toHexString(bits); - switch(visitor.needsField(fieldInfo)) { - case YES: - readField(documentInput, visitor, fieldInfo, bits); - assert documentInput.getPosition() <= bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + bytes.length; - break; - case NO: - skipField(documentInput, bits); - assert documentInput.getPosition() <= bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + bytes.length; - break; - case STOP: - return; + if (ignoreFields != null && ignoreFields.contains(fieldInfo.name)) { + skipField(documentInput, bits); + assert documentInput.getPosition() <= bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + bytes.length; + } else { + 
switch(visitor.needsField(fieldInfo)) { + case YES: + readField(documentInput, visitor, fieldInfo, bits); + assert documentInput.getPosition() <= bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + bytes.length; + break; + case NO: + skipField(documentInput, bits); + assert documentInput.getPosition() <= bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + bytes.length; + break; + case STOP: + return; + } } } assert documentInput.getPosition() == bytes.offset + bytes.length : documentInput.getPosition() + " " + bytes.offset + " " + bytes.length; Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.GenerationReplacementsFormat; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; @@ -50,6 +51,7 @@ private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat(); private final NormsFormat normsFormat = new Lucene40NormsFormat(); private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + private final GenerationReplacementsFormat generationReplacementsFormat = new Lucene40GenerationReplacementsFormat(); private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override @@ -103,6 +105,11 @@ return liveDocsFormat; } + @Override + public final GenerationReplacementsFormat generationReplacementsFormat() { + return generationReplacementsFormat; + } + /** Returns the postings format that should be used for writing * new segments of field. * Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40GenerationReplacementsFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40GenerationReplacementsFormat.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40GenerationReplacementsFormat.java (working copy) @@ -0,0 +1,60 @@ +package org.apache.lucene.codecs.lucene40; + +import java.io.IOException; +import java.util.Map.Entry; + +import org.apache.lucene.codecs.GenerationReplacementsFormat; +import org.apache.lucene.index.FieldGenerationReplacements; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Lucene40GenerationReplacementsFormat extends + GenerationReplacementsFormat { + + @Override + protected FieldGenerationReplacements readPersistedGeneration(IndexInput input) + throws IOException { + final int size = input.readVInt(); + FieldGenerationReplacements reps = new FieldGenerationReplacements(); + int curr = 0; + for (int i = 0; i < size; i++) { + curr += input.readVInt(); + reps.set(curr, input.readVLong()); + } + return reps; + } + + @Override + protected void persistGeneration(FieldGenerationReplacements reps, + IndexOutput output) throws IOException { + // write number of replacements + output.writeVInt(reps.size()); + + // write replacements + int prev = 0; + for (Entry entry : reps){ + final int curr = entry.getKey(); + output.writeVInt(curr - prev); + prev = curr; + output.writeVLong(entry.getValue()); + } + } + +} Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java (working copy) @@ -86,7 +86,7 @@ @Override public Bits readLiveDocs(Directory dir, SegmentInfoPerCommit info, IOContext context) throws IOException { - String filename = IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getDelGen()); + String filename = IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getDelGen(), false); final BitVector liveDocs = new BitVector(dir, filename, context); assert liveDocs.count() == info.info.getDocCount() - info.getDelCount(): "liveDocs.count()=" + liveDocs.count() + " info.docCount=" + info.info.getDocCount() + " info.getDelCount()=" + info.getDelCount(); @@ -96,7 +96,7 @@ @Override public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfoPerCommit info, int newDelCount, IOContext context) throws IOException { - String filename = IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getNextDelGen()); + String filename = IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getNextDelGen(), false); final BitVector liveDocs = (BitVector) bits; assert liveDocs.count() == info.info.getDocCount() - info.getDelCount() - newDelCount; assert liveDocs.length() == info.info.getDocCount(); @@ -106,7 +106,7 @@ @Override public void files(SegmentInfoPerCommit info, Collection files) throws IOException { if (info.hasDeletions()) { - files.add(IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getDelGen())); + files.add(IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getDelGen(), false)); } } } Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoFormat.java (working copy) @@ -88,6 +88,7 @@ /** File extension used to store {@link SegmentInfo}. 
*/ public final static String SI_EXTENSION = "si"; + public final static String SI_FILES_LIST_EXTENSION = "sif"; static final String CODEC_NAME = "Lucene40SegmentInfo"; static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java (working copy) @@ -61,7 +61,7 @@ final boolean isCompoundFile = input.readByte() == SegmentInfo.YES; final Map diagnostics = input.readStringStringMap(); final Map attributes = input.readStringStringMap(); - final Set files = input.readStringSet(); + Set files = input.readStringSet(); if (input.getFilePointer() != input.length()) { throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); @@ -71,6 +71,17 @@ null, diagnostics, Collections.unmodifiableMap(attributes)); si.setFiles(files); + int updatesIndex = 1; + while (updatesIndex > 0) { + files = readFilesList(dir, segment, updatesIndex, context); + if (files == null) { + updatesIndex = -1; + } else { + si.addFiles(files); + updatesIndex++; + } + } + success = true; return si; @@ -83,4 +94,34 @@ } } } + + private Set readFilesList(Directory dir, String segment, + long generation, IOContext context) throws IOException { + final String fileName = IndexFileNames.fileNameFromGeneration(segment, + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + if (!dir.fileExists(fileName)) { + return null; + } + + final IndexInput input = dir.openInput(fileName, context); + boolean success = false; + try { + final Set files = input.readStringSet(); + + if (input.getFilePointer() != input.length()) { + throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); + } + + success = true; + + return files; + + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(input); + } else { + input.close(); + } + } + } } Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java (working copy) @@ -71,4 +71,28 @@ } } } + + @Override + public void writeFilesList(Directory dir, SegmentInfo si, + long generation, IOContext ioContext) throws IOException { + final String fileName = IndexFileNames.fileNameFromGeneration(si.name, + Lucene40SegmentInfoFormat.SI_FILES_LIST_EXTENSION, generation, true); + si.addFile(fileName); + + final IndexOutput output = dir.createOutput(fileName, ioContext); + + boolean success = false; + try { + output.writeStringSet(si.files()); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(output); + si.dir.deleteFile(fileName); + } else { + output.close(); + } + } + } } Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java =================================================================== --- 
lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (revision 1420474) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java (working copy) @@ -34,6 +34,7 @@ import org.apache.lucene.util.IOUtils; import java.io.Closeable; +import java.util.Set; import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*; @@ -142,7 +143,7 @@ } @Override - public final void visitDocument(int n, StoredFieldVisitor visitor) throws IOException { + public final void visitDocument(int n, StoredFieldVisitor visitor, Set ignoreFields) throws IOException { seekIndex(n); fieldsStream.seek(indexStream.readLong()); @@ -154,15 +155,19 @@ int bits = fieldsStream.readByte() & 0xFF; assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits); - switch(visitor.needsField(fieldInfo)) { - case YES: - readField(visitor, fieldInfo, bits); - break; - case NO: - skipField(bits); - break; - case STOP: - return; + if (ignoreFields != null && ignoreFields.contains(fieldInfo.name)) { + skipField(bits); + } else { + switch (visitor.needsField(fieldInfo)) { + case YES: + readField(visitor, fieldInfo, bits); + break; + case NO: + skipField(bits); + break; + case STOP: + return; + } } } } Index: lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.GenerationReplacementsFormat; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; @@ -29,6 +30,7 @@ import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; +import org.apache.lucene.codecs.lucene40.Lucene40GenerationReplacementsFormat; import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat; import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; @@ -55,6 +57,7 @@ private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat(); private final NormsFormat normsFormat = new Lucene40NormsFormat(); private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + private final GenerationReplacementsFormat generationReplacementsFormat = new Lucene40GenerationReplacementsFormat(); private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override @@ -108,6 +111,11 @@ return liveDocsFormat; } + @Override + public final GenerationReplacementsFormat generationReplacementsFormat() { + return generationReplacementsFormat; + } + /** Returns the postings format that should be used for writing * new segments of field. 
* Index: lucene/core/src/java/org/apache/lucene/codecs/Codec.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/Codec.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/Codec.java (working copy) @@ -86,6 +86,9 @@ /** Encodes/decodes live docs */ public abstract LiveDocsFormat liveDocsFormat(); + /** Encodes/decodes live docs */ + public abstract GenerationReplacementsFormat generationReplacementsFormat(); + /** looks up a codec by name */ public static Codec forName(String name) { if (loader == null) { Index: lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java (working copy) @@ -75,6 +75,11 @@ } @Override + public GenerationReplacementsFormat generationReplacementsFormat() { + return delegate.generationReplacementsFormat(); + } + + @Override public NormsFormat normsFormat() { return delegate.normsFormat(); } Index: lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/codecs/GenerationReplacementsFormat.java (working copy) @@ -0,0 +1,168 @@ +package org.apache.lucene.codecs; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collection; +import java.util.regex.Pattern; + +import org.apache.lucene.index.FieldGenerationReplacements; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfoPerCommit; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; + +/** + * Format for field replacements of certain generation + * + * @lucene.experimental + */ +public abstract class GenerationReplacementsFormat { + + /** Extension of generation replacements vectors */ + static final String FIELD_GENERATION_REPLACEMENT_EXTENSION = "fgr"; + + /** + * Sole constructor. (For invocation by subclass constructors, typically + * implicit.) + */ + protected GenerationReplacementsFormat() {} + + /** + * Read field generation replacements. If no replacements exist return + * {@code null}. 
+   */
+  public FieldGenerationReplacements readGenerationReplacements(String field,
+      SegmentInfoPerCommit info, IOContext context) throws IOException {
+    String fileName = getLastGenerationFileName(field, info.info.dir, info);
+    if (fileName == null) {
+      return null;
+    }
+
+    return internalReadGeneration(info.info.dir, fileName, context);
+  }
+
+  private FieldGenerationReplacements internalReadGeneration(Directory dir,
+      String fileName, IOContext context) throws IOException {
+    IndexInput input = dir.openInput(fileName, context);
+
+    boolean success = false;
+    try {
+      final FieldGenerationReplacements persistedGeneration = readPersistedGeneration(input);
+      success = true;
+      return persistedGeneration;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(input);
+      } else {
+        input.close();
+      }
+    }
+  }
+
+  private String getLastGenerationFileName(String field, Directory dir,
+      SegmentInfoPerCommit info) throws IOException {
+    for (long i = info.getUpdateGen(); i > 0; i--) {
+      final String fileName = IndexFileNames.segmentFileName(
+          IndexFileNames.fileNameFromGeneration(info.info.name, "", i, false),
+          field, FIELD_GENERATION_REPLACEMENT_EXTENSION);
+      if (dir.fileExists(fileName)) {
+        return fileName;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Read persisted field generation replacements from a given input.
+   */
+  protected abstract FieldGenerationReplacements readPersistedGeneration(
+      IndexInput input) throws IOException;
+
+  /**
+   * Persist field generation replacements. Use
+   * {@link SegmentInfoPerCommit#getNextUpdateGen()} to determine the generation
+   * of the replacements file you should write to.
+   */
+  public void writeGenerationReplacement(String field,
+      FieldGenerationReplacements reps, Directory dir,
+      SegmentInfoPerCommit info, IOContext context) throws IOException {
+    if (reps == null) {
+      // nothing new to write
+      return;
+    }
+
+    // load replacements from the previous file and merge the new ones into them
+    String prevFileName = getLastGenerationFileName(field, dir, info);
+    final FieldGenerationReplacements existing;
+    if (prevFileName != null) {
+      existing = internalReadGeneration(dir, prevFileName, context);
+      existing.merge(reps);
+    } else {
+      existing = reps;
+    }
+
+    final String nameWithGeneration = IndexFileNames.fileNameFromGeneration(
+        info.info.name, "", info.getNextUpdateGen(), false);
+    final String fileName = IndexFileNames.segmentFileName(nameWithGeneration,
+        field, FIELD_GENERATION_REPLACEMENT_EXTENSION);
+
+    final IndexOutput output = dir.createOutput(fileName, context);
+    boolean success = false;
+    try {
+      // persist the merged replacements, since the previous file is deleted below
+      persistGeneration(existing, output);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(output);
+        info.info.dir.deleteFile(fileName);
+      } else {
+        output.close();
+        if (prevFileName != null) {
+          // remove previous file
+          info.info.dir.deleteFile(prevFileName);
+        }
+      }
+    }
+  }
+
+  /**
+   * Persist field generation replacements to a given output.
+   */
+  protected abstract void persistGeneration(FieldGenerationReplacements reps,
+      IndexOutput output) throws IOException;
+
+  /**
+   * Records all files in use by this {@link SegmentInfoPerCommit} into the
+   * files argument.
+   */
+  public void files(SegmentInfoPerCommit info, Directory dir,
+      Collection<String> files) throws IOException {
+    Pattern pattern = Pattern.compile(info.info.name + "[\\S]*\\."
+ + FIELD_GENERATION_REPLACEMENT_EXTENSION); + final String[] dirFiles = dir.listAll(); + for (int i = 0; i < dirFiles.length; i++) { + if (pattern.matcher(dirFiles[i]).matches()) { + files.add(dirFiles[i]); + } + } + } +} Index: lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoReader.java (working copy) @@ -39,6 +39,7 @@ * Read {@link SegmentInfo} data from a directory. * @param directory directory to read from * @param segmentName name of the segment to read + * @param context IO context to use * @return infos instance to be populated with data * @throws IOException If an I/O error occurs */ Index: lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/codecs/SegmentInfoWriter.java (working copy) @@ -37,7 +37,25 @@ /** * Write {@link SegmentInfo} data. + + * @param dir directory to write to + * @param info the segment info to write + * @param fis field infos to use + * @param ioContext IO context to use * @throws IOException If an I/O error occurs */ public abstract void write(Directory dir, SegmentInfo info, FieldInfos fis, IOContext ioContext) throws IOException; + + /** + * Write the list of files belonging to an updates segment of the segment with + * {@link SegmentInfo}, with the given updates generation. + * + * @param dir directory to write to + * @param info info of the segment to write + * @param generation updates generation, or 0 for segment base + * @param ioContext IO context to use + * @throws IOException + * If an I/O error occurs + */ + public abstract void writeFilesList(Directory dir, SegmentInfo info, long generation, IOContext ioContext) throws IOException; } Index: lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (revision 1420471) +++ lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java (working copy) @@ -18,13 +18,14 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Set; import org.apache.lucene.index.StoredFieldVisitor; /** * Codec API for reading stored fields. *

- * You need to implement {@link #visitDocument(int, StoredFieldVisitor)} to + * You need to implement {@link #visitDocument(int, StoredFieldVisitor, Set)} to * read the stored fields for a document, implement {@link #clone()} (creating * clones of any IndexInputs used, etc), and {@link #close()} * @lucene.experimental @@ -35,8 +36,9 @@ protected StoredFieldsReader() { } - /** Visit the stored fields for document n */ - public abstract void visitDocument(int n, StoredFieldVisitor visitor) throws IOException; + /** Visit the stored fields for document n, ignoring certain + * fields. */ + public abstract void visitDocument(int n, StoredFieldVisitor visitor, Set ignoreFields) throws IOException; @Override public abstract StoredFieldsReader clone(); Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (revision 1420474) +++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (working copy) @@ -80,7 +80,7 @@ * since deletes are applied to the wrong segments. */ packet.setDelGen(nextGen++); - assert packet.any(); + assert packet.anyDeletes() || packet.anyUpdates(); assert checkDeleteStats(); assert packet.delGen() < nextGen; assert deletes.isEmpty() || deletes.get(deletes.size()-1).delGen() < packet.delGen() : "Delete packets must be in order"; @@ -187,7 +187,7 @@ final SegmentInfoPerCommit info = infos2.get(infosIDX); final long segGen = info.getBufferedDeletesGen(); - if (packet != null && segGen < packet.delGen()) { + if (packet != null && packet.anyDeletes() && segGen < packet.delGen()) { //System.out.println(" coalesce"); if (coalescedDeletes == null) { coalescedDeletes = new CoalescedDeletes(); @@ -204,7 +204,7 @@ } delIDX--; - } else if (packet != null && segGen == packet.delGen()) { + } else if (packet != null && packet.anyDeletes() && segGen == packet.delGen()) { assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen; //System.out.println(" eq"); @@ -295,6 +295,25 @@ infosIDX--; } } + + for (SegmentInfoPerCommit updateInfo : infos2) { + //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); + final long updateSegGen = updateInfo.getBufferedDeletesGen(); + + for (FrozenBufferedDeletes updatePacket : deletes) { + if (updatePacket.anyUpdates() && updatePacket.delGen() <= updateSegGen) { + assert readerPool.infoIsLive(updateInfo); + final ReadersAndLiveDocs rld = readerPool.get(updateInfo, true); + final SegmentReader reader = rld.getReader(IOContext.READ); + try { + anyNewDeletes |= applyTermUpdates(updatePacket.updateTerms, updatePacket.updateArrays, rld, reader); + } finally { + rld.release(reader); + readerPool.release(rld); + } + } + } + } assert checkDeleteStats(); if (infoStream.isEnabled("BD")) { @@ -427,6 +446,77 @@ return delCount; } + private synchronized boolean applyTermUpdates(PrefixCodedTerms updateTerms, + FieldsUpdate[][] updateArrays, ReadersAndLiveDocs rld, + SegmentReader reader) throws IOException { + Fields fields = reader.fields(); + if (fields == null) { + // This reader has no postings + return false; + } + + TermsEnum termsEnum = null; + + String currentField = null; + DocsEnum docs = null; + + assert checkDeleteTerm(null); + + UpdatedSegmentData updatedSegmentData = new UpdatedSegmentData(); + int termIndex = -1; + + // System.out.println(Thread.currentThread().getName() + + // " del 
terms reader=" + reader); + for (Term term : updateTerms) { + termIndex++; + // Since we visit terms sorted, we gain performance + // by re-using the same TermsEnum and seeking only + // forwards + if (!term.field().equals(currentField)) { + assert currentField == null || currentField.compareTo(term.field()) < 0; + currentField = term.field(); + Terms terms = fields.terms(currentField); + if (terms != null) { + termsEnum = terms.iterator(null); + } else { + termsEnum = null; + } + } + + if (termsEnum == null) { + continue; + } + assert checkDeleteTerm(term); + + // System.out.println(" term=" + term); + + if (termsEnum.seekExact(term.bytes(), false)) { + // we don't need term frequencies for this + DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, 0); + // System.out.println("BDS: got docsEnum=" + docsEnum); + + if (docsEnum != null) { + while (true) { + final int docID = docsEnum.nextDoc(); + // System.out.println(Thread.currentThread().getName() + + // " del term=" + term + " doc=" + docID); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + updatedSegmentData.addUpdates(docID, updateArrays[termIndex]); + } + } + } + } + + if (updatedSegmentData.hasUpdates()) { + rld.setLiveUpdates(updatedSegmentData); + return true; + } + + return false; + } + public static class QueryAndLimit { public final Query query; public final int limit; Index: lucene/core/src/java/org/apache/lucene/index/BufferedUpdates.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/BufferedUpdates.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/BufferedUpdates.java (working copy) @@ -0,0 +1,109 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.lucene.util.RamUsageEstimator; + +/* Holds buffered updates by term for a + * single segment. This is used to hold buffered pending + * updates against the to-be-flushed segment. Once the + * updates are pushed (on flush in DocumentsWriter), these + * updates are converted to a FrozenUpdates instance. 
*/ + +// NOTE: we are sync'd by BufferedUpdates, ie, all access to +// instances of this class is via sync'd methods on +// BufferedUpdates + +class BufferedUpdates { + + final AtomicInteger numTermUpdates = new AtomicInteger(); + final SortedFieldsUpdates terms = new SortedFieldsUpdates(); + + public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE); + + final AtomicLong bytesUsed; + + private final static boolean VERBOSE_DELETES = false; + + long gen; + public BufferedUpdates() { + this(new AtomicLong()); + } + + BufferedUpdates(AtomicLong bytesUsed) { + assert bytesUsed != null; + this.bytesUsed = bytesUsed; + } + + @Override + public String toString() { + if (VERBOSE_DELETES) { + return "gen=" + gen + " numTerms=" + numTermUpdates + ", terms=" + terms + + ", bytesUsed=" + bytesUsed; + } else { + String s = "gen=" + gen; + if (numTermUpdates.get() != 0) { + s += " " + numTermUpdates.get() + " updated terms (unique count=" + terms.size() + ")"; + } + if (bytesUsed.get() != 0) { + s += " bytesUsed=" + bytesUsed.get(); + } + + return s; + } + } + + public void addTerm(Term term, FieldsUpdate update) { + SortedSet current = terms.get(term); + //if (current != null && update.docIDUpto < current.peek().docIDUpto) { + // Only record the new number if it's greater than the + // current one. This is important because if multiple + // threads are replacing the same doc at nearly the + // same time, it's possible that one thread that got a + // higher docID is scheduled before the other + // threads. If we blindly replace than we can + // incorrectly get both docs indexed. + //return; + //} + + if (current == null) { + current = new TreeSet(); + terms.put(term, current); + bytesUsed.addAndGet(BufferedDeletes.BYTES_PER_DEL_TERM + + term.bytes.length + + (RamUsageEstimator.NUM_BYTES_CHAR * term.field().length())); + } + current.add(update); + numTermUpdates.incrementAndGet(); + } + + void clear() { + terms.clear(); + numTermUpdates.set(0); + bytesUsed.set(0); + } + + boolean any() { + return terms.size() > 0; + } +} Index: lucene/core/src/java/org/apache/lucene/index/CheckIndex.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (revision 1420475) +++ lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -30,9 +30,7 @@ import org.apache.lucene.codecs.BlockTreeTermsReader; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.PostingsFormat; // javadocs -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldType; // for javadocs +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -46,6 +44,8 @@ import org.apache.lucene.util.CommandLineUtil; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.StringHelper; +// javadocs +// for javadocs /** * Basic tool and API to check the health of an index and @@ -781,7 +781,7 @@ } final int docFreq = termsEnum.docFreq(); - if (docFreq <= 0) { + if (docFreq < 0) { throw new RuntimeException("docfreq: " + docFreq + " is out of bounds"); } sumDocFreq += docFreq; @@ -919,7 +919,7 @@ throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount); } if (hasTotalTermFreq) { - if (totalTermFreq2 <= 0) { + if (totalTermFreq2 < 0) { throw new RuntimeException("totalTermFreq: " + 
totalTermFreq2 + " is out of bounds"); } sumTotalTermFreq += totalTermFreq; Index: lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (revision 1420475) +++ lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (working copy) @@ -28,7 +28,6 @@ import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.PerDocConsumer; -import org.apache.lucene.document.FieldType; import org.apache.lucene.index.DocumentsWriterPerThread.DocState; import org.apache.lucene.index.TypePromoter.TypeCompatibility; import org.apache.lucene.store.IOContext; @@ -370,7 +369,7 @@ } if (perDocConsumer == null) { - PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(""); + PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(); perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState); if (perDocConsumer == null) { throw new IllegalStateException("codec=" + docState.docWriter.codec + " does not support docValues: from docValuesFormat().docsConsumer(...) returned null; field=" + fieldInfo.name); Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -20,10 +20,13 @@ import java.io.IOException; import java.util.Collection; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.GenerationReplacementsFormat; import org.apache.lucene.index.DocumentsWriterFlushQueue.SegmentFlushTicket; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; @@ -33,89 +36,75 @@ import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.MutableBits; /** - * This class accepts multiple added documents and directly - * writes segment files. - * - * Each added document is passed to the {@link DocConsumer}, - * which in turn processes the document and interacts with - * other consumers in the indexing chain. Certain - * consumers, like {@link StoredFieldsConsumer} and {@link - * TermVectorsConsumer}, digest a document and - * immediately write bytes to the "doc store" files (ie, - * they do not consume RAM per document, except while they - * are processing the document). - * - * Other consumers, eg {@link FreqProxTermsWriter} and - * {@link NormsConsumer}, buffer bytes in RAM and flush only - * when a new segment is produced. - - * Once we have used our allowed RAM buffer, or the number - * of added docs is large enough (in the case we are - * flushing by doc count instead of RAM usage), we create a - * real segment and flush it to the Directory. - * + * This class accepts multiple added documents and directly writes segment + * files. 
+ * + * Each added document is passed to the {@link DocConsumer}, which in turn + * processes the document and interacts with other consumers in the indexing + * chain. Certain consumers, like {@link StoredFieldsConsumer} and + * {@link TermVectorsConsumer}, digest a document and immediately write bytes to + * the "doc store" files (ie, they do not consume RAM per document, except while + * they are processing the document). + * + * Other consumers, eg {@link FreqProxTermsWriter} and {@link NormsConsumer}, + * buffer bytes in RAM and flush only when a new segment is produced. + * + * Once we have used our allowed RAM buffer, or the number of added docs is + * large enough (in the case we are flushing by doc count instead of RAM usage), + * we create a real segment and flush it to the Directory. + * * Threads: - * - * Multiple threads are allowed into addDocument at once. - * There is an initial synchronized call to getThreadState - * which allocates a ThreadState for this thread. The same - * thread will get the same ThreadState over time (thread - * affinity) so that if there are consistent patterns (for - * example each thread is indexing a different content - * source) then we make better use of RAM. Then - * processDocument is called on that ThreadState without - * synchronization (most of the "heavy lifting" is in this - * call). Finally the synchronized "finishDocument" is - * called to flush changes to the directory. - * - * When flush is called by IndexWriter we forcefully idle - * all threads and flush only once they are all idle. This - * means you can call flush with a given thread even while - * other threads are actively adding/deleting documents. - * - * + * + * Multiple threads are allowed into addDocument at once. There is an initial + * synchronized call to getThreadState which allocates a ThreadState for this + * thread. The same thread will get the same ThreadState over time (thread + * affinity) so that if there are consistent patterns (for example each thread + * is indexing a different content source) then we make better use of RAM. Then + * processDocument is called on that ThreadState without synchronization (most + * of the "heavy lifting" is in this call). Finally the synchronized + * "finishDocument" is called to flush changes to the directory. + * + * When flush is called by IndexWriter we forcefully idle all threads and flush + * only once they are all idle. This means you can call flush with a given + * thread even while other threads are actively adding/deleting documents. + * + * * Exceptions: - * - * Because this class directly updates in-memory posting - * lists, and flushes stored fields and term vectors - * directly to files in the directory, there are certain - * limited times when an exception can corrupt this state. - * For example, a disk full while flushing stored fields - * leaves this file in a corrupt state. Or, an OOM - * exception while appending to the in-memory posting lists - * can corrupt that posting list. We call such exceptions - * "aborting exceptions". In these cases we must call - * abort() to discard all docs added since the last flush. - * - * All other exceptions ("non-aborting exceptions") can - * still partially update the index structures. These - * updates are consistent, but, they represent only a part - * of the document seen up until the exception was hit. - * When this happens, we immediately mark the document as - * deleted so that the document is always atomically ("all - * or none") added to the index. 
+ * + * Because this class directly updates in-memory posting lists, and flushes + * stored fields and term vectors directly to files in the directory, there are + * certain limited times when an exception can corrupt this state. For example, + * a disk full while flushing stored fields leaves this file in a corrupt state. + * Or, an OOM exception while appending to the in-memory posting lists can + * corrupt that posting list. We call such exceptions "aborting exceptions". In + * these cases we must call abort() to discard all docs added since the last + * flush. + * + * All other exceptions ("non-aborting exceptions") can still partially update + * the index structures. These updates are consistent, but, they represent only + * a part of the document seen up until the exception was hit. When this + * happens, we immediately mark the document as deleted so that the document is + * always atomically ("all or none") added to the index. */ final class DocumentsWriter { Directory directory; - + private volatile boolean closed; - + final InfoStream infoStream; Similarity similarity; - + List newFiles; - + final IndexWriter indexWriter; - + private AtomicInteger numDocsInRAM = new AtomicInteger(0); - + // TODO: cut over to BytesRefHash in BufferedDeletes volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue(); private final DocumentsWriterFlushQueue ticketQueue = new DocumentsWriterFlushQueue(); @@ -126,17 +115,20 @@ * #anyChanges() & #flushAllThreads */ private volatile boolean pendingChangesInCurrentFullFlush; - - private Collection abortedFiles; // List of files that were written before last abort() - + + private Collection abortedFiles; // List of files that were written + // before last abort() + final IndexingChain chain; - + final DocumentsWriterPerThreadPool perThreadPool; final FlushPolicy flushPolicy; final DocumentsWriterFlushControl flushControl; final Codec codec; - DocumentsWriter(Codec codec, LiveIndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers, + + DocumentsWriter(Codec codec, LiveIndexWriterConfig config, + Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers, BufferedDeletesStream bufferedDeletesStream) { this.codec = codec; this.directory = directory; @@ -151,7 +143,7 @@ flushPolicy.init(this); flushControl = new DocumentsWriterFlushControl(this, config); } - + synchronized void deleteQueries(final Query... queries) throws IOException { deleteQueue.addDelete(queries); flushControl.doOnDelete(); @@ -159,7 +151,7 @@ applyAllDeletes(deleteQueue); } } - + // TODO: we could check w/ FreqProxTermsWriter: if the // term doesn't exist, don't bother buffering into the // per-DWPT map (but still must go into the global map) @@ -171,47 +163,49 @@ applyAllDeletes(deleteQueue); } } - + DocumentsWriterDeleteQueue currentDeleteSession() { return deleteQueue; } - private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException { + private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) + throws IOException { if (deleteQueue != null && !flushControl.isFullFlush()) { ticketQueue.addDeletesAndPurge(this, deleteQueue); } indexWriter.applyAllDeletes(); indexWriter.flushCount.incrementAndGet(); } - + /** Returns how many docs are currently buffered in RAM. 
*/ int getNumDocs() { return numDocsInRAM.get(); } - + Collection abortedFiles() { return abortedFiles; } - + private void ensureOpen() throws AlreadyClosedException { if (closed) { throw new AlreadyClosedException("this IndexWriter is closed"); } } - - /** Called if we hit an exception at a bad time (when - * updating the index files) and must discard all - * currently buffered docs. This resets our state, - * discarding any docs added since last flush. */ + + /** + * Called if we hit an exception at a bad time (when updating the index files) + * and must discard all currently buffered docs. This resets our state, + * discarding any docs added since last flush. + */ synchronized void abort() { boolean success = false; - + try { deleteQueue.clear(); if (infoStream.isEnabled("DW")) { infoStream.message("DW", "abort"); } - + final int limit = perThreadPool.getActiveThreadState(); for (int i = 0; i < limit; i++) { final ThreadState perThread = perThreadPool.getThreadState(i); @@ -236,53 +230,58 @@ success = true; } finally { if (infoStream.isEnabled("DW")) { - infoStream.message("DW", "done abort; abortedFiles=" + abortedFiles + " success=" + success); + infoStream.message("DW", "done abort; abortedFiles=" + abortedFiles + + " success=" + success); } } } - + boolean anyChanges() { if (infoStream.isEnabled("DW")) { - infoStream.message("DW", "anyChanges? numDocsInRam=" + numDocsInRAM.get() - + " deletes=" + anyDeletions() + " hasTickets:" - + ticketQueue.hasTickets() + " pendingChangesInFullFlush: " - + pendingChangesInCurrentFullFlush); + infoStream.message("DW", + "anyChanges? numDocsInRam=" + numDocsInRAM.get() + " deletes=" + + anyDeletions() + " hasTickets:" + ticketQueue.hasTickets() + + " pendingChangesInFullFlush: " + + pendingChangesInCurrentFullFlush); } /* - * changes are either in a DWPT or in the deleteQueue. - * yet if we currently flush deletes and / or dwpt there - * could be a window where all changes are in the ticket queue - * before they are published to the IW. ie we need to check if the - * ticket queue has any tickets. + * changes are either in a DWPT or in the deleteQueue. yet if we currently + * flush deletes and / or dwpt there could be a window where all changes are + * in the ticket queue before they are published to the IW. ie we need to + * check if the ticket queue has any tickets. 
*/ - return numDocsInRAM.get() != 0 || anyDeletions() || ticketQueue.hasTickets() || pendingChangesInCurrentFullFlush; + return numDocsInRAM.get() != 0 || anyDeletions() + || ticketQueue.hasTickets() || pendingChangesInCurrentFullFlush; } public int getBufferedDeleteTermsSize() { return deleteQueue.getBufferedDeleteTermsSize(); } - - //for testing + + // for testing public int getNumBufferedDeleteTerms() { return deleteQueue.numGlobalTermDeletes(); } - + public boolean anyDeletions() { return deleteQueue.anyChanges(); } - + void close() { closed = true; flushControl.setClosed(); } - + private boolean preUpdate() throws IOException { ensureOpen(); boolean maybeMerge = false; if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) { // Help out flushing any queued DWPTs so we can un-stall: if (infoStream.isEnabled("DW")) { - infoStream.message("DW", "DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)"); + infoStream + .message( + "DW", + "DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)"); } do { // Try pick up pending threads here if possible @@ -291,52 +290,58 @@ // Don't push the delete here since the update could fail! maybeMerge |= doFlush(flushingDWPT); } - + if (infoStream.isEnabled("DW")) { if (flushControl.anyStalledThreads()) { - infoStream.message("DW", "WARNING DocumentsWriter has stalled threads; waiting"); + infoStream.message("DW", + "WARNING DocumentsWriter has stalled threads; waiting"); } } flushControl.waitIfStalled(); // block if stalled - } while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing - + } while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try + // help flushing + if (infoStream.isEnabled("DW")) { - infoStream.message("DW", "continue indexing after helping out flushing DocumentsWriter is healthy"); + infoStream + .message("DW", + "continue indexing after helping out flushing DocumentsWriter is healthy"); } } return maybeMerge; } - - private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException { + + private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, + boolean maybeMerge) throws IOException { if (flushControl.doApplyAllDeletes()) { applyAllDeletes(deleteQueue); } if (flushingDWPT != null) { maybeMerge |= doFlush(flushingDWPT); } else { - final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush(); + final DocumentsWriterPerThread nextPendingFlush = flushControl + .nextPendingFlush(); if (nextPendingFlush != null) { maybeMerge |= doFlush(nextPendingFlush); } } - + return maybeMerge; } - - boolean updateDocuments(final Iterable docs, final Analyzer analyzer, - final Term delTerm) throws IOException { + + boolean updateDocuments(final Iterable docs, + final Analyzer analyzer, final Term delTerm) throws IOException { boolean maybeMerge = preUpdate(); - + final ThreadState perThread = flushControl.obtainAndLock(); final DocumentsWriterPerThread flushingDWPT; try { if (!perThread.isActive()) { ensureOpen(); - assert false: "perThread is not active but we are still open"; + assert false : "perThread is not active but we are still open"; } - + final DocumentsWriterPerThread dwpt = perThread.dwpt; try { final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm); @@ -351,29 +356,30 @@ } finally { perThread.unlock(); } - + return postUpdate(flushingDWPT, maybeMerge); } - + boolean updateDocument(final IndexDocument doc, final Analyzer analyzer, 
final Term delTerm) throws IOException { - + boolean maybeMerge = preUpdate(); - + final ThreadState perThread = flushControl.obtainAndLock(); - + final DocumentsWriterPerThread flushingDWPT; try { - + if (!perThread.isActive()) { ensureOpen(); - throw new IllegalStateException("perThread is not active but we are still open"); + throw new IllegalStateException( + "perThread is not active but we are still open"); } - + final DocumentsWriterPerThread dwpt = perThread.dwpt; try { - dwpt.updateDocument(doc, analyzer, delTerm); + dwpt.updateDocument(doc, analyzer, delTerm); numDocsInRAM.incrementAndGet(); } finally { if (dwpt.checkAndResetHasAborted()) { @@ -385,20 +391,104 @@ } finally { perThread.unlock(); } - + return postUpdate(flushingDWPT, maybeMerge); } - - private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException { - boolean maybeMerge = false; + + boolean updateFields(Term term, FieldsUpdate fieldsUpdate) throws IOException { + boolean maybeMerge = preUpdate(); + + final ThreadState perThread = flushControl.obtainAndLock(); + + final DocumentsWriterPerThread flushingDWPT; + + try { + + if (!perThread.isActive()) { + ensureOpen(); + throw new IllegalStateException( + "perThread is not active but we are still open"); + } + + final DocumentsWriterPerThread dwpt = perThread.dwpt; + try { + dwpt.updateFields(term, fieldsUpdate); + } finally { + if (dwpt.checkAndResetHasAborted()) { + flushControl.doOnAbort(perThread); + } + } + final boolean isUpdate = term != null; + flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); + } finally { + perThread.unlock(); + } + return postUpdate(flushingDWPT, maybeMerge); + } + + void writeUpdatedSegment(UpdatedSegmentData liveUpdates, + SegmentInfoPerCommit info, IndexFileDeleter deleter) throws IOException { + final ThreadState perThread = flushControl.obtainAndLock(); + + try { + if (!perThread.isActive()) { + ensureOpen(); + throw new IllegalStateException( + "perThread is not active but we are still open"); + } + + final DocumentsWriterPerThread dwpt = perThread.dwpt; + try { + // start new segment, with update generation in name + dwpt.initSegmentInfo(info.info, info.getNextUpdateGen()); + + // push documents, including empty ones where needed + liveUpdates.startWriting(info.getNextUpdateGen(), + info.info.getDocCount()); + IndexDocument doc; + while ((doc = liveUpdates.nextDocument()) != null) { + dwpt.updateDocument(doc, liveUpdates.getAnalyzer(), null); + } + + // add field generation replacements + final Map generationReplacments = liveUpdates + .getFieldGenerationReplacments(); + if (generationReplacments != null) { + for (Entry field : generationReplacments + .entrySet()) { + final GenerationReplacementsFormat repsFormat = codec + .generationReplacementsFormat(); + repsFormat.writeGenerationReplacement(field.getKey(), + field.getValue(), directory, info, IOContext.DEFAULT); + } + } + + // flush directly + dwpt.clearDeleteSlice(); + dwpt.flush(info.getNextUpdateGen()); + } finally { + if (dwpt.checkAndResetHasAborted()) { + flushControl.doOnAbort(perThread); + } + } + } finally { + perThread.unlock(); + } + } + + private boolean doFlush(DocumentsWriterPerThread flushingDWPT) + throws IOException { + int actualFlushes = 0; while (flushingDWPT != null) { - maybeMerge = true; + actualFlushes++; boolean success = false; SegmentFlushTicket ticket = null; try { assert currentFullFlushDelQueue == null || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: " - + currentFullFlushDelQueue + "but was: " + 
flushingDWPT.deleteQueue + + currentFullFlushDelQueue + + "but was: " + + flushingDWPT.deleteQueue + " " + flushControl.isFullFlush(); /* * Since with DWPT the flush process is concurrent and several DWPT @@ -415,18 +505,25 @@ * might miss to deletes documents in 'A'. */ try { - // Each flush is assigned a ticket in the order they acquire the ticketQueue lock + // Each flush is assigned a ticket in the order they acquire the + // ticketQueue lock ticket = ticketQueue.addFlushTicket(flushingDWPT); - + // flush concurrently without locking - final FlushedSegment newSegment = flushingDWPT.flush(); - ticketQueue.addSegment(ticket, newSegment); - // flush was successful once we reached this point - new seg. has been assigned to the ticket! - success = true; + final FlushedSegment newSegment = flushingDWPT.flush(-1); + if (newSegment == null) { + actualFlushes--; + } else { + ticketQueue.addSegment(ticket, newSegment); + // flush was successful once we reached this point - new seg. has + // been assigned to the ticket! + success = true; + } } finally { if (!success && ticket != null) { // In the case of a failure make sure we are making progress and - // apply all the deletes since the segment flush failed since the flush + // apply all the deletes since the segment flush failed since the + // flush // ticket could hold global deletes see FlushTicket#canPublish() ticketQueue.markTicketFailed(ticket); } @@ -435,59 +532,63 @@ * Now we are done and try to flush the ticket queue if the head of the * queue has already finished the flush. */ - if (ticketQueue.getTicketCount() >= perThreadPool.getActiveThreadState()) { + if (ticketQueue.getTicketCount() >= perThreadPool + .getActiveThreadState()) { // This means there is a backlog: the one // thread in innerPurge can't keep up with all - // other threads flushing segments. In this case + // other threads flushing segments. In this case // we forcefully stall the producers. ticketQueue.forcePurge(this); } else { ticketQueue.tryPurge(this); } - + } finally { flushControl.doAfterFlush(flushingDWPT); flushingDWPT.checkAndResetHasAborted(); indexWriter.flushCount.incrementAndGet(); indexWriter.doAfterFlush(); } - + flushingDWPT = flushControl.nextPendingFlush(); } - + // If deletes alone are consuming > 1/2 our RAM // buffer, force them all to apply now. 
This is to // prevent too-frequent flushing of a long tail of // tiny segments: final double ramBufferSizeMB = indexWriter.getConfig().getRAMBufferSizeMB(); - if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && - flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) { + if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH + && flushControl.getDeleteBytesUsed() > (1024 * 1024 * ramBufferSizeMB / 2)) { if (infoStream.isEnabled("DW")) { - infoStream.message("DW", "force apply deletes bytesUsed=" + flushControl.getDeleteBytesUsed() + " vs ramBuffer=" + (1024*1024*ramBufferSizeMB)); + infoStream.message("DW", "force apply deletes bytesUsed=" + + flushControl.getDeleteBytesUsed() + " vs ramBuffer=" + + (1024 * 1024 * ramBufferSizeMB)); } applyAllDeletes(deleteQueue); } - - return maybeMerge; + + return actualFlushes > 0; } - - void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes) - throws IOException { + void finishFlush(FlushedSegment newSegment, + FrozenBufferedDeletes bufferedDeletes) throws IOException { // Finish the flushed segment and publish it to IndexWriter if (newSegment == null) { assert bufferedDeletes != null; - if (bufferedDeletes != null && bufferedDeletes.any()) { + if (bufferedDeletes != null + && (bufferedDeletes.anyDeletes() || bufferedDeletes.anyUpdates())) { indexWriter.publishFrozenDeletes(bufferedDeletes); if (infoStream.isEnabled("DW")) { - infoStream.message("DW", "flush: push buffered deletes: " + bufferedDeletes); + infoStream.message("DW", "flush: push buffered deletes: " + + bufferedDeletes); } } } else { - publishFlushedSegment(newSegment, bufferedDeletes); + publishFlushedSegment(newSegment, bufferedDeletes); } } - + final void subtractFlushedNumDocs(int numFlushed) { int oldValue = numDocsInRAM.get(); while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) { @@ -497,55 +598,62 @@ /** * Publishes the flushed segment, segment private deletes (if any) and its - * associated global delete (if present) to IndexWriter. The actual - * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s - * delete generation is always GlobalPacket_deleteGeneration + 1 + * associated global delete (if present) to IndexWriter. The actual publishing + * operation is synced on IW -> BDS so that the {@link SegmentInfo}'s delete + * generation is always GlobalPacket_deleteGeneration + 1 */ - private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket) - throws IOException { + private void publishFlushedSegment(FlushedSegment newSegment, + FrozenBufferedDeletes globalPacket) throws IOException { assert newSegment != null; assert newSegment.segmentInfo != null; final FrozenBufferedDeletes segmentDeletes = newSegment.segmentDeletes; - //System.out.println("FLUSH: " + newSegment.segmentInfo.info.name); + // System.out.println("FLUSH: " + newSegment.segmentInfo.info.name); if (infoStream.isEnabled("DW")) { - infoStream.message("DW", "publishFlushedSegment seg-private deletes=" + segmentDeletes); + infoStream.message("DW", "publishFlushedSegment seg-private deletes=" + + segmentDeletes); } - if (segmentDeletes != null && infoStream.isEnabled("DW")) { - infoStream.message("DW", "flush: push buffered seg private deletes: " + segmentDeletes); + infoStream.message("DW", "flush: push buffered seg private deletes: " + + segmentDeletes); } // now publish! 
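For orientation, a hedged sketch of how the update machinery added above (updateFields, writeUpdatedSegment and the per-field FieldGenerationReplacements written there) might be exercised. These types are package-private, so the sketch lives in org.apache.lucene.index; the dw handle and any public IndexWriter entry point wrapping this call are assumptions, not shown in this hunk:

    package org.apache.lucene.index;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.StringField;

    class FieldsUpdateSketch {
      // 'dw' is assumed to be the DocumentsWriter owned by an open IndexWriter.
      static void markOldDocsArchived(DocumentsWriter dw, Analyzer analyzer) throws Exception {
        Document extra = new Document();   // Document is assumed to implement IndexDocument here
        extra.add(new StringField("category", "archived", Store.YES));
        // ADD_FIELDS appends the field to every document matching the term;
        // REPLACE_FIELDS would replace same-named fields instead.
        FieldsUpdate update =
            new FieldsUpdate(FieldsUpdate.Operation.ADD_FIELDS, extra, analyzer);
        dw.updateFields(new Term("status", "old"), update);
      }

      // Per-field generation replacements simply record, per document,
      // the update generation whose field value now wins:
      static FieldGenerationReplacements exampleReplacements() {
        FieldGenerationReplacements reps = new FieldGenerationReplacements();
        reps.set(3, 1L);   // doc 3: field replaced in update generation 1
        reps.set(7, 2L);   // doc 7: field replaced in update generation 2
        return reps;
      }
    }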
- indexWriter.publishFlushedSegment(newSegment.segmentInfo, segmentDeletes, globalPacket); + indexWriter.publishFlushedSegment(newSegment.segmentInfo, segmentDeletes, + globalPacket); } // for asserts private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null; - + // for asserts - private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) { + private synchronized boolean setFlushingDeleteQueue( + DocumentsWriterDeleteQueue session) { currentFullFlushDelQueue = session; return true; } /* * FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a - * two stage operation; the caller must ensure (in try/finally) that finishFlush - * is called after this method, to release the flush lock in DWFlushControl + * two stage operation; the caller must ensure (in try/finally) that + * finishFlush is called after this method, to release the flush lock in + * DWFlushControl */ - final boolean flushAllThreads() - throws IOException { + final boolean flushAllThreads() throws IOException { final DocumentsWriterDeleteQueue flushingDeleteQueue; if (infoStream.isEnabled("DW")) { - infoStream.message("DW", Thread.currentThread().getName() + " startFullFlush"); + infoStream.message("DW", Thread.currentThread().getName() + + " startFullFlush"); } synchronized (this) { pendingChangesInCurrentFullFlush = anyChanges(); flushingDeleteQueue = deleteQueue; - /* Cutover to a new delete queue. This must be synced on the flush control + /* + * Cutover to a new delete queue. This must be synced on the flush control * otherwise a new DWPT could sneak into the loop with an already flushing - * delete queue */ - flushControl.markForFullFlush(); // swaps the delQueue synced on FlushControl + * delete queue + */ + flushControl.markForFullFlush(); // swaps the delQueue synced on + // FlushControl assert setFlushingDeleteQueue(flushingDeleteQueue); } assert currentFullFlushDelQueue != null; @@ -559,10 +667,15 @@ anythingFlushed |= doFlush(flushingDWPT); } // If a concurrent flush is still in flight wait for it - flushControl.waitForFlush(); - if (!anythingFlushed && flushingDeleteQueue.anyChanges()) { // apply deletes if we did not flush any document + flushControl.waitForFlush(); + if (!anythingFlushed && flushingDeleteQueue.anyChanges()) { // apply + // deletes if + // we did not + // flush any + // document if (infoStream.isEnabled("DW")) { - infoStream.message("DW", Thread.currentThread().getName() + ": flush naked frozen global deletes"); + infoStream.message("DW", Thread.currentThread().getName() + + ": flush naked frozen global deletes"); } ticketQueue.addDeletesAndPurge(this, flushingDeleteQueue); } else { @@ -578,7 +691,8 @@ final void finishFullFlush(boolean success) { try { if (infoStream.isEnabled("DW")) { - infoStream.message("DW", Thread.currentThread().getName() + " finishFullFlush success=" + success); + infoStream.message("DW", Thread.currentThread().getName() + + " finishFullFlush success=" + success); } assert setFlushingDeleteQueue(null); if (success) { Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (working copy) @@ -25,15 +25,15 @@ /** * {@link DocumentsWriterDeleteQueue} is a non-blocking linked pending deletes - * queue. 
In contrast to other queue implementation we only maintain the - * tail of the queue. A delete queue is always used in a context of a set of - * DWPTs and a global delete pool. Each of the DWPT and the global pool need to - * maintain their 'own' head of the queue (as a DeleteSlice instance per DWPT). - * The difference between the DWPT and the global pool is that the DWPT starts - * maintaining a head once it has added its first document since for its segments - * private deletes only the deletes after that document are relevant. The global - * pool instead starts maintaining the head once this instance is created by - * taking the sentinel instance as its initial head. + * queue. In contrast to other queue implementation we only maintain the tail of + * the queue. A delete queue is always used in a context of a set of DWPTs and a + * global delete pool. Each of the DWPT and the global pool need to maintain + * their 'own' head of the queue (as a DeleteSlice instance per DWPT). The + * difference between the DWPT and the global pool is that the DWPT starts + * maintaining a head once it has added its first document since for its + * segments private deletes only the deletes after that document are relevant. + * The global pool instead starts maintaining the head once this instance is + * created by taking the sentinel instance as its initial head. *

* Since each {@link DeleteSlice} maintains its own head and the list is only * single linked the garbage collector takes care of pruning the list for us. @@ -44,14 +44,15 @@ * Each DWPT as well as the global delete pool maintain their private * DeleteSlice instance. In the DWPT case updating a slice is equivalent to * atomically finishing the document. The slice update guarantees a "happens - * before" relationship to all other updates in the same indexing session. When a - * DWPT updates a document it: + * before" relationship to all other updates in the same indexing session. When + * a DWPT updates a document it: * *

    *
  1. consumes a document and finishes its processing
  2. *
  3. updates its private {@link DeleteSlice} either by calling - * {@link #updateSlice(DeleteSlice)} or {@link #add(Term, DeleteSlice)} (if the - * document has a delTerm)
  4. + * {@link #updateSlice(DeleteSlice)} or + * {@link #add(Term, DeleteSlice, FieldsUpdate)} (if the document has a delTerm) + * *
  5. applies all deletes in the slice to its private {@link BufferedDeletes} * and resets it
  6. *
  7. increments its internal document id
  8. @@ -73,6 +74,7 @@ private final DeleteSlice globalSlice; private final BufferedDeletes globalBufferedDeletes; + private final BufferedUpdates globalBufferedUpdates; /* only acquired to update the global deletes */ private final ReentrantLock globalBufferLock = new ReentrantLock(); @@ -83,11 +85,12 @@ } DocumentsWriterDeleteQueue(long generation) { - this(new BufferedDeletes(), generation); + this(new BufferedDeletes(), new BufferedUpdates(), generation); } - DocumentsWriterDeleteQueue(BufferedDeletes globalBufferedDeletes, long generation) { + DocumentsWriterDeleteQueue(BufferedDeletes globalBufferedDeletes, BufferedUpdates globalBufferedUpdates, long generation) { this.globalBufferedDeletes = globalBufferedDeletes; + this.globalBufferedUpdates = globalBufferedUpdates; this.generation = generation; /* * we use a sentinel instance as our initial tail. No slice will ever try to @@ -110,8 +113,8 @@ /** * invariant for document update */ - void add(Term term, DeleteSlice slice) { - final TermNode termNode = new TermNode(term); + void add(Term term, DeleteSlice slice, FieldsUpdate fieldsUpdate) { + final TermNode termNode = new TermNode(term, fieldsUpdate); // System.out.println(Thread.currentThread().getName() + ": push " + termNode + " this=" + this); add(termNode); /* @@ -174,7 +177,7 @@ * and if the global slice is up-to-date * and if globalBufferedDeletes has changes */ - return globalBufferedDeletes.any() || !globalSlice.isEmpty() || globalSlice.sliceTail != tail + return globalBufferedDeletes.any() || globalBufferedUpdates.any() || !globalSlice.isEmpty() || globalSlice.sliceTail != tail || tail.next != null; } finally { globalBufferLock.unlock(); @@ -192,7 +195,7 @@ try { if (updateSlice(globalSlice)) { // System.out.println(Thread.currentThread() + ": apply globalSlice"); - globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + globalSlice.apply(globalBufferedDeletes, globalBufferedUpdates, BufferedDeletes.MAX_INT); } } finally { globalBufferLock.unlock(); @@ -217,13 +220,14 @@ try { if (globalSlice.sliceTail != currentTail) { globalSlice.sliceTail = currentTail; - globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + globalSlice.apply(globalBufferedDeletes, globalBufferedUpdates, BufferedDeletes.MAX_INT); } // System.out.println(Thread.currentThread().getName() + ": now freeze global buffer " + globalBufferedDeletes); final FrozenBufferedDeletes packet = new FrozenBufferedDeletes( - globalBufferedDeletes, false); + globalBufferedDeletes, globalBufferedUpdates, false); globalBufferedDeletes.clear(); + globalBufferedUpdates.clear(); return packet; } finally { globalBufferLock.unlock(); @@ -257,7 +261,7 @@ sliceHead = sliceTail = currentTail; } - void apply(BufferedDeletes del, int docIDUpto) { + void apply(BufferedDeletes del, BufferedUpdates update, int docIDUpto) { if (sliceHead == sliceTail) { // 0 length slice return; @@ -272,7 +276,7 @@ do { current = current.next; assert current != null : "slice property violated between the head on the tail must not be a null node"; - current.apply(del, docIDUpto); + current.apply(del, update, docIDUpto); // System.out.println(Thread.currentThread().getName() + ": pull " + current + " docIDUpto=" + docIDUpto); } while (current != sliceTail); reset(); @@ -323,7 +327,7 @@ static final AtomicReferenceFieldUpdater nextUpdater = AtomicReferenceFieldUpdater .newUpdater(Node.class, Node.class, "next"); - void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + void apply(BufferedDeletes bufferedDeletes, 
BufferedUpdates bufferedUpdates, int docIDUpto) { throw new IllegalStateException("sentinel item must never be applied"); } @@ -333,14 +337,20 @@ } private static final class TermNode extends Node { - - TermNode(Term term) { + FieldsUpdate fieldsUpdate; + + TermNode(Term term, FieldsUpdate fieldsUpdate) { super(term); + this.fieldsUpdate = fieldsUpdate; } @Override - void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { - bufferedDeletes.addTerm(item, docIDUpto); + void apply(BufferedDeletes bufferedDeletes, BufferedUpdates bufferedUpdates, int docIDUpto) { + if (fieldsUpdate == null) { + bufferedDeletes.addTerm(item, docIDUpto); + } else { + bufferedUpdates.addTerm(item, new FieldsUpdate(fieldsUpdate, docIDUpto)); + } } @Override @@ -355,7 +365,7 @@ } @Override - void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + void apply(BufferedDeletes bufferedDeletes, BufferedUpdates bufferedUpdates, int docIDUpto) { for (Query query : item) { bufferedDeletes.addQuery(query, docIDUpto); } @@ -368,7 +378,7 @@ } @Override - void apply(BufferedDeletes bufferedDeletes, int docIDUpto) { + void apply(BufferedDeletes bufferedDeletes, BufferedUpdates bufferedUpdates, int docIDUpto) { for (Term term : item) { bufferedDeletes.addTerm(term, docIDUpto); } @@ -387,7 +397,7 @@ try { if (globalSlice.sliceTail != currentTail) { globalSlice.sliceTail = currentTail; - globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT); + globalSlice.apply(globalBufferedDeletes, globalBufferedUpdates, BufferedDeletes.MAX_INT); } return globalBufferedDeletes.any(); } finally { @@ -409,6 +419,10 @@ return globalBufferedDeletes.bytesUsed.get(); } + public long updateBytesUsed() { + return globalBufferedUpdates.bytesUsed.get(); + } + @Override public String toString() { return "DWDQ: [ generation: " + generation + " ]"; Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1422477) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -121,15 +121,21 @@ final SegmentInfoPerCommit segmentInfo; final FieldInfos fieldInfos; final FrozenBufferedDeletes segmentDeletes; + final BufferedUpdates segmentUpdates; final MutableBits liveDocs; + final UpdatedSegmentData liveUpdates; final int delCount; private FlushedSegment(SegmentInfoPerCommit segmentInfo, FieldInfos fieldInfos, - BufferedDeletes segmentDeletes, MutableBits liveDocs, int delCount) { + BufferedDeletes segmentDeletes, MutableBits liveDocs, + int delCount, BufferedUpdates segmentUpdates, + UpdatedSegmentData liveUpdates) { this.segmentInfo = segmentInfo; this.fieldInfos = fieldInfos; - this.segmentDeletes = segmentDeletes != null && segmentDeletes.any() ? new FrozenBufferedDeletes(segmentDeletes, true) : null; + this.segmentDeletes = segmentDeletes != null && segmentDeletes.any() ? 
new FrozenBufferedDeletes(segmentDeletes, segmentUpdates, true) : null; + this.segmentUpdates = segmentUpdates; this.liveDocs = liveDocs; + this.liveUpdates = liveUpdates; this.delCount = delCount; } } @@ -169,13 +175,16 @@ final TrackingDirectoryWrapper directory; final Directory directoryOrig; final DocState docState; - final DocConsumer consumer; + final IndexingChain indexingChain; final Counter bytesUsed; + DocConsumer consumer; SegmentWriteState flushState; //Deletes for our still-in-RAM (to be flushed next) segment BufferedDeletes pendingDeletes; + BufferedUpdates pendingUpdates; SegmentInfo segmentInfo; // Current segment we are working on + SegmentInfo baseSegmentInfo; // name of the base segment for segmentInfo boolean aborting = false; // True if an abort is pending boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting @@ -204,11 +213,10 @@ bytesUsed = Counter.newCounter(); byteBlockAllocator = new DirectTrackingAllocator(bytesUsed); pendingDeletes = new BufferedDeletes(); + pendingUpdates = new BufferedUpdates(); intBlockAllocator = new IntBlockAllocator(bytesUsed); initialize(); - // this should be the last call in the ctor - // it really sucks that we need to pull this within the ctor and pass this ref to the chain! - consumer = indexingChain.getChain(this); + this.indexingChain = indexingChain; } public DocumentsWriterPerThread(DocumentsWriterPerThread other, FieldInfos.Builder fieldInfos) { @@ -239,7 +247,7 @@ docState.analyzer = analyzer; docState.docID = numDocsInRAM; if (segmentInfo == null) { - initSegmentInfo(); + initSegmentInfo(null, -1); } if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name); @@ -272,17 +280,27 @@ abort(); } } - finishDocument(delTerm); + finishDocument(delTerm, null); } - private void initSegmentInfo() { - String segment = writer.newSegmentName(); - segmentInfo = new SegmentInfo(directoryOrig, Constants.LUCENE_MAIN_VERSION, segment, -1, - false, codec, null, null); + void initSegmentInfo(SegmentInfo info, long updateGen) { + if (info == null) { + String segment = writer.newSegmentName(); + segmentInfo = new SegmentInfo(directoryOrig, + Constants.LUCENE_MAIN_VERSION, segment, -1, false, codec, null, null); + baseSegmentInfo = null; + } else { + baseSegmentInfo = info; + segmentInfo = new SegmentInfo(directoryOrig, + Constants.LUCENE_MAIN_VERSION, IndexFileNames.fileNameFromGeneration( + info.name, "", updateGen, true), -1, false, codec, null, null); + } assert numDocsInRAM == 0; if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { - infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segment + " delQueue=" + deleteQueue); + infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentInfo.name + " delQueue=" + deleteQueue); } + // reset consumer, may have previous segment name as inner state + consumer = indexingChain.getChain(this); } public int updateDocuments(Iterable docs, Analyzer analyzer, Term delTerm) throws IOException { @@ -290,7 +308,7 @@ assert deleteQueue != null; docState.analyzer = analyzer; if (segmentInfo == null) { - initSegmentInfo(); + initSegmentInfo(null, -1); } if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name); @@ -330,7 +348,7 @@ } } - 
finishDocument(null); + finishDocument(null, null); } allDocsIndexed = true; @@ -338,9 +356,9 @@ // succeeded, but apply it only to docs prior to when // this batch started: if (delTerm != null) { - deleteQueue.add(delTerm, deleteSlice); + deleteQueue.add(delTerm, deleteSlice, null); assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item"; - deleteSlice.apply(pendingDeletes, numDocsInRAM-docCount); + deleteSlice.apply(pendingDeletes, pendingUpdates, numDocsInRAM-docCount); } } finally { @@ -360,7 +378,7 @@ return docCount; } - private void finishDocument(Term delTerm) { + private void finishDocument(Term delTerm, FieldsUpdate fieldsUpdate) { /* * here we actually finish the document in two steps 1. push the delete into * the queue and update our slice. 2. increment the DWPT private document @@ -372,20 +390,21 @@ if (deleteSlice == null) { deleteSlice = deleteQueue.newSlice(); if (delTerm != null) { - deleteQueue.add(delTerm, deleteSlice); + deleteQueue.add(delTerm, deleteSlice, fieldsUpdate); deleteSlice.reset(); } - } else { if (delTerm != null) { - deleteQueue.add(delTerm, deleteSlice); + deleteQueue.add(delTerm, deleteSlice, fieldsUpdate); assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item"; - deleteSlice.apply(pendingDeletes, numDocsInRAM); + deleteSlice.apply(pendingDeletes, pendingUpdates, numDocsInRAM); } else if (deleteQueue.updateSlice(deleteSlice)) { - deleteSlice.apply(pendingDeletes, numDocsInRAM); + deleteSlice.apply(pendingDeletes, pendingUpdates, numDocsInRAM); } } - ++numDocsInRAM; + if (fieldsUpdate == null) { + ++numDocsInRAM; + } } // Buffer a specific docID for deletion. Currently only @@ -422,7 +441,9 @@ /** Reset after a flush */ private void doAfterFlush() { segmentInfo = null; - consumer.doAfterFlush(); + if (consumer != null) { + consumer.doAfterFlush(); + } directory.getCreatedFiles().clear(); fieldInfos = new FieldInfos.Builder(fieldInfos.globalFieldNumbers); parent.subtractFlushedNumDocs(numDocsInRAM); @@ -441,7 +462,7 @@ adding a document. */ if (deleteSlice != null) { // apply all deletes before we flush and release the delete slice - deleteSlice.apply(pendingDeletes, numDocsInRAM); + deleteSlice.apply(pendingDeletes, pendingUpdates, numDocsInRAM); assert deleteSlice.isEmpty(); deleteSlice = null; } @@ -449,13 +470,17 @@ } /** Flush all pending docs to a new segment */ - FlushedSegment flush() throws IOException { + FlushedSegment flush(long updateGen) throws IOException { assert numDocsInRAM > 0; assert deleteSlice == null : "all deletes must be applied in prepareFlush"; + if (segmentInfo == null) { + return null; + } segmentInfo.setDocCount(numDocsInRAM); - flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(), - writer.getConfig().getTermIndexInterval(), - pendingDeletes, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed()))); + IOContext context = new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())); + flushState = new SegmentWriteState(infoStream, directory, segmentInfo, 0, fieldInfos.finish(), + writer.getConfig().getTermIndexInterval(), + pendingDeletes, pendingUpdates, context); final double startMBUsed = parent.flushControl.netBytes() / 1024. 
/ 1024.; // Apply delete-by-docID now (delete-byDocID only @@ -487,9 +512,14 @@ try { consumer.flush(flushState); pendingDeletes.terms.clear(); - segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); - - final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L); + if (updateGen < 0) { + segmentInfo.setFiles(new HashSet(directory.getCreatedFiles())); + } else { + segmentInfo = baseSegmentInfo; + segmentInfo.addFiles(new HashSet(directory.getCreatedFiles())); + } + + final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L, updateGen); if (infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : (flushState.segmentInfo.getDocCount() - flushState.delCountOnFlush)) + " deleted docs"); infoStream.message("DWPT", "new segment has " + @@ -524,8 +554,9 @@ assert segmentInfo != null; FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos, - segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush); - sealFlushedSegment(fs); + segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush, + pendingUpdates, flushState.liveUpdates); + sealFlushedSegment(fs, updateGen); doAfterFlush(); success = true; @@ -544,7 +575,7 @@ * Seals the {@link SegmentInfo} for the new flushed segment and persists * the deleted documents {@link MutableBits}. */ - void sealFlushedSegment(FlushedSegment flushedSegment) throws IOException { + void sealFlushedSegment(FlushedSegment flushedSegment, long updateGen) throws IOException { assert flushedSegment != null; SegmentInfoPerCommit newSegment = flushedSegment.segmentInfo; @@ -558,7 +589,7 @@ if (writer.useCompoundFile(newSegment)) { // Now build compound file - Collection oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context); + Collection oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context, updateGen); newSegment.info.setUseCompoundFile(true); writer.deleteNewFiles(oldFiles); } @@ -567,7 +598,11 @@ // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: - codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context); + if (updateGen < 0) { + codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context); + } else { + codec.segmentInfoFormat().getSegmentInfoWriter().writeFilesList(directory, newSegment.info, updateGen, context); + } // TODO: ideally we would freeze newSegment here!! // because any changes after writing the .si will be @@ -651,9 +686,9 @@ } } - PerDocWriteState newPerDocWriteState(String segmentSuffix) { + PerDocWriteState newPerDocWriteState() { assert segmentInfo != null; - return new PerDocWriteState(infoStream, directory, segmentInfo, bytesUsed, segmentSuffix, IOContext.DEFAULT); + return new PerDocWriteState(infoStream, directory, segmentInfo, bytesUsed, "", IOContext.DEFAULT); } @Override @@ -662,4 +697,15 @@ + ", segment=" + (segmentInfo != null ? 
segmentInfo.name : "null") + ", aborting=" + aborting + ", numDocsInRAM=" + numDocsInRAM + ", deleteQueue=" + deleteQueue + "]"; } + + void updateFields(Term term, FieldsUpdate fieldUpdates) { + finishDocument(term, fieldUpdates); + } + + void clearDeleteSlice() { + if (deleteSlice != null) { + assert deleteSlice.sliceHead == deleteSlice.sliceTail; + deleteSlice = null; + } + } } Index: lucene/core/src/java/org/apache/lucene/index/FieldGenerationReplacements.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FieldGenerationReplacements.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/FieldGenerationReplacements.java (working copy) @@ -0,0 +1,83 @@ +package org.apache.lucene.index; + +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.TreeMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Holds for a certain field in a stacked segment the documents document the + * generation in which the last replacement (of the relevant field) took place. + */ +public class FieldGenerationReplacements implements Iterable> { + + TreeMap map = null; + + /** + * Set the generation value for a given document. + * + * @param docId + * Document id. + * @param generation + * The requested generation. + */ + public void set(int docId, long generation) { + if (map == null) { + map = new TreeMap(); + } + assert generation > 0 && generation <= Integer.MAX_VALUE; + map.put(docId, generation); + } + + /** + * Get the generation value for a given document. + * + * @param docId + * Document id. + * @return The requested generation, or -1 if the document has no generation. + */ + public long get(int docId) { + if (map == null) { + return -1; + } + final Long val = map.get(docId); + if (val == null) { + return -1; + } + return val; + } + + public void merge(FieldGenerationReplacements other) { + if (map == null) { + map = other.map; + } else if (other != null) { + map.putAll(other.map); + } + } + + @Override + public Iterator> iterator() { + return map.entrySet().iterator(); + } + + public int size() { + return map.size(); + } + +} Index: lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (working copy) @@ -0,0 +1,86 @@ +package org.apache.lucene.index; + +import org.apache.lucene.analysis.Analyzer; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class FieldsUpdate implements Comparable { + + /** + * Specifies the operation to perform when updating fields. + */ + enum Operation { + /** + * Add the given fields to all existing documents matching the update + * criterion. + */ + ADD_FIELDS, + + /** + * Use the given fields to replace fields with same names in all existing + * documents matching the update criterion. + */ + REPLACE_FIELDS + } + + final Operation operation; + final IndexDocument fields; + final Analyzer analyzer; + final int docIDUpto; + + /** + * An update of fields which is not assigned to a specific live segment. + * + * @param operation + * The type of update operation. + * @param fields + * The fields to use in the update. + * @param analyzer + * The analyzer to use in the update. + */ + public FieldsUpdate(Operation operation, IndexDocument fields, + Analyzer analyzer) { + this.operation = operation; + this.fields = fields; + this.analyzer = analyzer; + this.docIDUpto = -1; + } + + /** + * An update of fields for a specific live segment. + * + * @param other + * A non-specific update with the update data. + * @param docIDUpto + * The doc ID in the live segment up to which the update should be + * applied. 
+ */ + public FieldsUpdate(FieldsUpdate other, int docIDUpto) { + this.operation = other.operation; + this.fields = other.fields; + this.analyzer = other.analyzer; + this.docIDUpto = docIDUpto; + } + + /* Order FrieldsUpdate by increasing docIDUpto */ + @Override + public int compareTo(FieldsUpdate other) { + return this.docIDUpto - other.docIDUpto; + } + +} Index: lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (revision 1420477) +++ lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (working copy) @@ -19,7 +19,9 @@ import java.io.IOException; import java.util.Comparator; +import java.util.Iterator; import java.util.Map; +import java.util.SortedSet; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; @@ -78,7 +80,6 @@ @Override void skippingLongTerm() {} - @Override public int compareTo(FreqProxTermsWriterPerField other) { return fieldInfo.name.compareTo(other.fieldInfo.name); } @@ -365,7 +366,14 @@ } else { segDeletes = null; } - + + final Map> segUpdates; + if (state.segUpdates != null && state.segUpdates.terms.size() > 0) { + segUpdates = state.segUpdates.terms; + } else { + segUpdates = null; + } + final int[] termIDs = termsHashPerField.sortPostings(termComp); final int numTerms = termsHashPerField.bytesHash.size(); final BytesRef text = new BytesRef(); @@ -398,6 +406,8 @@ final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text); + Term term = new Term(fieldName, text); + final int delDocLimit; if (segDeletes != null) { protoTerm.bytes = text; @@ -411,6 +421,19 @@ delDocLimit = 0; } + final SortedSet termUpdates; + Iterator updatesIterator = null; + FieldsUpdate nextUpdate = null; + if (segUpdates != null) { + termUpdates = segUpdates.get(term); + if (termUpdates != null && !termUpdates.isEmpty()) { + updatesIterator = termUpdates.iterator(); + nextUpdate = updatesIterator.next(); + } + } else { + termUpdates = null; + } + // Now termStates has numToMerge FieldMergeStates // which all share the same term. Now we must // interleave the docID streams. @@ -483,6 +506,23 @@ } } + // make sure we update the relevant documents according to the doc ID + // in which the updates arrived + while (nextUpdate != null && docID > nextUpdate.docIDUpto) { + if (updatesIterator.hasNext()) { + nextUpdate = updatesIterator.next(); + } else { + nextUpdate = null; + } + } + + if (nextUpdate != null) { + if (state.liveUpdates == null) { + state.liveUpdates = new UpdatedSegmentData(); + } + state.liveUpdates.addUpdate(docID, nextUpdate); + } + totTF += termFreq; // Carefully copy over the prox + payload info, Index: lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (working copy) @@ -17,8 +17,12 @@ * limitations under the License. */ +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedSet; import org.apache.lucene.search.Query; import org.apache.lucene.util.ArrayUtil; @@ -35,6 +39,7 @@ /* Query we often undercount (say 24 bytes), plus int. 
*/ final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24; + final static List EMPTY_LIST = new ArrayList(0); // Terms, in sorted order: final PrefixCodedTerms terms; int termCount; // just for debugging @@ -49,31 +54,68 @@ final boolean isSegmentPrivate; // set to true iff this frozen packet represents // a segment private deletes. in that case is should // only have Queries + + // Updated terms, in sorted order: + final PrefixCodedTerms updateTerms; + // Updated fields per term + final FieldsUpdate[][] updateArrays; - - public FrozenBufferedDeletes(BufferedDeletes deletes, boolean isSegmentPrivate) { + public FrozenBufferedDeletes(BufferedDeletes deletes, BufferedUpdates updates, boolean isSegmentPrivate) { this.isSegmentPrivate = isSegmentPrivate; - assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries"; - Term termsArray[] = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]); - termCount = termsArray.length; - ArrayUtil.mergeSort(termsArray); - PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); - for (Term term : termsArray) { - builder.add(term); + int localBytesUsed = 0; + if (deletes != null) { + assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries"; + Term termsArray[] = deletes.terms.keySet().toArray( + new Term[deletes.terms.size()]); + termCount = termsArray.length; + ArrayUtil.mergeSort(termsArray); + PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); + for (Term term : termsArray) { + builder.add(term); + } + terms = builder.finish(); + localBytesUsed += (int) terms.getSizeInBytes(); + + queries = new Query[deletes.queries.size()]; + queryLimits = new int[deletes.queries.size()]; + int upto = 0; + for (Map.Entry ent : deletes.queries.entrySet()) { + queries[upto] = ent.getKey(); + queryLimits[upto] = ent.getValue(); + upto++; + } + + localBytesUsed += queries.length * BYTES_PER_DEL_QUERY; + numTermDeletes = deletes.numTermDeletes.get(); + } else { + terms = null; + numTermDeletes = 0; + queries = null; + queryLimits = null; } - terms = builder.finish(); - queries = new Query[deletes.queries.size()]; - queryLimits = new int[deletes.queries.size()]; - int upto = 0; - for(Map.Entry ent : deletes.queries.entrySet()) { - queries[upto] = ent.getKey(); - queryLimits[upto] = ent.getValue(); - upto++; + // freeze updates + if (updates != null && !updates.terms.isEmpty()) { + PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); + updateArrays = new FieldsUpdate[updates.terms.size()][]; + localBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF * (1 + updateArrays.length); + int i = 0; + for (Entry> entry : updates.terms.entrySet()) { + builder.add(entry.getKey()); + SortedSet updateList = entry.getValue(); + // TODO : calculate bytes of updates? 
+ updateArrays[i] = updateList.toArray(new FieldsUpdate[updateList.size()]); + localBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF * (1 + updateArrays[i].length); + i++; + } + updateTerms = builder.finish(); + localBytesUsed += (int) updateTerms.getSizeInBytes(); + } else { + updateTerms = null; + updateArrays = null; } - - bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY; - numTermDeletes = deletes.numTermDeletes.get(); + + bytesUsed = localBytesUsed; } public void setDelGen(long gen) { @@ -90,6 +132,9 @@ return new Iterable() { @Override public Iterator iterator() { + if (terms == null) { + return EMPTY_LIST.iterator(); + } return terms.iterator(); } }; @@ -129,9 +174,12 @@ if (numTermDeletes != 0) { s += " " + numTermDeletes + " deleted terms (unique count=" + termCount + ")"; } - if (queries.length != 0) { + if (queries != null && queries.length != 0) { s += " " + queries.length + " deleted queries"; } + if (updateArrays != null && updateArrays.length > 0) { + s += " " + updateArrays.length + " updates"; + } if (bytesUsed != 0) { s += " bytesUsed=" + bytesUsed; } @@ -139,7 +187,11 @@ return s; } - boolean any() { - return termCount > 0 || queries.length > 0; + boolean anyDeletes() { + return termCount > 0 || (queries != null && queries.length > 0); } + + boolean anyUpdates() { + return updateTerms != null; + } } Index: lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -29,7 +29,7 @@ * name matches an extension ({@link #matchesExtension(String, String) * matchesExtension}), as well as generating file names from a segment name, * generation and extension ( - * {@link #fileNameFromGeneration(String, String, long) fileNameFromGeneration}, + * {@link #fileNameFromGeneration(String, String, long, boolean) fileNameFromGeneration}, * {@link #segmentFileName(String, String, String) segmentFileName}). * *

    NOTE: extensions used by codecs are not @@ -83,8 +83,9 @@ * @param base main part of the file name * @param ext extension of the filename * @param gen generation + * @param isUpdate whether the file is an update file or not */ - public static String fileNameFromGeneration(String base, String ext, long gen) { + public static String fileNameFromGeneration(String base, String ext, long gen, boolean isUpdate) { if (gen == -1) { return null; } else if (gen == 0) { @@ -94,8 +95,11 @@ // The '6' part in the length is: 1 for '.', 1 for '_' and 4 as estimate // to the gen length as string (hopefully an upper limit so SB won't // expand in the middle. - StringBuilder res = new StringBuilder(base.length() + 6 + ext.length()) - .append(base).append('_').append(Long.toString(gen, Character.MAX_RADIX)); + StringBuilder res = new StringBuilder(base.length() + 6 + ext.length()); + if (isUpdate) { + res.append('_'); + } + res.append(base).append('_').append(generationString(gen)); if (ext.length() > 0) { res.append('.').append(ext); } @@ -103,7 +107,36 @@ } } + public static String generationString(long gen) { + return Long.toString(gen, Character.MAX_RADIX); + } + /** + * Computes the base name of an updated segment from base and generation. If + * the generation < 0, the file name is null. otherwise, the file name is + * <base>_upd_<gen>.
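To make the generation-based naming concrete, a small sketch; the expected strings in the comments are derived from the helpers touched in this hunk, using segment name "_0", extension "liv" and generation 3:

    import org.apache.lucene.index.IndexFileNames;

    public class GenerationNameSketch {
      public static void main(String[] args) {
        // Ordinary per-commit file, e.g. live docs: "_0_3.liv"
        System.out.println(IndexFileNames.fileNameFromGeneration("_0", "liv", 3, false));
        // Update file: the extra leading '_' marks it: "__0_3.liv"
        System.out.println(IndexFileNames.fileNameFromGeneration("_0", "liv", 3, true));
        // Base name of a stacked (updated) segment: "__0_3"
        System.out.println(IndexFileNames.updatedSegmentFileNameFromGeneration("_0", 3));
      }
    }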
    + * + * @param baseName + * base segment string + * @param gen + * update generation + */ + public static String updatedSegmentFileNameFromGeneration(String baseName, + long gen) { + if (gen <= 0) { + return null; + } else { + assert gen > 0; + // The '10' part in the length is: 3 for '_', 3 for "upd" and 4 as + // estimate to the gen length as string (hopefully an upper limit so SB + // won't expand in the middle. + StringBuilder res = new StringBuilder(baseName.length() + 10).append('_') + .append(baseName).append('_').append(generationString(gen)); + return res.toString(); + } + } + + /** * Returns a file name that includes the given segment name, your own custom * name and extension. The format of the filename is: * <segmentName>(_<name>)(.<ext>). @@ -201,5 +234,5 @@ // All files created by codecs much match this pattern (we // check this in SegmentInfo.java): - static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*"); + static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[_]?[a-z0-9]+(_.*)?\\..*"); } Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1420477) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -34,7 +34,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.SegmentInfoWriter; import org.apache.lucene.index.FieldInfos.FieldNumbers; +import org.apache.lucene.index.FieldsUpdate.Operation; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.MergePolicy.MergeTrigger; import org.apache.lucene.index.MergeState.CheckAbort; @@ -54,185 +56,188 @@ import org.apache.lucene.util.ThreadInterruptedException; /** - An IndexWriter creates and maintains an index. + * An IndexWriter creates and maintains an index. + * + *

    + * The {@link OpenMode} option on + * {@link IndexWriterConfig#setOpenMode(OpenMode)} determines whether a new + * index is created, or whether an existing index is opened. Note that you can + * open an index with {@link OpenMode#CREATE} even while readers are using the + * index. The old readers will continue to search the "point in time" snapshot + * they had opened, and won't see the newly created index until they re-open. If + * {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a new index + * if there is not already an index at the provided path and otherwise open the + * existing index. + *
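A minimal sketch of the OpenMode choice described above (standard API; dir and analyzer are assumed to exist):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.Version;

    class OpenModeSketch {
      static IndexWriter open(Directory dir, Analyzer analyzer) throws Exception {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND); // create a new index if none exists, otherwise append
        return new IndexWriter(dir, conf);
      }
    }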

    + * + *

    + * In either case, documents are added with {@link #addDocument(IndexDocument) + * addDocument} and removed with {@link #deleteDocuments(Term)} or + * {@link #deleteDocuments(Query)}. A document can be updated with + * {@link #updateDocument(Term, IndexDocument) updateDocument} (which just + * deletes and then adds the entire document). When finished adding, deleting + * and updating documents, {@link #close() close} should be called. + *
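A hedged end-to-end sketch of the add / update / delete / close cycle described above, with field names chosen purely for illustration:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;

    class CrudSketch {
      static void index(IndexWriter writer) throws Exception {
        Document doc = new Document();
        doc.add(new StringField("id", "42", Store.YES));
        doc.add(new TextField("body", "original text", Store.NO));
        writer.addDocument(doc);

        Document newDoc = new Document();
        newDoc.add(new StringField("id", "42", Store.YES));
        newDoc.add(new TextField("body", "replacement text", Store.NO));
        writer.updateDocument(new Term("id", "42"), newDoc);  // delete + re-add of the whole document

        writer.deleteDocuments(new Term("id", "17"));
        writer.close();
      }
    }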

    + * + * + *

    + * These changes are buffered in memory and periodically flushed to the + * {@link Directory} (during the above method calls). A flush is triggered when + * there are enough added documents since the last flush. Flushing is triggered + * either by RAM usage of the documents (see + * {@link IndexWriterConfig#setRAMBufferSizeMB}) or the number of added + * documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). The + * default is to flush when RAM usage hits + * {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For best indexing + * speed you should flush by RAM usage with a large RAM buffer. Additionally, if + * IndexWriter reaches the configured number of buffered deletes (see + * {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) the deleted terms and + * queries are flushed and applied to existing segments. In contrast to the + * other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and + * {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms won't + * trigger a segment flush. Note that flushing just moves the internal buffered + * state in IndexWriter into the index, but these changes are not visible to + * IndexReader until either {@link #commit()} or {@link #close} is called. A + * flush may also trigger one or more segment merges which by default run with a + * background thread so as not to block the addDocument calls (see below for changing the {@link MergeScheduler}). + *
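The visibility rule above (buffered changes only become searchable after commit() or close()) in a short sketch; writer and dir are assumed to refer to the same index:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    class CommitVisibilitySketch {
      static void demo(IndexWriter writer, Directory dir) throws Exception {
        DirectoryReader before = DirectoryReader.open(dir);
        writer.addDocument(new Document());
        // 'before' still reflects the last commit; the buffered document is invisible.
        writer.commit();
        DirectoryReader after = DirectoryReader.openIfChanged(before);
        // 'after' is non-null now and sees the newly committed document.
        before.close();
        if (after != null) after.close();
      }
    }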

    + * + *

    + * Opening an IndexWriter creates a lock file for the directory in + * use. Trying to open another IndexWriter on the same directory + * will lead to a {@link LockObtainFailedException}. The + * {@link LockObtainFailedException} is also thrown if an IndexReader on the + * same directory is used to delete documents from the index. + *

    + * + * + *

    + * Expert: IndexWriter allows an optional + * {@link IndexDeletionPolicy} implementation to be specified. You can use this + * to control when prior commits are deleted from the index. The default policy + * is {@link KeepOnlyLastCommitDeletionPolicy} which removes all prior commits + * as soon as a new commit is done (this matches behavior before 2.2). Creating + * your own policy can allow you to explicitly keep previous "point in time" + * commits alive in the index for some time, to allow readers to refresh to the + * new commit without having the old commit deleted out from under them. This is + * necessary on filesystems like NFS that do not support "delete on last + * close" semantics, which Lucene's "point in time" search normally relies on. + *

    + * + * + *

    + * Expert: IndexWriter allows you to separately change the + * {@link MergePolicy} and the {@link MergeScheduler}. The {@link MergePolicy} + * is invoked whenever there are changes to the segments in the index. Its role + * is to select which merges to do, if any, and return a + * {@link MergePolicy.MergeSpecification} describing the merges. The default is + * {@link LogByteSizeMergePolicy}. Then, the {@link MergeScheduler} is invoked + * with the requested merges and it decides when and how to run the merges. The + * default is {@link ConcurrentMergeScheduler}. + *
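A short sketch (illustrative only, not part of the patch) of swapping the MergePolicy and MergeScheduler discussed above, reusing the IndexWriterConfig (iwc) from the earlier flush-configuration sketch; the values are illustrative.

// LogByteSizeMergePolicy and ConcurrentMergeScheduler live in org.apache.lucene.index.
LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setMergeFactor(10);                         // illustrative value
iwc.setMergePolicy(mergePolicy);
iwc.setMergeScheduler(new ConcurrentMergeScheduler());  // the default scheduler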

    + * + * + *

    + * NOTE: if you hit an OutOfMemoryError then IndexWriter will quietly + * record this fact and block all future segment commits. This is a defensive + * measure in case any internal state (buffered documents and deletions) were + * corrupted. Any subsequent calls to {@link #commit()} will throw an + * IllegalStateException. The only course of action is to call {@link #close()}, + * which internally will call {@link #rollback()}, to undo any changes to the + * index since the last commit. You can also just call {@link #rollback()} + * directly. + *

    + * + * + *

    + * NOTE: {@link IndexWriter} instances are completely thread safe, + * meaning multiple threads can call any of its methods, concurrently. If your + * application requires external synchronization, you should not + * synchronize on the IndexWriter instance as this may cause + * deadlock; use your own (non-Lucene) objects instead. + *

    + * + *

    + * NOTE: If you call Thread.interrupt() on a thread that's + * within IndexWriter, IndexWriter will try to catch this (eg, if it's in a + * wait() or Thread.sleep()), and will then throw the unchecked exception + * {@link ThreadInterruptedException} and clear the interrupt status on + * the thread. + *

    + */ -

    The {@link OpenMode} option on - {@link IndexWriterConfig#setOpenMode(OpenMode)} determines - whether a new index is created, or whether an existing index is - opened. Note that you can open an index with {@link OpenMode#CREATE} - even while readers are using the index. The old readers will - continue to search the "point in time" snapshot they had opened, - and won't see the newly created index until they re-open. If - {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a - new index if there is not already an index at the provided path - and otherwise open the existing index.

    - -

    In either case, documents are added with {@link #addDocument(IndexDocument) - addDocument} and removed with {@link #deleteDocuments(Term)} or {@link - #deleteDocuments(Query)}. A document can be updated with {@link - #updateDocument(Term, IndexDocument) updateDocument} (which just deletes - and then adds the entire document). When finished adding, deleting - and updating documents, {@link #close() close} should be called.

    - - -

    These changes are buffered in memory and periodically - flushed to the {@link Directory} (during the above method - calls). A flush is triggered when there are enough added documents - since the last flush. Flushing is triggered either by RAM usage of the - documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the - number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). - The default is to flush when RAM usage hits - {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For - best indexing speed you should flush by RAM usage with a - large RAM buffer. Additionally, if IndexWriter reaches the configured number of - buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) - the deleted terms and queries are flushed and applied to existing segments. - In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and - {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms - won't trigger a segment flush. Note that flushing just moves the - internal buffered state in IndexWriter into the index, but - these changes are not visible to IndexReader until either - {@link #commit()} or {@link #close} is called. A flush may - also trigger one or more segment merges which by default - run with a background thread so as not to block the - addDocument calls (see below - for changing the {@link MergeScheduler}).

    - -

    Opening an IndexWriter creates a lock file for the directory in use. Trying to open - another IndexWriter on the same directory will lead to a - {@link LockObtainFailedException}. The {@link LockObtainFailedException} - is also thrown if an IndexReader on the same directory is used to delete documents - from the index.

    - - -

    Expert: IndexWriter allows an optional - {@link IndexDeletionPolicy} implementation to be - specified. You can use this to control when prior commits - are deleted from the index. The default policy is {@link - KeepOnlyLastCommitDeletionPolicy} which removes all prior - commits as soon as a new commit is done (this matches - behavior before 2.2). Creating your own policy can allow - you to explicitly keep previous "point in time" commits - alive in the index for some time, to allow readers to - refresh to the new commit without having the old commit - deleted out from under them. This is necessary on - filesystems like NFS that do not support "delete on last - close" semantics, which Lucene's "point in time" search - normally relies on.

    - -

    Expert: - IndexWriter allows you to separately change - the {@link MergePolicy} and the {@link MergeScheduler}. - The {@link MergePolicy} is invoked whenever there are - changes to the segments in the index. Its role is to - select which merges to do, if any, and return a {@link - MergePolicy.MergeSpecification} describing the merges. - The default is {@link LogByteSizeMergePolicy}. Then, the {@link - MergeScheduler} is invoked with the requested merges and - it decides when and how to run the merges. The default is - {@link ConcurrentMergeScheduler}.

    - -

    NOTE: if you hit an - OutOfMemoryError then IndexWriter will quietly record this - fact and block all future segment commits. This is a - defensive measure in case any internal state (buffered - documents and deletions) were corrupted. Any subsequent - calls to {@link #commit()} will throw an - IllegalStateException. The only course of action is to - call {@link #close()}, which internally will call {@link - #rollback()}, to undo any changes to the index since the - last commit. You can also just call {@link #rollback()} - directly.

    - -

    NOTE: {@link - IndexWriter} instances are completely thread - safe, meaning multiple threads can call any of its - methods, concurrently. If your application requires - external synchronization, you should not - synchronize on the IndexWriter instance as - this may cause deadlock; use your own (non-Lucene) objects - instead.

    - -

    NOTE: If you call - Thread.interrupt() on a thread that's within - IndexWriter, IndexWriter will try to catch this (eg, if - it's in a wait() or Thread.sleep()), and will then throw - the unchecked exception {@link ThreadInterruptedException} - and clear the interrupt status on the thread.

    -*/ - /* - * Clarification: Check Points (and commits) - * IndexWriter writes new index files to the directory without writing a new segments_N - * file which references these new files. It also means that the state of - * the in memory SegmentInfos object is different than the most recent - * segments_N file written to the directory. - * - * Each time the SegmentInfos is changed, and matches the (possibly - * modified) directory files, we have a new "check point". - * If the modified/new SegmentInfos is written to disk - as a new - * (generation of) segments_N file - this check point is also an - * IndexCommit. - * - * A new checkpoint always replaces the previous checkpoint and - * becomes the new "front" of the index. This allows the IndexFileDeleter - * to delete files that are referenced only by stale checkpoints. - * (files that were created since the last commit, but are no longer - * referenced by the "front" of the index). For this, IndexFileDeleter - * keeps track of the last non commit checkpoint. + * Clarification: Check Points (and commits) IndexWriter writes new index files + * to the directory without writing a new segments_N file which references these + * new files. It also means that the state of the in memory SegmentInfos object + * is different than the most recent segments_N file written to the directory. + * + * Each time the SegmentInfos is changed, and matches the (possibly modified) + * directory files, we have a new "check point". If the modified/new + * SegmentInfos is written to disk - as a new (generation of) segments_N file - + * this check point is also an IndexCommit. + * + * A new checkpoint always replaces the previous checkpoint and becomes the new + * "front" of the index. This allows the IndexFileDeleter to delete files that + * are referenced only by stale checkpoints. (files that were created since the + * last commit, but are no longer referenced by the "front" of the index). For + * this, IndexFileDeleter keeps track of the last non commit checkpoint. */ public class IndexWriter implements Closeable, TwoPhaseCommit { private static final int UNBOUNDED_MAX_MERGE_SEGMENTS = -1; - /** * Name of the write lock in the index. */ public static final String WRITE_LOCK_NAME = "write.lock"; - + /** - * Absolute hard maximum length for a term, in bytes once - * encoded as UTF8. If a term arrives from the analyzer - * longer than this length, it is skipped and a message is - * printed to infoStream, if set (see {@link - * IndexWriterConfig#setInfoStream(InfoStream)}). + * Absolute hard maximum length for a term, in bytes once encoded as UTF8. If + * a term arrives from the analyzer longer than this length, it is skipped and + * a message is printed to infoStream, if set (see + * {@link IndexWriterConfig#setInfoStream(InfoStream)}). 
*/ public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8; volatile private boolean hitOOM; - - private final Directory directory; // where this index resides - private final Analyzer analyzer; // how to analyze text - - private volatile long changeCount; // increments every time a change is completed + + private final Directory directory; // where this index resides + private final Analyzer analyzer; // how to analyze text + + private volatile long changeCount; // increments every time a change is + // completed private long lastCommitChangeCount; // last changeCount that was committed - - private List rollbackSegments; // list of segmentInfo we will fallback to if the commit fails - - volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) + + private List rollbackSegments; // list of segmentInfo we + // will fallback to if + // the commit fails + + volatile SegmentInfos pendingCommit; // set when a commit is pending (after + // prepareCommit() & before commit()) volatile long pendingCommitChangeCount; - + private Collection filesToCommit; - - final SegmentInfos segmentInfos; // the segments + + final SegmentInfos segmentInfos; // the segments final FieldNumbers globalFieldNumberMap; - + private DocumentsWriter docWriter; final IndexFileDeleter deleter; - + // used by forceMerge to note those needing merging private Map segmentsToMerge = new HashMap(); private int mergeMaxNumSegments; - + private Lock writeLock; - + private volatile boolean closed; private volatile boolean closing; - + // Holds all SegmentInfo instances currently involved in // merges private HashSet mergingSegments = new HashSet(); - + private MergePolicy mergePolicy; private final MergeScheduler mergeScheduler; private LinkedList pendingMerges = new LinkedList(); @@ -240,95 +245,106 @@ private List mergeExceptions = new ArrayList(); private long mergeGen; private boolean stopMerges; - + final AtomicInteger flushCount = new AtomicInteger(); final AtomicInteger flushDeletesCount = new AtomicInteger(); - + final ReaderPool readerPool = new ReaderPool(); final BufferedDeletesStream bufferedDeletesStream; - + + private boolean updatesPending; + // This is a "write once" variable (like the organic dye // on a DVD-R that may or may not be heated by a laser and // then cooled to permanently record the event): it's // false, until getReader() is called for the first time, // at which point it's switched to true and never changes - // back to false. Once this is true, we hold open and + // back to false. Once this is true, we hold open and // reuse SegmentReader instances internally for applying // deletes, doing merges, and reopening near real-time // readers. private volatile boolean poolReaders; - + // The instance that was passed to the constructor. It is saved only in order // to allow users to query an IndexWriter settings. private final LiveIndexWriterConfig config; - + DirectoryReader getReader() throws IOException { return getReader(true); } - + /** - * Expert: returns a readonly reader, covering all - * committed as well as un-committed changes to the index. - * This provides "near real-time" searching, in that - * changes made during an IndexWriter session can be - * quickly made available for searching without closing - * the writer nor calling {@link #commit}. - * - *

    Note that this is functionally equivalent to calling - * {#flush} and then opening a new reader. But the turnaround time of this - * method should be faster since it avoids the potentially - * costly {@link #commit}.

    - * - *

    You must close the {@link IndexReader} returned by - * this method once you are done using it.

    - * - *

    It's near real-time because there is no hard - * guarantee on how quickly you can get a new reader after - * making changes with IndexWriter. You'll have to - * experiment in your situation to determine if it's - * fast enough. As this is a new and experimental - * feature, please report back on your findings so we can - * learn, improve and iterate.

    - * - *

    The resulting reader supports {@link - * DirectoryReader#openIfChanged}, but that call will simply forward - * back to this method (though this may change in the - * future).

    - * - *

    The very first time this method is called, this - * writer instance will make every effort to pool the - * readers that it opens for doing merges, applying - * deletes, etc. This means additional resources (RAM, - * file descriptors, CPU time) will be consumed.

    - * - *

    For lower latency on reopening a reader, you should - * call {@link IndexWriterConfig#setMergedSegmentWarmer} to - * pre-warm a newly merged segment before it's committed - * to the index. This is important for minimizing - * index-to-search delay after a large merge.

    - * - *

    If an addIndexes* call is running in another thread, - * then this reader will only search those segments from - * the foreign index that have been successfully copied - * over, so far

    . - * - *

    NOTE: Once the writer is closed, any - * outstanding readers may continue to be used. However, - * if you attempt to reopen any of those readers, you'll - * hit an {@link AlreadyClosedException}.

    - * + * Expert: returns a readonly reader, covering all committed as well as + * un-committed changes to the index. This provides "near real-time" + * searching, in that changes made during an IndexWriter session can be + * quickly made available for searching without closing the writer nor calling + * {@link #commit}. + * + *

    + * Note that this is functionally equivalent to calling {#flush} and then + * opening a new reader. But the turnaround time of this method should be + * faster since it avoids the potentially costly {@link #commit}. + *

    + * + *

    + * You must close the {@link IndexReader} returned by this method once you are + * done using it. + *

    + * + *

    + * It's near real-time because there is no hard guarantee on how + * quickly you can get a new reader after making changes with IndexWriter. + * You'll have to experiment in your situation to determine if it's fast + * enough. As this is a new and experimental feature, please report back on + * your findings so we can learn, improve and iterate. + *

    + * + *

    + * The resulting reader supports {@link DirectoryReader#openIfChanged}, but + * that call will simply forward back to this method (though this may change + * in the future). + *

    + * + *

    + * The very first time this method is called, this writer instance will make + * every effort to pool the readers that it opens for doing merges, applying + * deletes, etc. This means additional resources (RAM, file descriptors, CPU + * time) will be consumed. + *

    + * + *

    + * For lower latency on reopening a reader, you should call + * {@link IndexWriterConfig#setMergedSegmentWarmer} to pre-warm a newly merged + * segment before it's committed to the index. This is important for + * minimizing index-to-search delay after a large merge. + *

    + * + *

+ * If an addIndexes* call is running in another thread, then this reader will + * only search those segments from the foreign index that have been + * successfully copied over, so far. + *



+ * + *


    + * NOTE: Once the writer is closed, any outstanding readers may + * continue to be used. However, if you attempt to reopen any of those + * readers, you'll hit an {@link AlreadyClosedException}. + *
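A minimal sketch (illustrative only, not part of the patch) of the near-real-time pattern this javadoc describes, going through the public DirectoryReader entry point rather than the package-private getReader(); writer is an open IndexWriter.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;

DirectoryReader reader = DirectoryReader.open(writer, true); // applyAllDeletes
IndexSearcher searcher = new IndexSearcher(reader);
// ... run queries; later, pick up new changes without committing:
DirectoryReader newer = DirectoryReader.openIfChanged(reader);
if (newer != null) {
  reader.close();
  reader = newer;
  searcher = new IndexSearcher(reader);
}
// ... and when done searching:
reader.close();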

    + * * @lucene.experimental - * - * @return IndexReader that covers entire index plus all - * changes made so far by this IndexWriter instance - * - * @throws IOException If there is a low-level I/O error + * + * @return IndexReader that covers entire index plus all changes made so far + * by this IndexWriter instance + * + * @throws IOException + * If there is a low-level I/O error */ DirectoryReader getReader(boolean applyAllDeletes) throws IOException { ensureOpen(); - + final long tStart = System.currentTimeMillis(); - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "flush at getReader"); } @@ -340,18 +356,18 @@ doBeforeFlush(); boolean anySegmentFlushed = false; /* - * for releasing a NRT reader we must ensure that - * DW doesn't add any segments or deletes until we are - * done with creating the NRT DirectoryReader. - * We release the two stage full flush after we are done opening the - * directory reader! + * for releasing a NRT reader we must ensure that DW doesn't add any + * segments or deletes until we are done with creating the NRT + * DirectoryReader. We release the two stage full flush after we are done + * opening the directory reader! */ synchronized (fullFlushLock) { boolean success = false; try { anySegmentFlushed = docWriter.flushAllThreads(); if (!anySegmentFlushed) { - // prevent double increment since docWriter#doFlush increments the flushcount + // prevent double increment since docWriter#doFlush increments the + // flushcount // if we flushed anything. flushCount.incrementAndGet(); } @@ -359,11 +375,12 @@ // Prevent segmentInfos from changing while opening the // reader; in theory we could do similar retry logic, // just like we do when loading segments_N - synchronized(this) { + synchronized (this) { maybeApplyDeletes(applyAllDeletes); r = StandardDirectoryReader.open(this, segmentInfos, applyAllDeletes); if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "return reader version=" + r.getVersion() + " reader=" + r); + infoStream.message("IW", "return reader version=" + r.getVersion() + + " reader=" + r); } } } catch (OutOfMemoryError oom) { @@ -385,30 +402,32 @@ maybeMerge(MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); } if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "getReader took " + (System.currentTimeMillis() - tStart) + " msec"); + infoStream.message("IW", "getReader took " + + (System.currentTimeMillis() - tStart) + " msec"); } return r; } - - /** Holds shared SegmentReader instances. IndexWriter uses - * SegmentReaders for 1) applying deletes, 2) doing - * merges, 3) handing out a real-time reader. This pool - * reuses instances of the SegmentReaders in all these - * places if it is in "near real-time mode" (getReader() - * has been called on this instance). */ - + + /** + * Holds shared SegmentReader instances. IndexWriter uses SegmentReaders for + * 1) applying deletes, 2) doing merges, 3) handing out a real-time reader. + * This pool reuses instances of the SegmentReaders in all these places if it + * is in "near real-time mode" (getReader() has been called on this instance). 
+ */ + class ReaderPool { private final Map readerMap = new HashMap(); - + // used only by asserts public synchronized boolean infoIsLive(SegmentInfoPerCommit info) { int idx = segmentInfos.indexOf(info); - assert idx != -1: "info=" + info + " isn't live"; - assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos"; + assert idx != -1 : "info=" + info + " isn't live"; + assert segmentInfos.info(idx) == info : "info=" + info + + " doesn't match live info in segmentInfos"; return true; } - + public synchronized void drop(SegmentInfoPerCommit info) throws IOException { final ReadersAndLiveDocs rld = readerMap.get(info); if (rld != null) { @@ -417,36 +436,40 @@ rld.dropReaders(); } } - + public synchronized void release(ReadersAndLiveDocs rld) throws IOException { - + // Matches incRef in get: rld.decRef(); - + // Pool still holds a ref: assert rld.refCount() >= 1; - + + boolean updatesWritten = rld.writeLiveUpdates(directory, codec); + if (!poolReaders && rld.refCount() == 1) { // This is the last ref to this RLD, and we're not // pooling, so remove it: - if (rld.writeLiveDocs(directory)) { + if (rld.writeLiveDocs(directory) || updatesWritten) { // Make sure we only write del docs for a live segment: assert infoIsLive(rld.info); // Must checkpoint w/ deleter, because we just // created created new _X_N.del file. deleter.checkpoint(segmentInfos, false); } - + rld.dropReaders(); readerMap.remove(rld.info); } } - - /** Remove all our references to readers, and commits - * any pending changes. */ + + /** + * Remove all our references to readers, and commits any pending changes. + */ synchronized void dropAll(boolean doSave) throws IOException { - final Iterator> it = readerMap.entrySet().iterator(); - while(it.hasNext()) { + final Iterator> it = readerMap + .entrySet().iterator(); + while (it.hasNext()) { final ReadersAndLiveDocs rld = it.next().getValue(); if (doSave && rld.writeLiveDocs(directory)) { // Make sure we only write del docs for a live segment: @@ -455,13 +478,13 @@ // created created new _X_N.del file. deleter.checkpoint(segmentInfos, false); } - + // Important to remove as-we-go, not with .clear() // in the end, in case we hit an exception; // otherwise we could over-decref if close() is // called again: it.remove(); - + // NOTE: it is allowed that these decRefs do not // actually close the SRs; this happens when a // near real-time reader is kept open after the @@ -470,12 +493,12 @@ } assert readerMap.size() == 0; } - + /** - * Commit live docs changes for the segment readers for - * the provided infos. - * - * @throws IOException If there is a low-level I/O error + * Commit live docs changes for the segment readers for the provided infos. + * + * @throws IOException + * If there is a low-level I/O error */ public synchronized void commit(SegmentInfos infos) throws IOException { for (SegmentInfoPerCommit info : infos) { @@ -492,16 +515,17 @@ } } } - + /** - * Obtain a ReadersAndLiveDocs instance from the - * readerPool. If create is true, you must later call - * {@link #release(ReadersAndLiveDocs)}. + * Obtain a ReadersAndLiveDocs instance from the readerPool. If create is + * true, you must later call {@link #release(ReadersAndLiveDocs)}. 
*/ - public synchronized ReadersAndLiveDocs get(SegmentInfoPerCommit info, boolean create) { - - assert info.info.dir == directory: "info.dir=" + info.info.dir + " vs " + directory; - + public synchronized ReadersAndLiveDocs get(SegmentInfoPerCommit info, + boolean create) { + + assert info.info.dir == directory : "info.dir=" + info.info.dir + " vs " + + directory; + ReadersAndLiveDocs rld = readerMap.get(info); if (rld == null) { if (!create) { @@ -511,34 +535,34 @@ // Steal initial reference: readerMap.put(info, rld); } else { - assert rld.info == info: "rld.info=" + rld.info + " info=" + info + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info); + assert rld.info == info : "rld.info=" + rld.info + " info=" + info + + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info); } - + if (create) { // Return ref to caller: rld.incRef(); } - + return rld; } } - + /** - * Obtain the number of deleted docs for a pooled reader. - * If the reader isn't being pooled, the segmentInfo's - * delCount is returned. + * Obtain the number of deleted docs for a pooled reader. If the reader isn't + * being pooled, the segmentInfo's delCount is returned. */ public int numDeletedDocs(SegmentInfoPerCommit info) { ensureOpen(false); int delCount = info.getDelCount(); - + final ReadersAndLiveDocs rld = readerPool.get(info, false); if (rld != null) { delCount += rld.getPendingDeleteCount(); } return delCount; } - + /** * Used internally to throw an {@link AlreadyClosedException} if this * IndexWriter has been closed or is in the process of closing. @@ -550,32 +574,34 @@ * @throws AlreadyClosedException * if this IndexWriter is closed or in the process of closing */ - protected final void ensureOpen(boolean failIfClosing) throws AlreadyClosedException { + protected final void ensureOpen(boolean failIfClosing) + throws AlreadyClosedException { if (closed || (failIfClosing && closing)) { throw new AlreadyClosedException("this IndexWriter is closed"); } } - + /** - * Used internally to throw an {@link - * AlreadyClosedException} if this IndexWriter has been - * closed ({@code closed=true}) or is in the process of + * Used internally to throw an {@link AlreadyClosedException} if this + * IndexWriter has been closed ({@code closed=true}) or is in the process of * closing ({@code closing=true}). *

    * Calls {@link #ensureOpen(boolean) ensureOpen(true)}. - * @throws AlreadyClosedException if this IndexWriter is closed + * + * @throws AlreadyClosedException + * if this IndexWriter is closed */ protected final void ensureOpen() throws AlreadyClosedException { ensureOpen(true); } - + final Codec codec; // for writing new segments - + /** * Constructs a new IndexWriter per the settings given in conf. - * Note that the passed in {@link IndexWriterConfig} is - * privately cloned; if you need to make subsequent "live" - * changes to the configuration use {@link #getConfig}. + * Note that the passed in {@link IndexWriterConfig} is privately cloned; if + * you need to make subsequent "live" changes to the configuration use + * {@link #getConfig}. *

    * * @param d @@ -599,15 +625,15 @@ mergePolicy.setIndexWriter(this); mergeScheduler = config.getMergeScheduler(); codec = config.getCodec(); - + bufferedDeletesStream = new BufferedDeletesStream(infoStream); poolReaders = config.getReaderPooling(); - + writeLock = directory.makeLock(WRITE_LOCK_NAME); - + if (!writeLock.obtain(config.getWriteLockTimeout())) // obtain write lock - throw new LockObtainFailedException("Index locked for write: " + writeLock); - + throw new LockObtainFailedException("Index locked for write: " + writeLock); + boolean success = false; try { OpenMode mode = config.getOpenMode(); @@ -620,15 +646,15 @@ // CREATE_OR_APPEND - create only if an index does not exist create = !DirectoryReader.indexExists(directory); } - + // If index is too old, reading the segments will throw // IndexFormatTooOldException. segmentInfos = new SegmentInfos(); - + if (create) { - // Try to read first. This is to allow create + // Try to read first. This is to allow create // against an index that's currently open for - // searching. In this case we write the next + // searching. In this case we write the next // segments_N file with no segments: try { segmentInfos.read(directory); @@ -636,48 +662,49 @@ } catch (IOException e) { // Likely this means it's a fresh directory } - + // Record that we have a change (zero out all // segments) pending: changeCount++; segmentInfos.changed(); } else { segmentInfos.read(directory); - + IndexCommit commit = config.getIndexCommit(); if (commit != null) { // Swap out all segments, but, keep metadata in // SegmentInfos, like version & generation, to - // preserve write-once. This is important if + // preserve write-once. This is important if // readers are open against the future commit // points. - if (commit.getDirectory() != directory) - throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory"); + if (commit.getDirectory() != directory) throw new IllegalArgumentException( + "IndexCommit's directory doesn't match my directory"); SegmentInfos oldInfos = new SegmentInfos(); oldInfos.read(directory, commit.getSegmentsFileName()); segmentInfos.replace(oldInfos); changeCount++; segmentInfos.changed(); if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "init: loaded commit \"" + commit.getSegmentsFileName() + "\""); + infoStream.message("IW", + "init: loaded commit \"" + commit.getSegmentsFileName() + "\""); } } } - + rollbackSegments = segmentInfos.createBackupSegmentInfos(); - + // start with previous field numbers, but new FieldInfos globalFieldNumberMap = getFieldNumberMap(); - docWriter = new DocumentsWriter(codec, config, directory, this, globalFieldNumberMap, bufferedDeletesStream); - + docWriter = new DocumentsWriter(codec, config, directory, this, + globalFieldNumberMap, bufferedDeletesStream); + // Default deleter (for backwards compatibility) is // KeepOnlyLastCommitDeleter: - synchronized(this) { + synchronized (this) { deleter = new IndexFileDeleter(directory, - config.getIndexDeletionPolicy(), - segmentInfos, infoStream, this); + config.getIndexDeletionPolicy(), segmentInfos, infoStream, this); } - + if (deleter.startingCommitDeleted) { // Deletion policy deleted the "head" commit point. 
// We have to mark ourself as changed so that if we @@ -686,18 +713,19 @@ changeCount++; segmentInfos.changed(); } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "init: create=" + create); messageState(); } - + success = true; - + } finally { if (!success) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "init: hit exception on init; releasing write lock"); + infoStream.message("IW", + "init: hit exception on init; releasing write lock"); } try { writeLock.release(); @@ -708,102 +736,102 @@ } } } - + private FieldInfos getFieldInfos(SegmentInfo info) throws IOException { Directory cfsDir = null; try { if (info.getUseCompoundFile()) { cfsDir = new CompoundFileDirectory(info.dir, - IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), - IOContext.READONCE, - false); + IndexFileNames.segmentFileName(info.name, "", + IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, + false); } else { cfsDir = info.dir; } - return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(cfsDir, - info.name, - IOContext.READONCE); + return info.getCodec().fieldInfosFormat().getFieldInfosReader() + .read(cfsDir, info.name, IOContext.READONCE); } finally { if (info.getUseCompoundFile() && cfsDir != null) { cfsDir.close(); } } } - + /** - * Loads or returns the already loaded the global field number map for this {@link SegmentInfos}. - * If this {@link SegmentInfos} has no global field number map the returned instance is empty + * Loads or returns the already loaded the global field number map for this + * {@link SegmentInfos}. If this {@link SegmentInfos} has no global field + * number map the returned instance is empty */ private FieldNumbers getFieldNumberMap() throws IOException { - final FieldNumbers map = new FieldNumbers(); - + final FieldNumbers map = new FieldNumbers(); + SegmentInfoPerCommit biggest = null; - for(SegmentInfoPerCommit info : segmentInfos) { - if (biggest == null || (info.info.getDocCount()-info.getDelCount()) > (biggest.info.getDocCount()-biggest.getDelCount())) { + for (SegmentInfoPerCommit info : segmentInfos) { + if (biggest == null + || (info.info.getDocCount() - info.getDelCount()) > (biggest.info + .getDocCount() - biggest.getDelCount())) { biggest = info; } } - + if (biggest != null) { - for(FieldInfo fi : getFieldInfos(biggest.info)) { + for (FieldInfo fi : getFieldInfos(biggest.info)) { map.addOrGet(fi.name, fi.number); } } - + // TODO: we could also pull DV type of each field here, // and use that to make sure new segment(s) don't change // the type... - + return map; } /** - * Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter - * current settings, as well as modify "live" ones. + * Returns a {@link LiveIndexWriterConfig}, which can be used to query the + * IndexWriter current settings, as well as modify "live" ones. */ public LiveIndexWriterConfig getConfig() { ensureOpen(false); return config; } - + private void messageState() { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "\ndir=" + directory + "\n" + - "index=" + segString() + "\n" + - "version=" + Constants.LUCENE_VERSION + "\n" + - config.toString()); + infoStream.message("IW", "\ndir=" + directory + "\n" + "index=" + + segString() + "\n" + "version=" + Constants.LUCENE_VERSION + "\n" + + config.toString()); } } - + /** - * Commits all changes to an index, waits for pending merges - * to complete, and closes all associated files. 
+ * Commits all changes to an index, waits for pending merges to complete, and + * closes all associated files. *

    - * This is a "slow graceful shutdown" which may take a long time - * especially if a big merge is pending: If you only want to close - * resources use {@link #rollback()}. If you only want to commit - * pending changes and close resources see {@link #close(boolean)}. + * This is a "slow graceful shutdown" which may take a long time especially if + * a big merge is pending: If you only want to close resources use + * {@link #rollback()}. If you only want to commit pending changes and close + * resources see {@link #close(boolean)}. *

    - * Note that this may be a costly - * operation, so, try to re-use a single writer instead of - * closing and opening a new one. See {@link #commit()} for - * caveats about write caching done by some IO devices. - * - *

    If an Exception is hit during close, eg due to disk - * full or some other reason, then both the on-disk index - * and the internal state of the IndexWriter instance will - * be consistent. However, the close will not be complete - * even though part of it (flushing buffered documents) - * may have succeeded, so the write lock will still be - * held.

    - * - *

    If you can correct the underlying cause (eg free up - * some disk space) then you can call close() again. - * Failing that, if you want to force the write lock to be - * released (dangerous, because you may then lose buffered - * docs in the IndexWriter instance) then you can do - * something like this:

    - * + * Note that this may be a costly operation, so, try to re-use a single writer + * instead of closing and opening a new one. See {@link #commit()} for caveats + * about write caching done by some IO devices. + * + *

    + * If an Exception is hit during close, eg due to disk full or some other + * reason, then both the on-disk index and the internal state of the + * IndexWriter instance will be consistent. However, the close will not be + * complete even though part of it (flushing buffered documents) may have + * succeeded, so the write lock will still be held. + *

    + * + *

    + * If you can correct the underlying cause (eg free up some disk space) then + * you can call close() again. Failing that, if you want to force the write + * lock to be released (dangerous, because you may then lose buffered docs in + * the IndexWriter instance) then you can do something like this: + *

    + * *
        * try {
        *   writer.close();
    @@ -813,49 +841,55 @@
        *   }
        * }
        * 
    - * - * after which, you must be certain not to use the writer - * instance anymore.

    - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer, again. See above for details.

    - * - * @throws IOException if there is a low-level IO error + * + * after which, you must be certain not to use the writer instance + * anymore.

    + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer, again. See above for details. + *

    + * + * @throws IOException + * if there is a low-level IO error */ @Override public void close() throws IOException { close(true); } - + /** - * Closes the index with or without waiting for currently - * running merges to finish. This is only meaningful when - * using a MergeScheduler that runs merges in background - * threads. - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer, again. See above for details.

    - * - *

    NOTE: it is dangerous to always call - * close(false), especially when IndexWriter is not open - * for very long, because this can result in "merge - * starvation" whereby long merges will never have a - * chance to finish. This will cause too many segments in - * your index over time.

    - * - * @param waitForMerges if true, this call will block - * until all merges complete; else, it will ask all - * running merges to abort, wait until those merges have - * finished (which should be at most a few seconds), and - * then return. + * Closes the index with or without waiting for currently running merges to + * finish. This is only meaningful when using a MergeScheduler that runs + * merges in background threads. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer, again. See above for details. + *

    + * + *

    + * NOTE: it is dangerous to always call close(false), especially when + * IndexWriter is not open for very long, because this can result in "merge + * starvation" whereby long merges will never have a chance to finish. This + * will cause too many segments in your index over time. + *
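For completeness, a sketch (illustrative only, not part of the patch) of the two shutdown modes discussed above; note that with this patch close() also commits pending field updates first (the updatesPending check added below).

writer.close(true);     // graceful: waits for running merges (same as close())
// writer.close(false); // alternative: asks running merges to abort, then returns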

    + * + * @param waitForMerges + * if true, this call will block until all merges complete; else, it + * will ask all running merges to abort, wait until those merges have + * finished (which should be at most a few seconds), and then return. */ public void close(boolean waitForMerges) throws IOException { - + + // commit pending updates + if (updatesPending) { + commitInternal(); + } + // Ensure that only one thread actually gets to do the // closing, and make sure no commit is also in progress: - synchronized(commitLock) { + synchronized (commitLock) { if (shouldClose()) { // If any methods have hit OutOfMemoryError, then abort // on close, in case the internal state of IndexWriter @@ -868,12 +902,12 @@ } } } - + // Returns true if this thread should attempt to close, or // false if IndexWriter is now closed; else, waits until // another thread finishes closing synchronized private boolean shouldClose() { - while(true) { + while (true) { if (!closed) { if (!closing) { closing = true; @@ -889,21 +923,24 @@ } } } - - private void closeInternal(boolean waitForMerges, boolean doFlush) throws IOException { + + private void closeInternal(boolean waitForMerges, boolean doFlush) + throws IOException { boolean interrupted = false; try { - + if (pendingCommit != null) { - throw new IllegalStateException("cannot close: prepareCommit was already called with no corresponding call to commit"); + throw new IllegalStateException( + "cannot close: prepareCommit was already called with no corresponding call to commit"); } - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "now flush at close waitForMerges=" + waitForMerges); + infoStream.message("IW", "now flush at close waitForMerges=" + + waitForMerges); } - + docWriter.close(); - + try { // Only allow a new merge to be triggered if we are // going to wait for merges: @@ -915,9 +952,10 @@ } finally { try { - // clean up merge scheduler in all cases, although flushing may have failed: + // clean up merge scheduler in all cases, although flushing may have + // failed: interrupted = Thread.interrupted(); - + if (waitForMerges) { try { // Give merge scheduler last chance to run, in case @@ -927,12 +965,13 @@ // ignore any interruption, does not matter interrupted = true; if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "interrupted while waiting for final merges"); + infoStream.message("IW", + "interrupted while waiting for final merges"); } } } - synchronized(this) { + synchronized (this) { for (;;) { try { finishMerges(waitForMerges && !interrupted); @@ -943,7 +982,8 @@ // so it will not wait interrupted = true; if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "interrupted while waiting for merges to finish"); + infoStream.message("IW", + "interrupted while waiting for merges to finish"); } } } @@ -951,42 +991,44 @@ } } finally { - // shutdown policy, scheduler and all threads (this call is not interruptible): + // shutdown policy, scheduler and all threads (this call is not + // interruptible): IOUtils.closeWhileHandlingException(mergePolicy, mergeScheduler); } } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "now call final commit()"); } - + if (doFlush) { commitInternal(); } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "at close: " + segString()); } // used by assert below final DocumentsWriter oldWriter = docWriter; - synchronized(this) { + synchronized (this) { readerPool.dropAll(true); docWriter = null; deleter.close(); } - + if (writeLock != null) { - writeLock.release(); // 
release write lock + writeLock.release(); // release write lock writeLock = null; } - synchronized(this) { + synchronized (this) { closed = true; } - assert oldWriter.perThreadPool.numDeactivatedThreadStates() == oldWriter.perThreadPool.getMaxThreadStates(); + assert oldWriter.perThreadPool.numDeactivatedThreadStates() == oldWriter.perThreadPool + .getMaxThreadStates(); } catch (OutOfMemoryError oom) { handleOOM(oom, "closeInternal"); } finally { - synchronized(this) { + synchronized (this) { closing = false; notifyAll(); if (!closed) { @@ -999,54 +1041,54 @@ if (interrupted) Thread.currentThread().interrupt(); } } - + /** Returns the Directory used by this index. */ public Directory getDirectory() { return directory; } - + /** Returns the analyzer used by this index. */ public Analyzer getAnalyzer() { ensureOpen(); return analyzer; } - - /** Returns total number of docs in this index, including - * docs not yet flushed (still in the RAM buffer), - * not counting deletions. - * @see #numDocs */ + + /** + * Returns total number of docs in this index, including docs not yet flushed + * (still in the RAM buffer), not counting deletions. + * + * @see #numDocs + */ public synchronized int maxDoc() { ensureOpen(); int count; - if (docWriter != null) - count = docWriter.getNumDocs(); - else - count = 0; - + if (docWriter != null) count = docWriter.getNumDocs(); + else count = 0; + count += segmentInfos.totalDocCount(); return count; } - - /** Returns total number of docs in this index, including - * docs not yet flushed (still in the RAM buffer), and - * including deletions. NOTE: buffered deletions - * are not counted. If you really need these to be - * counted you should call {@link #commit()} first. - * @see #numDocs */ + + /** + * Returns total number of docs in this index, including docs not yet flushed + * (still in the RAM buffer), and including deletions. NOTE: buffered + * deletions are not counted. If you really need these to be counted you + * should call {@link #commit()} first. + * + * @see #numDocs + */ public synchronized int numDocs() { ensureOpen(); int count; - if (docWriter != null) - count = docWriter.getNumDocs(); - else - count = 0; - + if (docWriter != null) count = docWriter.getNumDocs(); + else count = 0; + for (final SegmentInfoPerCommit info : segmentInfos) { count += info.info.getDocCount() - numDeletedDocs(info); } return count; } - + /** * Returns true if this index has deletions (including buffered deletions). */ @@ -1065,164 +1107,195 @@ } return false; } - + /** * Adds a document to this index. - * - *

    Note that if an Exception is hit (for example disk full) - * then the index will be consistent, but this document - * may not have been added. Furthermore, it's possible - * the index will have one segment in non-compound format - * even when using compound files (when a merge has - * partially succeeded).

    - * - *

    This method periodically flushes pending documents - * to the Directory (see above), and - * also periodically triggers segment merges in the index - * according to the {@link MergePolicy} in use.

    - * - *

    Merges temporarily consume space in the - * directory. The amount of space required is up to 1X the - * size of all segments being merged, when no - * readers/searchers are open against the index, and up to - * 2X the size of all segments being merged when - * readers/searchers are open against the index (see - * {@link #forceMerge(int)} for details). The sequence of - * primitive merge operations performed is governed by the - * merge policy. - * - *

    Note that each term in the document can be no longer - * than 16383 characters, otherwise an - * IllegalArgumentException will be thrown.

    - * - *

    Note that it's possible to create an invalid Unicode - * string in java if a UTF16 surrogate pair is malformed. - * In this case, the invalid characters are silently - * replaced with the Unicode replacement character - * U+FFFD.

    - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *

    + * Note that if an Exception is hit (for example disk full) then the index + * will be consistent, but this document may not have been added. Furthermore, + * it's possible the index will have one segment in non-compound format even + * when using compound files (when a merge has partially succeeded). + *

    + * + *

    + * This method periodically flushes pending documents to the Directory (see above), and also periodically triggers segment merges in + * the index according to the {@link MergePolicy} in use. + *

    + * + *

    + * Merges temporarily consume space in the directory. The amount of space + * required is up to 1X the size of all segments being merged, when no + * readers/searchers are open against the index, and up to 2X the size of all + * segments being merged when readers/searchers are open against the index + * (see {@link #forceMerge(int)} for details). The sequence of primitive merge + * operations performed is governed by the merge policy. + * + *

    + * Note that each term in the document can be no longer than 16383 characters, + * otherwise an IllegalArgumentException will be thrown. + *

    + * + *

    + * Note that it's possible to create an invalid Unicode string in java if a + * UTF16 surrogate pair is malformed. In this case, the invalid characters are + * silently replaced with the Unicode replacement character U+FFFD. + *

    + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void addDocument(IndexDocument doc) throws IOException { addDocument(doc, analyzer); } - + /** * Adds a document to this index, using the provided analyzer instead of the * value of {@link #getAnalyzer()}. - * - *
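A small sketch (illustrative only, not part of the patch) of the add path documented above, assuming the 4.x field API and that Document implements the IndexDocument interface used by these signatures.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

Document doc = new Document();
doc.add(new StringField("id", "42", Store.YES));                 // exact-match key
doc.add(new TextField("body", "the quick brown fox", Store.NO)); // analyzed text
writer.addDocument(doc); // buffered; visible after commit() or via an NRT reader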

    See {@link #addDocument(IndexDocument)} for details on - * index and IndexWriter state after an Exception, and - * flushing/merging temporary free space requirements.

    - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *

    + * See {@link #addDocument(IndexDocument)} for details on index and + * IndexWriter state after an Exception, and flushing/merging temporary free + * space requirements. + *

    + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ - public void addDocument(IndexDocument doc, Analyzer analyzer) throws IOException { + public void addDocument(IndexDocument doc, Analyzer analyzer) + throws IOException { updateDocument(null, doc, analyzer); } - + /** - * Atomically adds a block of documents with sequentially - * assigned document IDs, such that an external reader - * will see all or none of the documents. - * - *

    WARNING: the index does not currently record - * which documents were added as a block. Today this is - * fine, because merging will preserve a block. The order of - * documents within a segment will be preserved, even when child - * documents within a block are deleted. Most search features - * (like result grouping and block joining) require you to - * mark documents; when these documents are deleted these - * search features will not work as expected. Obviously adding - * documents to an existing block will require you the reindex - * the entire block. - * - *

    However it's possible that in the future Lucene may - * merge more aggressively re-order documents (for example, - * perhaps to obtain better index compression), in which case - * you may need to fully re-index your documents at that time. - * - *

    See {@link #addDocument(IndexDocument)} for details on - * index and IndexWriter state after an Exception, and - * flushing/merging temporary free space requirements.

    - * - *

    NOTE: tools that do offline splitting of an index - * (for example, IndexSplitter in contrib) or - * re-sorting of documents (for example, IndexSorter in - * contrib) are not aware of these atomically added documents - * and will likely break them up. Use such tools at your - * own risk! - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * + * Atomically adds a block of documents with sequentially assigned document + * IDs, such that an external reader will see all or none of the documents. + * + *

* WARNING: the index does not currently record which documents were + * added as a block. Today this is fine, because merging will preserve a + * block. The order of documents within a segment will be preserved, even when + * child documents within a block are deleted. Most search features (like + * result grouping and block joining) require you to mark documents; when + * these documents are deleted these search features will not work as + * expected. Obviously adding documents to an existing block will require you + * to reindex the entire block. + * + *


    + * However it's possible that in the future Lucene may merge more aggressively + * re-order documents (for example, perhaps to obtain better index + * compression), in which case you may need to fully re-index your documents + * at that time. + * + *

    + * See {@link #addDocument(IndexDocument)} for details on index and + * IndexWriter state after an Exception, and flushing/merging temporary free + * space requirements. + *

    + * + *

    + * NOTE: tools that do offline splitting of an index (for example, + * IndexSplitter in contrib) or re-sorting of documents (for example, + * IndexSorter in contrib) are not aware of these atomically added documents + * and will likely break them up. Use such tools at your own risk! + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * * @lucene.experimental */ - public void addDocuments(Iterable docs) throws IOException { + public void addDocuments(Iterable docs) + throws IOException { addDocuments(docs, analyzer); } - + /** - * Atomically adds a block of documents, analyzed using the - * provided analyzer, with sequentially assigned document - * IDs, such that an external reader will see all or none - * of the documents. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * + * Atomically adds a block of documents, analyzed using the provided analyzer, + * with sequentially assigned document IDs, such that an external reader will + * see all or none of the documents. + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * * @lucene.experimental */ - public void addDocuments(Iterable docs, Analyzer analyzer) throws IOException { + public void addDocuments(Iterable docs, + Analyzer analyzer) throws IOException { updateDocuments(null, docs, analyzer); } - + /** - * Atomically deletes documents matching the provided - * delTerm and adds a block of documents with sequentially - * assigned document IDs, such that an external reader - * will see all or none of the documents. - * + * Atomically deletes documents matching the provided delTerm and adds a block + * of documents with sequentially assigned document IDs, such that an external + * reader will see all or none of the documents. + * * See {@link #addDocuments(Iterable)}. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * * @lucene.experimental */ - public void updateDocuments(Term delTerm, Iterable docs) throws IOException { - updateDocuments(delTerm, docs, analyzer); + public void replaceDocuments(Term delTerm, + Iterable docs) throws IOException { + replaceDocuments(delTerm, docs, analyzer); } - + /** - * Atomically deletes documents matching the provided - * delTerm and adds a block of documents, analyzed using - * the provided analyzer, with sequentially - * assigned document IDs, such that an external reader - * will see all or none of the documents. - * + * Replaced by {@link #replaceDocuments(Term, Iterable)}. + * + * @deprecated use {@link #replaceDocuments(Term, Iterable)}. + */ + @Deprecated + public void updateDocuments(Term delTerm, + Iterable docs) throws IOException { + replaceDocuments(delTerm, docs, analyzer); + } + + /** + * Atomically deletes documents matching the provided delTerm and adds a block + * of documents, analyzed using the provided analyzer, with sequentially + * assigned document IDs, such that an external reader will see all or none of + * the documents. + * * See {@link #addDocuments(Iterable)}. 
- * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * * @lucene.experimental */ - public void updateDocuments(Term delTerm, Iterable docs, Analyzer analyzer) throws IOException { + public void replaceDocuments(Term delTerm, + Iterable docs, Analyzer analyzer) + throws IOException { ensureOpen(); try { boolean success = false; @@ -1244,17 +1317,121 @@ handleOOM(oom, "updateDocuments"); } } - + /** + * Replaced by {@link #replaceDocuments(Term, Iterable, Analyzer)}. + * + * @deprecated use {@link #replaceDocuments(Term, Iterable, Analyzer)}. + */ + @Deprecated + public void updateDocuments(Term delTerm, + Iterable docs, Analyzer analyzer) + throws IOException { + replaceDocuments(delTerm, docs, analyzer); + } + + /** + * Update fields of documents matching the given term. + * + * @param term + * The term to match. + * @param operation + * defines whether the new fields are either: + *
      + *
+ *          <ul>
+ *          <li>Added to existing matching documents without affecting
+ *          existing fields (using {@link Operation#ADD_FIELDS}), or</li>
+ *          <li>Added to existing matching documents and replacing existing
+ *          fields with the same name (using {@link Operation#REPLACE_FIELDS}).</li>
+ *          </ul>
    + * @param fields + * An {@link IndexDocument} containing the fields to use. Note: + * Fields could be added to more than one document, and + * therefore analyzed more than once. This may result in an + * exception, for instance in the case of a field based on a reader. + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * + * @lucene.experimental + */ + public void updateFields(FieldsUpdate.Operation operation, Term term, + IndexDocument fields) throws IOException { + updateFields(operation, term, fields, analyzer); + } + + /** + * Update fields of documents matching the given term, using the provided + * analyzer instead of the value of {@link #getAnalyzer()}. + * + * @param term + * The term to match. + * @param operation + * defines whether the new fields are either: + *
      + *
+ *          <ul>
+ *          <li>Added to existing matching documents without affecting
+ *          existing fields (using {@link Operation#ADD_FIELDS}), or</li>
+ *          <li>Added to existing matching documents and replacing existing
+ *          fields with the same name (using {@link Operation#REPLACE_FIELDS}).</li>
+ *          </ul>
    + * @param fields + * An {@link IndexDocument} containing the fields to use. Note: + * Fields could be added to more than one document, and + * therefore analyzed more than once. This may result in an + * exception, for instance in the case of a field based on a reader. + * @param analyzer + * The analyzer to use. + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + * + * @lucene.experimental + */ + public void updateFields(FieldsUpdate.Operation operation, Term term, + IndexDocument fields, Analyzer analyzer) throws IOException { + ensureOpen(); + try { + boolean success = false; + boolean anySegmentFlushed = false; + try { + anySegmentFlushed = docWriter.updateFields(term, new FieldsUpdate( + operation, fields, analyzer)); + success = true; + updatesPending = true; + } finally { + if (!success) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception updating document"); + } + } + } + + if (anySegmentFlushed) { + maybeMerge(); + } + } catch (OutOfMemoryError oom) { + handleOOM(oom, "updateDocument"); + } + } + + /** * Deletes the document(s) containing term. - * - *
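As a concrete illustration of the updateFields contract above, a sketch that first appends a field to every matching document and then replaces a same-named field; it assumes the FieldsUpdate.Operation enum and updateFields method introduced by this patch (taken to live in org.apache.lucene.index), that Document implements IndexDocument in this codebase, and purely illustrative field names:

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.FieldsUpdate;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

public class UpdateFieldsExample {
  /** Tags every document matching category:books, then rewrites their status field. */
  public static void tagAndFix(IndexWriter writer) throws IOException {
    Term match = new Term("category", "books");

    // ADD_FIELDS: append a new field to each matching document,
    // leaving all existing fields (including other "tag" values) untouched.
    Document extra = new Document();
    extra.add(new StringField("tag", "on-sale", Store.YES));
    writer.updateFields(FieldsUpdate.Operation.ADD_FIELDS, match, extra);

    // REPLACE_FIELDS: drop any existing "status" field on the matching
    // documents and install the new value instead.
    Document status = new Document();
    status.add(new StringField("status", "archived", Store.YES));
    writer.updateFields(FieldsUpdate.Operation.REPLACE_FIELDS, match, status);

    writer.commit(); // the field updates become visible to newly opened readers
  }
}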

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @param term the term to identify the documents to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @param term + * the term to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Term term) throws IOException { ensureOpen(); @@ -1264,23 +1441,23 @@ handleOOM(oom, "deleteDocuments(Term)"); } } - - /** Expert: attempts to delete by document ID, as long as - * the provided reader is a near-real-time reader (from {@link - * DirectoryReader#open(IndexWriter,boolean)}). If the - * provided reader is an NRT reader obtained from this - * writer, and its segment has not been merged away, then - * the delete succeeds and this method returns true; else, it - * returns false the caller must then separately delete by - * Term or Query. - * - * NOTE: this method can only delete documents - * visible to the currently open NRT reader. If you need - * to delete documents indexed after opening the NRT - * reader you must use the other deleteDocument methods - * (e.g., {@link #deleteDocuments(Term)}). */ - public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException { - + + /** + * Expert: attempts to delete by document ID, as long as the provided reader + * is a near-real-time reader (from + * {@link DirectoryReader#open(IndexWriter,boolean)}). If the provided reader + * is an NRT reader obtained from this writer, and its segment has not been + * merged away, then the delete succeeds and this method returns true; else, + * it returns false the caller must then separately delete by Term or Query. + * + * NOTE: this method can only delete documents visible to the currently + * open NRT reader. If you need to delete documents indexed after opening the + * NRT reader you must use the other deleteDocument methods (e.g., + * {@link #deleteDocuments(Term)}). 
+ */ + public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) + throws IOException { + final AtomicReader reader; if (readerIn instanceof AtomicReader) { // Reader is already atomic: use the incoming docID: @@ -1294,25 +1471,27 @@ assert docID >= 0; assert docID < reader.maxDoc(); } - + if (!(reader instanceof SegmentReader)) { - throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders"); + throw new IllegalArgumentException( + "the reader must be a SegmentReader or composite reader containing only SegmentReaders"); } - + final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo(); - + // TODO: this is a slow linear search, but, number of // segments should be contained unless something is // seriously wrong w/ the index, so it should be a minor // cost: - + if (segmentInfos.indexOf(info) != -1) { ReadersAndLiveDocs rld = readerPool.get(info, false); if (rld != null) { - synchronized(bufferedDeletesStream) { + synchronized (bufferedDeletesStream) { rld.initWritableLiveDocs(); if (rld.delete(docID)) { - final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount(); + final int fullDelCount = rld.info.getDelCount() + + rld.getPendingDeleteCount(); if (fullDelCount == rld.info.info.getDocCount()) { // If a merge has already registered for this // segment, we leave it in the readerPool; the @@ -1324,36 +1503,38 @@ checkpoint(); } } - + // Must bump changeCount so if no other changes // happened, we still commit this change: changeCount++; } - //System.out.println(" yes " + info.info.name + " " + docID); + // System.out.println(" yes " + info.info.name + " " + docID); return true; } } else { - //System.out.println(" no rld " + info.info.name + " " + docID); + // System.out.println(" no rld " + info.info.name + " " + docID); } } else { - //System.out.println(" no seg " + info.info.name + " " + docID); + // System.out.println(" no seg " + info.info.name + " " + docID); } return false; } - + /** - * Deletes the document(s) containing any of the - * terms. All given deletes are applied and flushed atomically - * at the same time. - * - *
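A sketch of the tryDeleteDocument fast path documented above, falling back to a Term delete when the segment has been merged away; it assumes an NRT reader obtained from this writer and an "id" field that uniquely identifies the document (both are assumptions of this example, not of the patch):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

public class TryDeleteExample {
  /** Deletes the document with the given id, preferring the by-docID fast path. */
  public static void deleteById(IndexWriter writer, String id) throws IOException {
    DirectoryReader reader = DirectoryReader.open(writer, true); // NRT reader
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      TopDocs hits = searcher.search(new TermQuery(new Term("id", id)), 1);
      if (hits.totalHits == 0) {
        return; // nothing to delete
      }
      ScoreDoc hit = hits.scoreDocs[0];
      // Fast path: delete by docID against the segment the NRT reader sees.
      if (!writer.tryDeleteDocument(reader, hit.doc)) {
        // Segment merged away or reader stale: fall back to a Term delete.
        writer.deleteDocuments(new Term("id", id));
      }
    } finally {
      reader.close();
    }
  }
}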

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @param terms array of terms to identify the documents - * to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Deletes the document(s) containing any of the terms. All given deletes are + * applied and flushed atomically at the same time. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @param terms + * array of terms to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Term... terms) throws IOException { ensureOpen(); @@ -1363,17 +1544,21 @@ handleOOM(oom, "deleteDocuments(Term..)"); } } - + /** * Deletes the document(s) matching the provided query. - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @param query the query to identify the documents to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @param query + * the query to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Query query) throws IOException { ensureOpen(); @@ -1383,19 +1568,22 @@ handleOOM(oom, "deleteDocuments(Query)"); } } - + /** - * Deletes the document(s) matching any of the provided queries. - * All given deletes are applied and flushed atomically at the same time. - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @param queries array of queries to identify the documents - * to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Deletes the document(s) matching any of the provided queries. All given + * deletes are applied and flushed atomically at the same time. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @param queries + * array of queries to identify the documents to be deleted + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void deleteDocuments(Query... queries) throws IOException { ensureOpen(); @@ -1405,48 +1593,69 @@ handleOOM(oom, "deleteDocuments(Query..)"); } } - + /** - * Updates a document by first deleting the document(s) - * containing term and then adding the new - * document. The delete and then add are atomic as seen - * by a reader on the same index (flush may happen only after - * the add). - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @param term the term to identify the document(s) to be - * deleted - * @param doc the document to be added - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Updates a document by first deleting the document(s) containing + * term and then adding the new document. The delete and then add + * are atomic as seen by a reader on the same index (flush may happen only + * after the add). + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @param term + * the term to identify the document(s) to be deleted + * @param doc + * the document to be added + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ + public void replaceDocument(Term term, IndexDocument doc) throws IOException { + ensureOpen(); + replaceDocument(term, doc, getAnalyzer()); + } + + /** + * Replaced by {@link #replaceDocument(Term, IndexDocument)}. If you wish to + * update fields of existing documents use + * {@link #updateFields(Operation, Term, IndexDocument)}. + * + * @deprecated use {@link #replaceDocument(Term, IndexDocument)} or + * {@link #updateFields(Operation, Term, IndexDocument)}. + */ + @Deprecated public void updateDocument(Term term, IndexDocument doc) throws IOException { ensureOpen(); - updateDocument(term, doc, analyzer); + replaceDocument(term, doc, analyzer); } - + /** - * Updates a document by first deleting the document(s) - * containing term and then adding the new - * document. The delete and then add are atomic as seen - * by a reader on the same index (flush may happen only after - * the add). - * - *
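To make the deprecation path above concrete, a small sketch contrasting the old call with its two replacements; it assumes the replaceDocument and updateFields methods introduced by this patch, and the "id"/"views" fields are illustrative only:

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.FieldsUpdate;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

public class ReplaceVsUpdateExample {
  public static void migrate(IndexWriter writer, Document fullDoc) throws IOException {
    Term id = new Term("id", "doc-7");

    // Before this patch: delete-then-add under the "update" name.
    // writer.updateDocument(id, fullDoc);   // now deprecated

    // Same semantics, clearer name: the whole document is replaced.
    writer.replaceDocument(id, fullDoc);

    // New alternative when only some fields change: keep the indexed document
    // and swap just the "views" field on every document matching the term.
    Document viewsOnly = new Document();
    viewsOnly.add(new StringField("views", "1024", Store.YES));
    writer.updateFields(FieldsUpdate.Operation.REPLACE_FIELDS, id, viewsOnly);
  }
}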

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - * @param term the term to identify the document(s) to be - * deleted - * @param doc the document to be added - * @param analyzer the analyzer to use when analyzing the document - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * Updates a document by first deleting the document(s) containing + * term and then adding the new document. The delete and then add + * are atomic as seen by a reader on the same index (flush may happen only + * after the add). + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + * @param term + * the term to identify the document(s) to be deleted + * @param doc + * the document to be added + * @param analyzer + * the analyzer to use when analyzing the document + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ - public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer) + public void replaceDocument(Term term, IndexDocument doc, Analyzer analyzer) throws IOException { ensureOpen(); try { @@ -1462,7 +1671,7 @@ } } } - + if (anySegmentFlushed) { maybeMerge(MergeTrigger.SEGMENT_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); } @@ -1470,22 +1679,37 @@ handleOOM(oom, "updateDocument"); } } - + + /** + * Replaced by {@link #replaceDocument(Term, IndexDocument, Analyzer)}. If you + * wish to update fields of existing documents use + * {@link #updateFields(Operation, Term, IndexDocument, Analyzer)}. + * + * @deprecated use {@link #replaceDocument(Term, IndexDocument, Analyzer)} or + * {@link #updateFields(Operation, Term, IndexDocument, Analyzer)} + * . + */ + @Deprecated + public void updateDocument(Term term, IndexDocument doc, Analyzer analyzer) + throws IOException { + replaceDocument(term, doc, analyzer); + } + // for test purpose - final synchronized int getSegmentCount(){ + final synchronized int getSegmentCount() { return segmentInfos.size(); } - + // for test purpose - final synchronized int getNumBufferedDocuments(){ + final synchronized int getNumBufferedDocuments() { return docWriter.getNumDocs(); } - + // for test purpose final synchronized Collection getIndexFileNames() throws IOException { return segmentInfos.files(directory, true); } - + // for test purpose final synchronized int getDocCount(int i) { if (i >= 0 && i < segmentInfos.size()) { @@ -1494,392 +1718,407 @@ return -1; } } - + // for test purpose final int getFlushCount() { return flushCount.get(); } - + // for test purpose final int getFlushDeletesCount() { return flushDeletesCount.get(); } - + final String newSegmentName() { // Cannot synchronize on IndexWriter because that causes // deadlock - synchronized(segmentInfos) { + synchronized (segmentInfos) { // Important to increment changeCount so that the - // segmentInfos is written on close. Otherwise we + // segmentInfos is written on close. Otherwise we // could close, re-open and re-return the same segment // name that was previously returned which can cause // problems at least with ConcurrentMergeScheduler. changeCount++; segmentInfos.changed(); - return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); + return "_" + + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); } } - - /** If non-null, information about merges will be printed to this. + + /** + * If non-null, information about merges will be printed to this. */ final InfoStream infoStream; - + /** - * Forces merge policy to merge segments until there are <= - * maxNumSegments. The actual merges to be - * executed are determined by the {@link MergePolicy}. - * - *

    This is a horribly costly operation, especially when - * you pass a small {@code maxNumSegments}; usually you - * should only call this if the index is static (will no - * longer be changed).

    - * - *

    Note that this requires up to 2X the index size free - * space in your Directory (3X if you're using compound - * file format). For example, if your index size is 10 MB - * then you need up to 20 MB free for this to complete (30 - * MB if you're using compound file format). Also, - * it's best to call {@link #commit()} afterwards, - * to allow IndexWriter to free up disk space.

    - * - *

    If some but not all readers re-open while merging - * is underway, this will cause > 2X temporary - * space to be consumed as those new readers will then - * hold open the temporary segments at that time. It is - * best not to re-open readers while merging is running.

    - * - *

    The actual temporary usage could be much less than - * these figures (it depends on many factors).

    - * - *

    In general, once this completes, the total size of the - * index will be less than the size of the starting index. - * It could be quite a bit smaller (if there were many - * pending deletes) or just slightly smaller.

    - * - *

    If an Exception is hit, for example - * due to disk full, the index will not be corrupted and no - * documents will be lost. However, it may have - * been partially merged (some segments were merged but - * not all), and it's possible that one of the segments in - * the index will be in non-compound format even when - * using compound file format. This will occur when the - * Exception is hit during conversion of the segment into - * compound format.

    - * - *

    This call will merge those segments present in - * the index when the call started. If other threads are - * still adding documents and flushing segments, those - * newly created segments will not be merged unless you - * call forceMerge again.

    - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    - * - *

    NOTE: if you call {@link #close(boolean)} - * with false, which aborts all running merges, - * then any thread still running this method might hit a - * {@link MergePolicy.MergeAbortedException}. - * - * @param maxNumSegments maximum number of segments left - * in the index after merging finishes + * Forces merge policy to merge segments until there are <= maxNumSegments. + * The actual merges to be executed are determined by the {@link MergePolicy}. * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + *

    + * This is a horribly costly operation, especially when you pass a small + * {@code maxNumSegments}; usually you should only call this if the index is + * static (will no longer be changed). + *

    + * + *

    + * Note that this requires up to 2X the index size free space in your + * Directory (3X if you're using compound file format). For example, if your + * index size is 10 MB then you need up to 20 MB free for this to complete (30 + * MB if you're using compound file format). Also, it's best to call + * {@link #commit()} afterwards, to allow IndexWriter to free up disk space. + *

    + * + *

    + * If some but not all readers re-open while merging is underway, this will + * cause > 2X temporary space to be consumed as those new readers will then + * hold open the temporary segments at that time. It is best not to re-open + * readers while merging is running. + *

    + * + *

    + * The actual temporary usage could be much less than these figures (it + * depends on many factors). + *

    + * + *

    + * In general, once this completes, the total size of the index will be less + * than the size of the starting index. It could be quite a bit smaller (if + * there were many pending deletes) or just slightly smaller. + *

    + * + *

    + * If an Exception is hit, for example due to disk full, the index will not be + * corrupted and no documents will be lost. However, it may have been + * partially merged (some segments were merged but not all), and it's possible + * that one of the segments in the index will be in non-compound format even + * when using compound file format. This will occur when the Exception is hit + * during conversion of the segment into compound format. + *

    + * + *

    + * This call will merge those segments present in the index when the call + * started. If other threads are still adding documents and flushing segments, + * those newly created segments will not be merged unless you call forceMerge + * again. + *

    + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + *

    + * NOTE: if you call {@link #close(boolean)} with false, which + * aborts all running merges, then any thread still running this method might + * hit a {@link MergePolicy.MergeAbortedException}. + * + * @param maxNumSegments + * maximum number of segments left in the index after merging + * finishes + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error * @see MergePolicy#findMerges - * - */ + * + */ public void forceMerge(int maxNumSegments) throws IOException { forceMerge(maxNumSegments, true); } - - /** Just like {@link #forceMerge(int)}, except you can - * specify whether the call should block until - * all merging completes. This is only meaningful with a - * {@link MergeScheduler} that is able to run merges in - * background threads. - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    + + /** + * Just like {@link #forceMerge(int)}, except you can specify whether the call + * should block until all merging completes. This is only meaningful with a + * {@link MergeScheduler} that is able to run merges in background threads. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    */ public void forceMerge(int maxNumSegments, boolean doWait) throws IOException { ensureOpen(); - - if (maxNumSegments < 1) - throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments); - + + if (maxNumSegments < 1) throw new IllegalArgumentException( + "maxNumSegments must be >= 1; got " + maxNumSegments); + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "forceMerge: index now " + segString()); infoStream.message("IW", "now flush at forceMerge"); } - + flush(true, true); - - synchronized(this) { + + synchronized (this) { resetMergeExceptions(); segmentsToMerge.clear(); - for(SegmentInfoPerCommit info : segmentInfos) { + for (SegmentInfoPerCommit info : segmentInfos) { segmentsToMerge.put(info, Boolean.TRUE); } mergeMaxNumSegments = maxNumSegments; - + // Now mark all pending & running merges for forced // merge: - for(final MergePolicy.OneMerge merge : pendingMerges) { + for (final MergePolicy.OneMerge merge : pendingMerges) { merge.maxNumSegments = maxNumSegments; segmentsToMerge.put(merge.info, Boolean.TRUE); } - - for (final MergePolicy.OneMerge merge: runningMerges) { + + for (final MergePolicy.OneMerge merge : runningMerges) { merge.maxNumSegments = maxNumSegments; segmentsToMerge.put(merge.info, Boolean.TRUE); } } - + maybeMerge(MergeTrigger.EXPLICIT, maxNumSegments); - + if (doWait) { - synchronized(this) { - while(true) { - + synchronized (this) { + while (true) { + if (hitOOM) { - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge"); + throw new IllegalStateException( + "this writer hit an OutOfMemoryError; cannot complete forceMerge"); } - + if (mergeExceptions.size() > 0) { // Forward any exceptions in background merge // threads to the current thread: final int size = mergeExceptions.size(); - for(int i=0;iNOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.
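A usage sketch for the blocking and non-blocking forms of forceMerge shown above, following the javadoc's advice to commit afterwards so unreferenced files can be freed; construction and closing of the writer are assumed to happen elsewhere:

import java.io.IOException;

import org.apache.lucene.index.IndexWriter;

public class ForceMergeExample {
  /** Shrinks a static index down to a single segment. */
  public static void optimizeForSearch(IndexWriter writer) throws IOException {
    // Blocking call: returns only once the index has at most one segment.
    writer.forceMerge(1);
    // Commit so IndexWriter can drop the now-unreferenced files.
    writer.commit();
  }

  /** Requests the same merge without waiting (needs a concurrent MergeScheduler). */
  public static void optimizeInBackground(IndexWriter writer) throws IOException {
    writer.forceMerge(1, false); // returns immediately; merges run on background threads
  }
}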

    - * - *

    NOTE: if you call {@link #close(boolean)} - * with false, which aborts all running merges, - * then any thread still running this method might hit a - * {@link MergePolicy.MergeAbortedException}. + + /** + * Just like {@link #forceMergeDeletes()}, except you can specify whether the + * call should block until the operation completes. This is only meaningful + * with a {@link MergeScheduler} that is able to run merges in background + * threads. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    + * + *

    + * NOTE: if you call {@link #close(boolean)} with false, which + * aborts all running merges, then any thread still running this method might + * hit a {@link MergePolicy.MergeAbortedException}. */ - public void forceMergeDeletes(boolean doWait) - throws IOException { + public void forceMergeDeletes(boolean doWait) throws IOException { ensureOpen(); - + flush(true, true); - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "forceMergeDeletes: index now " + segString()); } - + MergePolicy.MergeSpecification spec; - - synchronized(this) { + + synchronized (this) { spec = mergePolicy.findForcedDeletesMerges(segmentInfos); if (spec != null) { final int numMerges = spec.merges.size(); - for(int i=0;iThis is often a horribly costly operation; rarely - * is it warranted.

    - * - *

    To see how - * many deletions you have pending in your index, call - * {@link IndexReader#numDeletedDocs}.

    - * - *

    NOTE: this method first flushes a new - * segment (if there are indexed documents), and applies - * all buffered deletes. - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    + * Forces merging of all segments that have deleted documents. The actual + * merges to be executed are determined by the {@link MergePolicy}. For + * example, the default {@link TieredMergePolicy} will only pick a segment if + * the percentage of deleted docs is over 10%. + * + *

    + * This is often a horribly costly operation; rarely is it warranted. + *

    + * + *

    + * To see how many deletions you have pending in your index, call + * {@link IndexReader#numDeletedDocs}. + *

    + * + *

    + * NOTE: this method first flushes a new segment (if there are indexed + * documents), and applies all buffered deletes. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    */ public void forceMergeDeletes() throws IOException { forceMergeDeletes(true); } - + /** - * Expert: asks the mergePolicy whether any merges are - * necessary now and if so, runs the requested merges and - * then iterate (test again if merges are needed) until no - * more merges are returned by the mergePolicy. - * - * Explicit calls to maybeMerge() are usually not - * necessary. The most common case is when merge policy - * parameters have changed. + * Expert: asks the mergePolicy whether any merges are necessary now and if + * so, runs the requested merges and then iterate (test again if merges are + * needed) until no more merges are returned by the mergePolicy. * + * Explicit calls to maybeMerge() are usually not necessary. The most common + * case is when merge policy parameters have changed. + * * This method will call the {@link MergePolicy} with * {@link MergeTrigger#EXPLICIT}. - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    */ public final void maybeMerge() throws IOException { maybeMerge(MergeTrigger.EXPLICIT, UNBOUNDED_MAX_MERGE_SEGMENTS); } - - private final void maybeMerge(MergeTrigger trigger, int maxNumSegments) throws IOException { + + private final void maybeMerge(MergeTrigger trigger, int maxNumSegments) + throws IOException { ensureOpen(false); updatePendingMerges(trigger, maxNumSegments); mergeScheduler.merge(this); } - - private synchronized void updatePendingMerges(MergeTrigger trigger, int maxNumSegments) - throws IOException { + + private synchronized void updatePendingMerges(MergeTrigger trigger, + int maxNumSegments) throws IOException { assert maxNumSegments == -1 || maxNumSegments > 0; assert trigger != null; if (stopMerges) { return; } - + // Do not start new merges if we've hit OOME if (hitOOM) { return; } - + final MergePolicy.MergeSpecification spec; if (maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS) { - assert trigger == MergeTrigger.EXPLICIT || trigger == MergeTrigger.MERGE_FINISHED : - "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: " + trigger.name(); - spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge)); + assert trigger == MergeTrigger.EXPLICIT + || trigger == MergeTrigger.MERGE_FINISHED : "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: " + + trigger.name(); + spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, + Collections.unmodifiableMap(segmentsToMerge)); if (spec != null) { final int numMerges = spec.merges.size(); - for(int i=0;iDo not alter the returned collection! */ + + /** + * Expert: to be used by a {@link MergePolicy} to avoid selecting merges for + * segments already being merged. The returned collection is not cloned, and + * thus is only safe to access if you hold IndexWriter's lock (which you do + * when IndexWriter invokes the MergePolicy). + * + *

    + * Do not alter the returned collection! + */ public synchronized Collection getMergingSegments() { return mergingSegments; } - + /** * Expert: the {@link MergeScheduler} calls this method to retrieve the next * merge requested by the MergePolicy @@ -1896,7 +2135,7 @@ return merge; } } - + /** * Expert: returns true if there are merges waiting to be scheduled. * @@ -1905,97 +2144,97 @@ public synchronized boolean hasPendingMerges() { return pendingMerges.size() != 0; } - + /** - * Close the IndexWriter without committing - * any changes that have occurred since the last commit - * (or since it was opened, if commit hasn't been called). - * This removes any temporary files that had been created, - * after which the state of the index will be the same as - * it was when commit() was last called or when this - * writer was first opened. This also clears a previous - * call to {@link #prepareCommit}. - * @throws IOException if there is a low-level IO error + * Close the IndexWriter without committing any changes that have + * occurred since the last commit (or since it was opened, if commit hasn't + * been called). This removes any temporary files that had been created, after + * which the state of the index will be the same as it was when commit() was + * last called or when this writer was first opened. This also clears a + * previous call to {@link #prepareCommit}. + * + * @throws IOException + * if there is a low-level IO error */ @Override public void rollback() throws IOException { ensureOpen(); - + // Ensure that only one thread actually gets to do the // closing, and make sure no commit is also in progress: - synchronized(commitLock) { + synchronized (commitLock) { if (shouldClose()) { rollbackInternal(); } } } - + private void rollbackInternal() throws IOException { - + boolean success = false; - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "rollback"); } - try { - synchronized(this) { + synchronized (this) { finishMerges(false); stopMerges = true; } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "rollback: done finish merges"); } - + // Must pre-close these two, in case they increment // changeCount so that we can then set it to false // before calling closeInternal mergePolicy.close(); mergeScheduler.close(); - + bufferedDeletesStream.clear(); - docWriter.close(); // mark it as closed first to prevent subsequent indexing actions/flushes + docWriter.close(); // mark it as closed first to prevent subsequent + // indexing actions/flushes docWriter.abort(); - synchronized(this) { - + synchronized (this) { + if (pendingCommit != null) { pendingCommit.rollbackCommit(directory); deleter.decRef(pendingCommit); pendingCommit = null; notifyAll(); } - + // Don't bother saving any changes in our segmentInfos readerPool.dropAll(false); - + // Keep the same segmentInfos instance but replace all - // of its SegmentInfo instances. This is so the next + // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write // once"). 
segmentInfos.rollbackSegmentInfos(rollbackSegments); - if (infoStream.isEnabled("IW") ) { - infoStream.message("IW", "rollback: infos=" + segString(segmentInfos)); + if (infoStream.isEnabled("IW")) { + infoStream + .message("IW", "rollback: infos=" + segString(segmentInfos)); } - assert testPoint("rollback before checkpoint"); - + // Ask deleter to locate unreferenced files & remove // them: deleter.checkpoint(segmentInfos, false); deleter.refresh(); - + lastCommitChangeCount = changeCount; } - + success = true; } catch (OutOfMemoryError oom) { handleOOM(oom, "rollbackInternal"); } finally { - synchronized(this) { + synchronized (this) { if (!success) { closing = false; notifyAll(); @@ -2005,47 +2244,51 @@ } } } - + closeInternal(false, false); } - + /** * Delete all documents in the index. - * - *

    This method will drop all buffered documents and will - * remove all segments from the index. This change will not be - * visible until a {@link #commit()} has been called. This method - * can be rolled back using {@link #rollback()}.

    - * - *

    NOTE: this method is much faster than using deleteDocuments( new MatchAllDocsQuery() ).

    - * - *

    NOTE: this method will forcefully abort all merges - * in progress. If other threads are running {@link - * #forceMerge}, {@link #addIndexes(IndexReader[])} or - * {@link #forceMergeDeletes} methods, they may receive - * {@link MergePolicy.MergeAbortedException}s. + * + *

    + * This method will drop all buffered documents and will remove all segments + * from the index. This change will not be visible until a {@link #commit()} + * has been called. This method can be rolled back using {@link #rollback()}. + *

    + * + *

    + * NOTE: this method is much faster than using deleteDocuments( new + * MatchAllDocsQuery() ). + *

    + * + *

    + * NOTE: this method will forcefully abort all merges in progress. If other + * threads are running {@link #forceMerge}, {@link #addIndexes(IndexReader[])} + * or {@link #forceMergeDeletes} methods, they may receive + * {@link MergePolicy.MergeAbortedException}s. */ public synchronized void deleteAll() throws IOException { ensureOpen(); boolean success = false; try { - + // Abort any running merges finishMerges(false); - + // Remove any buffered docs docWriter.abort(); - + // Remove all segments segmentInfos.clear(); - + // Ask deleter to locate unreferenced files & remove them: deleter.checkpoint(segmentInfos, false); deleter.refresh(); - + // Don't bother saving any changes in our segmentInfos readerPool.dropAll(false); - + // Mark that the index has changed ++changeCount; segmentInfos.changed(); @@ -2060,50 +2303,53 @@ } } } - + private synchronized void finishMerges(boolean waitForMerges) { if (!waitForMerges) { - + stopMerges = true; - + // Abort all pending & running merges: for (final MergePolicy.OneMerge merge : pendingMerges) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "now abort pending merge " + segString(merge.segments)); + infoStream.message("IW", "now abort pending merge " + + segString(merge.segments)); } merge.abort(); mergeFinish(merge); } pendingMerges.clear(); - + for (final MergePolicy.OneMerge merge : runningMerges) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "now abort running merge " + segString(merge.segments)); + infoStream.message("IW", "now abort running merge " + + segString(merge.segments)); } merge.abort(); } - + // These merges periodically check whether they have - // been aborted, and stop if so. We wait here to make - // sure they all stop. It should not take very long + // been aborted, and stop if so. We wait here to make + // sure they all stop. It should not take very long // because the merge threads periodically check if // they are aborted. - while(runningMerges.size() > 0) { + while (runningMerges.size() > 0) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "now wait for " + runningMerges.size() + " running merge/s to abort"); + infoStream.message("IW", "now wait for " + runningMerges.size() + + " running merge/s to abort"); } doWait(); } - + stopMerges = false; notifyAll(); - + assert 0 == mergingSegments.size(); - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "all running merges have aborted"); } - + } else { // waitForMerges() will ensure any running addIndexes finishes. // It's fine if a new one attempts to start because from our @@ -2113,43 +2359,51 @@ waitForMerges(); } } - + /** * Wait for any currently outstanding merges to finish. - * - *
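A sketch of the deleteAll/rollback interplay described above: the wipe is buffered like any other change, so it only becomes visible at commit and can still be undone; the surrounding setup is assumed, not part of the patch:

import java.io.IOException;

import org.apache.lucene.index.IndexWriter;

public class DeleteAllExample {
  public static void rebuildOrBail(IndexWriter writer, boolean reallyRebuild)
      throws IOException {
    writer.deleteAll(); // drops buffered docs and all segments, but is not yet committed
    if (reallyRebuild) {
      // ... re-add the corpus here, then make the wipe plus new docs durable:
      writer.commit();
    } else {
      // Change of heart: the index is back to the last commit point.
      writer.rollback(); // note: rollback also closes the writer
    }
  }
}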

    It is guaranteed that any merges started prior to calling this method - * will have completed once this method completes.

    + * + *

    + * It is guaranteed that any merges started prior to calling this method will + * have completed once this method completes. + *

    */ public synchronized void waitForMerges() { ensureOpen(false); if (infoStream.isEnabled("IW")) { infoStream.message("IW", "waitForMerges"); } - while(pendingMerges.size() > 0 || runningMerges.size() > 0) { + while (pendingMerges.size() > 0 || runningMerges.size() > 0) { doWait(); } - + // sanity check assert 0 == mergingSegments.size(); - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "waitForMerges done"); } } - + /** - * Called whenever the SegmentInfos has been updated and - * the index files referenced exist (correctly) in the - * index directory. + * Called whenever the SegmentInfos has been updated and the index files + * referenced exist (correctly) in the index directory. */ synchronized void checkpoint() throws IOException { changeCount++; segmentInfos.changed(); deleter.checkpoint(segmentInfos, false); } - + + void writeSegmentUpdates(SegmentInfoPerCommit segment, + UpdatedSegmentData updates, IOContext context) throws IOException { + docWriter.writeUpdatedSegment(updates, segment, this.deleter); + + segment.advanceUpdateGen(); + } + synchronized void publishFrozenDeletes(FrozenBufferedDeletes packet) { - assert packet != null && packet.any(); + assert packet != null && (packet.anyDeletes() || packet.anyUpdates()); synchronized (bufferedDeletesStream) { bufferedDeletesStream.push(packet); } @@ -2160,20 +2414,22 @@ * segments SegmentInfo to the index writer. */ synchronized void publishFlushedSegment(SegmentInfoPerCommit newSegment, - FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException { + FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) + throws IOException { // Lock order IW -> BDS synchronized (bufferedDeletesStream) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "publishFlushedSegment"); } - if (globalPacket != null && globalPacket.any()) { + if (globalPacket != null + && (globalPacket.anyDeletes() || globalPacket.anyUpdates())) { bufferedDeletesStream.push(globalPacket); - } + } // Publishing the segment must be synched on IW -> BDS to make the sure // that no merge prunes away the seg. private delete packet final long nextGen; - if (packet != null && packet.any()) { + if (packet != null && (packet.anyDeletes())) { nextGen = bufferedDeletesStream.push(packet); } else { // Since we don't have a delete packet to apply we can get a new @@ -2181,92 +2437,98 @@ nextGen = bufferedDeletesStream.getNextGen(); } if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "publish sets newSegment delGen=" + nextGen + " seg=" + segString(newSegment)); + infoStream.message("IW", "publish sets newSegment delGen=" + nextGen + + " seg=" + segString(newSegment)); } newSegment.setBufferedDeletesGen(nextGen); segmentInfos.add(newSegment); checkpoint(); } } - - synchronized boolean useCompoundFile(SegmentInfoPerCommit segmentInfo) throws IOException { + + synchronized boolean useCompoundFile(SegmentInfoPerCommit segmentInfo) + throws IOException { + if (segmentInfo.getUpdateGen() > 0) { + return segmentInfo.info.getUseCompoundFile(); + } return mergePolicy.useCompoundFile(segmentInfos, segmentInfo); } - + private synchronized void resetMergeExceptions() { mergeExceptions = new ArrayList(); mergeGen++; } - + private void noDupDirs(Directory... dirs) { HashSet dups = new HashSet(); - for(int i=0;iThis may be used to parallelize batch indexing. A large document - * collection can be broken into sub-collections. Each sub-collection can be - * indexed in parallel, on a different thread, process or machine. 
The - * complete index can then be created by merging sub-collection indexes - * with this method. - * + * *

    - * NOTE: the index in each {@link Directory} must not be - * changed (opened by a writer) while this method is - * running. This method does not acquire a write lock in - * each input Directory, so it is up to the caller to + * This may be used to parallelize batch indexing. A large document collection + * can be broken into sub-collections. Each sub-collection can be indexed in + * parallel, on a different thread, process or machine. The complete index can + * then be created by merging sub-collection indexes with this method. + * + *

    + * NOTE: the index in each {@link Directory} must not be changed + * (opened by a writer) while this method is running. This method does not + * acquire a write lock in each input Directory, so it is up to the caller to * enforce this. - * - *

    This method is transactional in how Exceptions are - * handled: it does not commit a new segments_N file until - * all indexes are added. This means if an Exception - * occurs (for example disk full), then either no indexes - * will have been added or they all will have been. - * - *

    Note that this requires temporary free space in the - * {@link Directory} up to 2X the sum of all input indexes - * (including the starting index). If readers/searchers - * are open against the starting index, then temporary - * free space required will be higher by the size of the - * starting index (see {@link #forceMerge(int)} for details). - * + * *

    + * This method is transactional in how Exceptions are handled: it does not + * commit a new segments_N file until all indexes are added. This means if an + * Exception occurs (for example disk full), then either no indexes will have + * been added or they all will have been. + * + *

    + * Note that this requires temporary free space in the {@link Directory} up to + * 2X the sum of all input indexes (including the starting index). If + * readers/searchers are open against the starting index, then temporary free + * space required will be higher by the size of the starting index (see + * {@link #forceMerge(int)} for details). + * + *

    * NOTE: this method only copies the segments of the incoming indexes * and does not merge them. Therefore deleted documents are not removed and * the new segments are not merged with the existing ones. - * - *

    This requires this index not be among those to be added. - * + * *

    - * NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * This requires this index not be among those to be added. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ public void addIndexes(Directory... dirs) throws IOException { ensureOpen(); - + noDupDirs(dirs); - + try { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "flush at addIndexes(Directory...)"); } - + flush(false, true); - + List infos = new ArrayList(); - + boolean success = false; try { for (Directory dir : dirs) { @@ -2275,35 +2537,39 @@ } SegmentInfos sis = new SegmentInfos(); // read infos from dir sis.read(dir); - + for (SegmentInfoPerCommit info : sis) { - assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name; - + assert !infos.contains(info) : "dup info dir=" + info.info.dir + + " name=" + info.info.name; + String newSegName = newSegmentName(); - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info); + infoStream + .message("IW", "addIndexes: process segment origName=" + + info.info.name + " newName=" + newSegName + " info=" + + info); } - - IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1)); - + + IOContext context = new IOContext(new MergeInfo( + info.info.getDocCount(), info.info.sizeInBytes(), true, -1)); + infos.add(copySegmentAsIs(info, newSegName, context)); } } success = true; } finally { if (!success) { - for(SegmentInfoPerCommit sipc : infos) { - for(String file : sipc.files()) { + for (SegmentInfoPerCommit sipc : infos) { + for (String file : sipc.files()) { try { directory.deleteFile(file); - } catch (Throwable t) { - } + } catch (Throwable t) {} } } } } - + synchronized (this) { success = false; try { @@ -2311,12 +2577,11 @@ success = true; } finally { if (!success) { - for(SegmentInfoPerCommit sipc : infos) { - for(String file : sipc.files()) { + for (SegmentInfoPerCommit sipc : infos) { + for (String file : sipc.files()) { try { directory.deleteFile(file); - } catch (Throwable t) { - } + } catch (Throwable t) {} } } } @@ -2324,7 +2589,7 @@ segmentInfos.addAll(infos); checkpoint(); } - + } catch (OutOfMemoryError oom) { handleOOM(oom, "addIndexes(Directory...)"); } @@ -2368,98 +2633,108 @@ public void addIndexes(IndexReader... 
readers) throws IOException { ensureOpen(); int numDocs = 0; - + try { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "flush at addIndexes(IndexReader...)"); } flush(false, true); - + String mergedName = newSegmentName(); for (IndexReader indexReader : readers) { numDocs += indexReader.numDocs(); } - final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1)); - + final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, + -1)); + // TODO: somehow we should fix this merge so it's // abortable so that IW.close(false) is able to stop it - TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); - - SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1, - false, codec, null, null); - - SegmentMerger merger = new SegmentMerger(info, infoStream, trackingDir, config.getTermIndexInterval(), - MergeState.CheckAbort.NONE, globalFieldNumberMap, context); - - for (IndexReader reader : readers) { // add new indexes + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper( + directory); + + SegmentInfo info = new SegmentInfo(directory, + Constants.LUCENE_MAIN_VERSION, mergedName, -1, false, codec, null, + null); + + SegmentMerger merger = new SegmentMerger(info, infoStream, trackingDir, + config.getTermIndexInterval(), MergeState.CheckAbort.NONE, + globalFieldNumberMap, context); + + for (IndexReader reader : readers) { // add new indexes merger.add(reader); } - + MergeState mergeState; boolean success = false; try { - mergeState = merger.merge(); // merge 'em + mergeState = merger.merge(); // merge 'em success = true; } finally { - if (!success) { - synchronized(this) { + if (!success) { + synchronized (this) { deleter.refresh(info.name); } } } - - SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, 0, -1L); - + + SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, 0, + -1L, -1L); + info.setFiles(new HashSet(trackingDir.getCreatedFiles())); trackingDir.getCreatedFiles().clear(); - + setDiagnostics(info, "addIndexes(IndexReader...)"); - + boolean useCompoundFile; - synchronized(this) { // Guard segmentInfos + synchronized (this) { // Guard segmentInfos if (stopMerges) { deleter.deleteNewFiles(infoPerCommit.files()); return; } ensureOpen(); - useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, infoPerCommit); + useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, + infoPerCommit); } - + // Now create the compound file if needed if (useCompoundFile) { Collection filesToDelete = infoPerCommit.files(); try { - createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context); + createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, + info, context, -1); } finally { // delete new non cfs files directly: they were never // registered with IFD - synchronized(this) { + synchronized (this) { deleter.deleteNewFiles(filesToDelete); } } info.setUseCompoundFile(true); } - - // Have codec write SegmentInfo. Must do this after + + // Have codec write SegmentInfo. 
Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: success = false; try { - codec.segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, info, mergeState.fieldInfos, context); + SegmentInfoWriter segmentInfoWriter = codec.segmentInfoFormat() + .getSegmentInfoWriter(); + segmentInfoWriter.write(trackingDir, info, mergeState.fieldInfos, + context); success = true; } finally { if (!success) { - synchronized(this) { + synchronized (this) { deleter.refresh(info.name); } } } - + info.addFiles(trackingDir.getCreatedFiles()); - + // Register the new segment - synchronized(this) { + synchronized (this) { if (stopMerges) { deleter.deleteNewFiles(info.files()); return; @@ -2472,10 +2747,10 @@ handleOOM(oom, "addIndexes(IndexReader...)"); } } - + /** Copies the segment files as-is into the IndexWriter's directory. */ - private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName, IOContext context) - throws IOException { + private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, + String segName, IOContext context) throws IOException { // note: we don't really need this fis (its copied), but we load it up // so we don't pass a null value to the si writer @@ -2483,64 +2758,72 @@ final Map attributes; // copy the attributes map, we might modify it below. - // also we need to ensure its read-write, since we will invoke the SIwriter (which might want to set something). + // also we need to ensure its read-write, since we will invoke the SIwriter + // (which might want to set something). if (info.info.attributes() == null) { attributes = new HashMap(); } else { attributes = new HashMap(info.info.attributes()); } - - //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion()); + + // System.out.println("copy seg=" + info.info.name + " version=" + + // info.info.getVersion()); // Same SI as before but we change directory and name - SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(), - info.info.getUseCompoundFile(), - info.info.getCodec(), info.info.getDiagnostics(), attributes); - SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, info.getDelCount(), info.getDelGen()); - + SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), + segName, info.info.getDocCount(), info.info.getUseCompoundFile(), + info.info.getCodec(), info.info.getDiagnostics(), attributes); + SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, + info.getDelCount(), info.getDelGen(), -1L); + Set segFiles = new HashSet(); - - // Build up new segment's file names. Must do this + + // Build up new segment's file names. 
Must do this // before writing SegmentInfo: - for (String file: info.files()) { + for (String file : info.files()) { final String newFileName; newFileName = segName + IndexFileNames.stripSegmentName(file); segFiles.add(newFileName); } newInfo.setFiles(segFiles); - - // We must rewrite the SI file because it references segment name in its list of files, etc - TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); - + + // We must rewrite the SI file because it references segment name in its + // list of files, etc + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper( + directory); + boolean success = false; - + try { - - newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context); - + + SegmentInfoWriter segmentInfoWriter = newInfo.getCodec() + .segmentInfoFormat().getSegmentInfoWriter(); + segmentInfoWriter.write(trackingDir, newInfo, fis, context); + final Collection siFiles = trackingDir.getCreatedFiles(); - + // Copy the segment's files - for (String file: info.files()) { - - final String newFileName = segName + IndexFileNames.stripSegmentName(file); - + for (String file : info.files()) { + + final String newFileName = segName + + IndexFileNames.stripSegmentName(file); + if (siFiles.contains(newFileName)) { // We already rewrote this above continue; } - - assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles; - + + assert !directory.fileExists(newFileName) : "file \"" + newFileName + + "\" already exists; siFiles=" + siFiles; + info.info.dir.copy(directory, file, newFileName, context); } success = true; } finally { if (!success) { - for(String file : newInfo.files()) { + for (String file : newInfo.files()) { try { directory.deleteFile(file); - } catch (Throwable t) { - } + } catch (Throwable t) {} } } } @@ -2554,95 +2837,99 @@ * is committed (new segments_N file written). */ protected void doAfterFlush() throws IOException {} - + /** * A hook for extending classes to execute operations before pending added and * deleted documents are flushed to the Directory. */ protected void doBeforeFlush() throws IOException {} - - /**

    Expert: prepare for commit. This does the - * first phase of 2-phase commit. This method does all - * steps necessary to commit changes since this writer - * was opened: flushes pending added and deleted docs, - * syncs the index files, writes most of next segments_N - * file. After calling this you must call either {@link - * #commit()} to finish the commit, or {@link - * #rollback()} to revert the commit and undo all changes - * done since the writer was opened.

    - * - *

    You can also just call {@link #commit()} directly - * without prepareCommit first in which case that method - * will internally call prepareCommit. - * - *

    NOTE: if this method hits an OutOfMemoryError - * you should immediately close the writer. See above for details.

    + + /** + *

    + * Expert: prepare for commit. This does the first phase of 2-phase commit. + * This method does all steps necessary to commit changes since this writer + * was opened: flushes pending added and deleted docs, syncs the index files, + * writes most of next segments_N file. After calling this you must call + * either {@link #commit()} to finish the commit, or {@link #rollback()} to + * revert the commit and undo all changes done since the writer was opened. + *

    + * + *

    + * You can also just call {@link #commit()} directly without prepareCommit + * first in which case that method will internally call prepareCommit. + * + *

    + * NOTE: if this method hits an OutOfMemoryError you should immediately + * close the writer. See above for details. + *

    */ @Override public final void prepareCommit() throws IOException { ensureOpen(); prepareCommitInternal(); } - + private void prepareCommitInternal() throws IOException { - synchronized(commitLock) { + synchronized (commitLock) { ensureOpen(false); if (infoStream.isEnabled("IW")) { infoStream.message("IW", "prepareCommit: flush"); infoStream.message("IW", " index before flush " + segString()); } - + if (hitOOM) { - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit"); + throw new IllegalStateException( + "this writer hit an OutOfMemoryError; cannot commit"); } - + if (pendingCommit != null) { - throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit"); + throw new IllegalStateException( + "prepareCommit was already called with no corresponding call to commit"); } - + doBeforeFlush(); assert testPoint("startDoFlush"); SegmentInfos toCommit = null; boolean anySegmentsFlushed = false; - + // This is copied from doFlush, except it's modified to // clone & incRef the flushed SegmentInfos inside the // sync block: - + try { - + synchronized (fullFlushLock) { boolean flushSuccess = false; boolean success = false; try { anySegmentsFlushed = docWriter.flushAllThreads(); if (!anySegmentsFlushed) { - // prevent double increment since docWriter#doFlush increments the flushcount + // prevent double increment since docWriter#doFlush increments the + // flushcount // if we flushed anything. flushCount.incrementAndGet(); } flushSuccess = true; - - synchronized(this) { + + synchronized (this) { maybeApplyDeletes(true); - + readerPool.commit(segmentInfos); - + // Must clone the segmentInfos while we still // hold fullFlushLock and while sync'd so that // no partial changes (eg a delete w/o // corresponding add from an updateDocument) can // sneak into the commit point: toCommit = segmentInfos.clone(); - + pendingCommitChangeCount = changeCount; - + // This protects the segmentInfos we are now going - // to commit. This is important in case, eg, while + // to commit. This is important in case, eg, while // we are trying to sync all referenced files, a // merge completes which would otherwise have - // removed the files we are now syncing. + // removed the files we are now syncing. filesToCommit = toCommit.files(directory, false); deleter.incRef(filesToCommit); } @@ -2661,7 +2948,7 @@ } catch (OutOfMemoryError oom) { handleOOM(oom, "prepareCommit"); } - + boolean success = false; try { if (anySegmentsFlushed) { @@ -2676,17 +2963,17 @@ } } } - + startCommit(toCommit); } } /** * Sets the commit user data map. That method is considered a transaction by - * {@link IndexWriter} and will be {@link #commit() committed} even if no other - * changes were made to the writer instance. Note that you must call this method - * before {@link #prepareCommit()}, or otherwise it won't be included in the - * follow-on {@link #commit()}. + * {@link IndexWriter} and will be {@link #commit() committed} even if no + * other changes were made to the writer instance. Note that you must call + * this method before {@link #prepareCommit()}, or otherwise it won't be + * included in the follow-on {@link #commit()}. *

    * NOTE: the map is cloned internally, therefore altering the map's
    * contents after calling this method has no effect.
@@ -2707,34 +2994,34 @@
   // Used only by commit and prepareCommit, below; lock
   // order is commitLock -> IW
   private final Object commitLock = new Object();
-
+
   /**
-   * Commits all pending changes (added & deleted
-   * documents, segment merges, added
-   * indexes, etc.) to the index, and syncs all referenced
-   * index files, such that a reader will see the changes
-   * and the index updates will survive an OS or machine
-   * crash or power loss. Note that this does not wait for
-   * any running background merges to finish. This may be a
-   * costly operation, so you should test the cost in your
-   * application and do it only when really necessary.
-   *
-   * Note that this operation calls Directory.sync on
-   * the index files. That call should not return until the
-   * file contents & metadata are on stable storage. For
-   * FSDirectory, this calls the OS's fsync. But, beware:
-   * some hardware devices may in fact cache writes even
-   * during fsync, and return before the bits are actually
-   * on stable storage, to give the appearance of faster
-   * performance. If you have such a device, and it does
-   * not have a battery backup (for example) then on power
-   * loss it may still lose data. Lucene cannot guarantee
-   * consistency on such devices.
-   *
-   * NOTE: if this method hits an OutOfMemoryError
-   * you should immediately close the writer. See above for details.
-   *
+   * Commits all pending changes (added & deleted documents, segment merges,
+   * added indexes, etc.) to the index, and syncs all referenced index files,
+   * such that a reader will see the changes and the index updates will survive
+   * an OS or machine crash or power loss. Note that this does not wait for any
+   * running background merges to finish. This may be a costly operation, so you
+   * should test the cost in your application and do it only when really
+   * necessary.
+   *
+   * Note that this operation calls Directory.sync on the index files. That call
+   * should not return until the file contents & metadata are on stable storage.
+   * For FSDirectory, this calls the OS's fsync. But, beware: some hardware
+   * devices may in fact cache writes even during fsync, and return before the
+   * bits are actually on stable storage, to give the appearance of faster
+   * performance. If you have such a device, and it does not have a battery
+   * backup (for example) then on power loss it may still lose data. Lucene
+   * cannot guarantee consistency on such devices.
+   *
+   * NOTE: if this method hits an OutOfMemoryError you should immediately
+   * close the writer. See above for details.
+   *

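// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: a minimal sketch of the two-phase
// commit sequence documented in the javadoc above, written against the public
// 4.x IndexWriter API. The index path is a placeholder, and the commit
// user-data call assumes the setCommitData(Map) method whose javadoc appears
// earlier in this hunk.
import java.io.File;
import java.util.Collections;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class TwoPhaseCommitSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index")); // placeholder path
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    try {
      // ... addDocument / updateDocument / deleteDocuments calls go here ...
      writer.setCommitData(Collections.singletonMap("marker", "batch-42"));
      writer.prepareCommit(); // phase 1: flush, sync files, write the pending segments_N
      writer.commit();        // phase 2: publish the prepared commit point
      writer.close();
    } catch (Exception e) {
      writer.rollback();      // revert everything done since the writer was opened
    }
  }
}
// ---------------------------------------------------------------------------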
    + * * @see #prepareCommit */ @Override @@ -2742,20 +3029,20 @@ ensureOpen(); commitInternal(); } - + private final void commitInternal() throws IOException { - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "commit: start"); } - - synchronized(commitLock) { + + synchronized (commitLock) { ensureOpen(false); - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "commit: enter lock"); } - + if (pendingCommit == null) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "commit: now prepare"); @@ -2766,13 +3053,13 @@ infoStream.message("IW", "commit: already prepared"); } } - + finishCommit(); } } - + private synchronized final void finishCommit() throws IOException { - + if (pendingCommit != null) { try { if (infoStream.isEnabled("IW")) { @@ -2780,7 +3067,8 @@ } pendingCommit.finishCommit(directory); if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "commit: wrote segments file \"" + pendingCommit.getSegmentsFileName() + "\""); + infoStream.message("IW", "commit: wrote segments file \"" + + pendingCommit.getSegmentsFileName() + "\""); } lastCommitChangeCount = pendingCommitChangeCount; segmentInfos.updateGeneration(pendingCommit); @@ -2791,65 +3079,72 @@ deleter.decRef(filesToCommit); filesToCommit = null; pendingCommit = null; + updatesPending = false; notifyAll(); } - + } else { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "commit: pendingCommit == null; skip"); } } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "commit: done"); } } - + // Ensures only one flush() is actually flushing segments // at a time: private final Object fullFlushLock = new Object(); - + /** - * Flush all in-memory buffered updates (adds and deletes) - * to the Directory. - * @param triggerMerge if true, we may merge segments (if - * deletes or docs were flushed) if necessary - * @param applyAllDeletes whether pending deletes should also + * Flush all in-memory buffered updates (adds and deletes) to the Directory. + * + * @param triggerMerge + * if true, we may merge segments (if deletes or docs were flushed) + * if necessary + * @param applyAllDeletes + * whether pending deletes should also */ - protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws IOException { - + protected final void flush(boolean triggerMerge, boolean applyAllDeletes) + throws IOException { + // NOTE: this method cannot be sync'd because // maybeMerge() in turn calls mergeScheduler.merge which // in turn can take a long time to run and we don't want - // to hold the lock for that. In the case of + // to hold the lock for that. In the case of // ConcurrentMergeScheduler this can lead to deadlock // when it stalls due to too many running merges. 
- - // We can be called during close, when closing==true, so we must pass false to ensureOpen: + + // We can be called during close, when closing==true, so we must pass false + // to ensureOpen: ensureOpen(false); if (doFlush(applyAllDeletes) && triggerMerge) { maybeMerge(MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); } } - + private boolean doFlush(boolean applyAllDeletes) throws IOException { if (hitOOM) { - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush"); + throw new IllegalStateException( + "this writer hit an OutOfMemoryError; cannot flush"); } - + doBeforeFlush(); assert testPoint("startDoFlush"); boolean success = false; try { - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", " start flush: applyAllDeletes=" + applyAllDeletes); + infoStream.message("IW", " start flush: applyAllDeletes=" + + applyAllDeletes); infoStream.message("IW", " index before flush " + segString()); } final boolean anySegmentFlushed; synchronized (fullFlushLock) { - boolean flushSuccess = false; + boolean flushSuccess = false; try { anySegmentFlushed = docWriter.flushAllThreads(); flushSuccess = true; @@ -2857,7 +3152,7 @@ docWriter.finishFullFlush(flushSuccess); } } - synchronized(this) { + synchronized (this) { maybeApplyDeletes(applyAllDeletes); doAfterFlush(); if (!anySegmentFlushed) { @@ -2880,27 +3175,32 @@ } } - final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException { + final synchronized void maybeApplyDeletes(boolean applyAllDeletes) + throws IOException { if (applyAllDeletes) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "apply all deletes during flush"); } applyAllDeletes(); } else if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + infoStream.message("IW", "don't apply deletes now delTermCount=" + + bufferedDeletesStream.numTerms() + " bytesUsed=" + + bufferedDeletesStream.bytesUsed()); } } final synchronized void applyAllDeletes() throws IOException { flushDeletesCount.incrementAndGet(); final BufferedDeletesStream.ApplyDeletesResult result; - result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos.asList()); + result = bufferedDeletesStream.applyDeletes(readerPool, + segmentInfos.asList()); if (result.anyDeletes) { checkpoint(); } if (!keepFullyDeletedSegments && result.allDeleted != null) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "drop 100% deleted segments: " + segString(result.allDeleted)); + infoStream.message("IW", "drop 100% deleted segments: " + + segString(result.allDeleted)); } for (SegmentInfoPerCommit info : result.allDeleted) { // If a merge has already registered for this @@ -2916,13 +3216,15 @@ } bufferedDeletesStream.prune(segmentInfos); } - - /** Expert: Return the total size of all index files currently cached in memory. - * Useful for size management with flushRamDocs() + + /** + * Expert: Return the total size of all index files currently cached in + * memory. Useful for size management with flushRamDocs() */ public final long ramSizeInBytes() { ensureOpen(); - return docWriter.flushControl.netBytes() + bufferedDeletesStream.bytesUsed(); + return docWriter.flushControl.netBytes() + + bufferedDeletesStream.bytesUsed(); } // for testing only @@ -2931,50 +3233,55 @@ assert test = true; return test ? docWriter : null; } - - /** Expert: Return the number of documents currently - * buffered in RAM. 
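// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: a small sketch showing how the
// RAM-accounting methods documented here (numRamDocs, ramSizeInBytes) can be
// read alongside the RAM buffer setting that triggers automatic flushes.
// The field name, document count and 32 MB figure are illustrative only.
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class RamUsageSketch {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
        new StandardAnalyzer(Version.LUCENE_40));
    conf.setRAMBufferSizeMB(32.0); // flush a new segment once ~32 MB is buffered
    IndexWriter writer = new IndexWriter(
        FSDirectory.open(new File("/tmp/idx")), conf); // placeholder path
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new TextField("body", "document number " + i, Field.Store.NO));
      writer.addDocument(doc);
    }
    // Inspect what is still buffered in RAM before the next flush/commit:
    System.out.println("buffered docs=" + writer.numRamDocs()
        + " bytes=" + writer.ramSizeInBytes());
    writer.close();
  }
}
// ---------------------------------------------------------------------------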
*/ + + /** + * Expert: Return the number of documents currently buffered in RAM. + */ public final synchronized int numRamDocs() { ensureOpen(); return docWriter.getNumDocs(); } - + private synchronized void ensureValidMerge(MergePolicy.OneMerge merge) { - for(SegmentInfoPerCommit info : merge.segments) { + for (SegmentInfoPerCommit info : merge.segments) { if (!segmentInfos.contains(info)) { - throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.info.name + ") that is not in the current index " + segString(), directory); + throw new MergePolicy.MergeException("MergePolicy selected a segment (" + + info.info.name + ") that is not in the current index " + + segString(), directory); } } } - - /** Carefully merges deletes for the segments we just - * merged. This is tricky because, although merging will - * clear all deletes (compacts the documents), new - * deletes may have been flushed to the segments since - * the merge was started. This method "carries over" - * such new deletes onto the newly merged segment, and - * saves the resulting deletes file (incrementing the - * delete generation for merge.info). If no deletes were - * flushed, no new deletes file is saved. */ - synchronized private ReadersAndLiveDocs commitMergedDeletes(MergePolicy.OneMerge merge) throws IOException { - + + /** + * Carefully merges deletes for the segments we just merged. This is tricky + * because, although merging will clear all deletes (compacts the documents), + * new deletes may have been flushed to the segments since the merge was + * started. This method "carries over" such new deletes onto the newly merged + * segment, and saves the resulting deletes file (incrementing the delete + * generation for merge.info). If no deletes were flushed, no new deletes file + * is saved. + */ + synchronized private ReadersAndLiveDocs commitMergedDeletes( + MergePolicy.OneMerge merge) throws IOException { + assert testPoint("startCommitMergeDeletes"); - + final List sourceSegments = merge.segments; - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "commitMergeDeletes " + segString(merge.segments)); + infoStream.message("IW", "commitMergeDeletes " + + segString(merge.segments)); } - + // Carefully merge deletes that occurred after we // started merging: int docUpto = 0; long minGen = Long.MAX_VALUE; - + // Lazy init (only when we find a delete to carry over): ReadersAndLiveDocs mergedDeletes = null; - - for(int i=0; i < sourceSegments.size(); i++) { + + for (int i = 0; i < sourceSegments.size(); i++) { SegmentInfoPerCommit info = sourceSegments.get(i); minGen = Math.min(info.getBufferedDeletesGen(), minGen); final int docCount = info.info.getDocCount(); @@ -2982,35 +3289,35 @@ final Bits currentLiveDocs; final ReadersAndLiveDocs rld = readerPool.get(info, false); // We hold a ref so it should still be in the pool: - assert rld != null: "seg=" + info.info.name; + assert rld != null : "seg=" + info.info.name; currentLiveDocs = rld.getLiveDocs(); - + if (prevLiveDocs != null) { - + // If we had deletions on starting the merge we must // still have deletions now: assert currentLiveDocs != null; assert prevLiveDocs.length() == docCount; assert currentLiveDocs.length() == docCount; - + // There were deletes on this segment when the merge - // started. The merge has collapsed away those + // started. 
The merge has collapsed away those // deletes, but, if new deletes were flushed since // the merge started, we must now carefully keep any // newly flushed deletes but mapping them to the new // docIDs. - + // Since we copy-on-write, if any new deletes were // applied after merging has started, we can just // check if the before/after liveDocs have changed. // If so, we must carefully merge the liveDocs one // doc at a time: if (currentLiveDocs != prevLiveDocs) { - + // This means this segment received new deletes // since we started the merge, so we // must merge them: - for(int j=0;j merge.info.getBufferedDeletesGen(); - + merge.info.setBufferedDeletesGen(minGen); - + return mergedDeletes; } - - synchronized private boolean commitMerge(MergePolicy.OneMerge merge) throws IOException { - + + synchronized private boolean commitMerge(MergePolicy.OneMerge merge) + throws IOException { + assert testPoint("startCommitMerge"); - + if (hitOOM) { - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge"); + throw new IllegalStateException( + "this writer hit an OutOfMemoryError; cannot complete merge"); } - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "commitMerge: " + segString(merge.segments) + " index=" + segString()); + infoStream.message("IW", "commitMerge: " + segString(merge.segments) + + " index=" + segString()); } - + assert merge.registerDone; - + // If merge was explicitly aborted, or, if rollback() or // rollbackTransaction() had been called since our merge // started (which results in an unqualified @@ -3096,52 +3408,56 @@ deleter.deleteNewFiles(merge.info.files()); return false; } - - final ReadersAndLiveDocs mergedDeletes = merge.info.info.getDocCount() == 0 ? null : commitMergedDeletes(merge); - + + final ReadersAndLiveDocs mergedDeletes = merge.info.info.getDocCount() == 0 ? null + : commitMergedDeletes(merge); + assert mergedDeletes == null || mergedDeletes.getPendingDeleteCount() != 0; - + // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound // format as well: - + assert !segmentInfos.contains(merge.info); - - final boolean allDeleted = merge.segments.size() == 0 || - merge.info.info.getDocCount() == 0 || - (mergedDeletes != null && - mergedDeletes.getPendingDeleteCount() == merge.info.info.getDocCount()); - + + final boolean allDeleted = merge.segments.size() == 0 + || merge.info.info.getDocCount() == 0 + || (mergedDeletes != null && mergedDeletes.getPendingDeleteCount() == merge.info.info + .getDocCount()); + if (infoStream.isEnabled("IW")) { if (allDeleted) { - infoStream.message("IW", "merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert")); + infoStream.message("IW", "merged segment " + merge.info + + " is 100% deleted" + + (keepFullyDeletedSegments ? 
"" : "; skipping insert")); } } - + final boolean dropSegment = allDeleted && !keepFullyDeletedSegments; - + // If we merged no segments then we better be dropping // the new segment: assert merge.segments.size() > 0 || dropSegment; - - assert merge.info.info.getDocCount() != 0 || keepFullyDeletedSegments || dropSegment; - + + assert merge.info.info.getDocCount() != 0 || keepFullyDeletedSegments + || dropSegment; + segmentInfos.applyMergeChanges(merge, dropSegment); - + if (mergedDeletes != null) { if (dropSegment) { mergedDeletes.dropChanges(); } readerPool.release(mergedDeletes); } - + if (dropSegment) { assert !segmentInfos.contains(merge.info); readerPool.drop(merge.info); deleter.deleteNewFiles(merge.info.files()); } - + boolean success = false; try { // Must close before checkpoint, otherwise IFD won't be @@ -3163,35 +3479,38 @@ } } } - + deleter.deletePendingFiles(); - + deleter.deletePendingFiles(); + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "after commitMerge: " + segString()); } - + if (merge.maxNumSegments != -1 && !dropSegment) { // cascade the forceMerge: if (!segmentsToMerge.containsKey(merge.info)) { segmentsToMerge.put(merge.info, Boolean.FALSE); } } - + return true; } - - final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException { - + + final private void handleMergeException(Throwable t, + MergePolicy.OneMerge merge) throws IOException { + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "handleMergeException: merge=" + segString(merge.segments) + " exc=" + t); + infoStream.message("IW", "handleMergeException: merge=" + + segString(merge.segments) + " exc=" + t); } - + // Set the exception on the merge, so if // forceMerge is waiting on us it sees the root // cause exception: merge.setException(t); addMergeException(merge); - + if (t instanceof MergePolicy.MergeAbortedException) { // We can ignore this exception (it happens when // close(false) or rollback is called), unless the @@ -3199,43 +3518,40 @@ // in which case we must throw it so, for example, the // rollbackTransaction code in addIndexes* is // executed. - if (merge.isExternal) - throw (MergePolicy.MergeAbortedException) t; - } else if (t instanceof IOException) - throw (IOException) t; - else if (t instanceof RuntimeException) - throw (RuntimeException) t; - else if (t instanceof Error) - throw (Error) t; + if (merge.isExternal) throw (MergePolicy.MergeAbortedException) t; + } else if (t instanceof IOException) throw (IOException) t; + else if (t instanceof RuntimeException) throw (RuntimeException) t; + else if (t instanceof Error) throw (Error) t; else - // Should not get here - throw new RuntimeException(t); + // Should not get here + throw new RuntimeException(t); } - + /** - * Merges the indicated segments, replacing them in the stack with a - * single segment. + * Merges the indicated segments, replacing them in the stack with a single + * segment. 
* * @lucene.experimental */ public void merge(MergePolicy.OneMerge merge) throws IOException { - + boolean success = false; - + final long t0 = System.currentTimeMillis(); - + try { try { try { mergeInit(merge); - //if (merge.info != null) { - //System.out.println("MERGE: " + merge.info.info.name); - //} - + // if (merge.info != null) { + // System.out.println("MERGE: " + merge.info.info.name); + // } + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "now merge\n merge=" + segString(merge.segments) + "\n index=" + segString()); + infoStream.message("IW", "now merge\n merge=" + + segString(merge.segments) + "\n index=" + segString()); } - + mergeMiddle(merge); mergeSuccess(merge); success = true; @@ -3243,9 +3559,9 @@ handleMergeException(t, merge); } } finally { - synchronized(this) { + synchronized (this) { mergeFinish(merge); - + if (!success) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "hit exception during merge"); @@ -3254,12 +3570,14 @@ deleter.refresh(merge.info.info.name); } } - + // This merge (and, generally, any change to the // segments) may now enable new merges, so we call // merge policy & update pending merges. - if (success && !merge.isAborted() && (merge.maxNumSegments != -1 || (!closed && !closing))) { - updatePendingMerges(MergeTrigger.MERGE_FINISHED, merge.maxNumSegments); + if (success && !merge.isAborted() + && (merge.maxNumSegments != -1 || (!closed && !closing))) { + updatePendingMerges(MergeTrigger.MERGE_FINISHED, + merge.maxNumSegments); } } } @@ -3268,44 +3586,52 @@ } if (merge.info != null && !merge.isAborted()) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.info.getDocCount() + " docs"); + infoStream.message("IW", "merge time " + + (System.currentTimeMillis() - t0) + " msec for " + + merge.info.info.getDocCount() + " docs"); } } } - + /** Hook that's called when the specified merge is complete. */ - void mergeSuccess(MergePolicy.OneMerge merge) { - } - - /** Checks whether this merge involves any segments - * already participating in a merge. If not, this merge - * is "registered", meaning we record that its segments - * are now participating in a merge, and true is - * returned. Else (the merge conflicts) false is - * returned. */ - final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws IOException { - + void mergeSuccess(MergePolicy.OneMerge merge) {} + + /** + * Checks whether this merge involves any segments already participating in a + * merge. If not, this merge is "registered", meaning we record that its + * segments are now participating in a merge, and true is returned. Else (the + * merge conflicts) false is returned. 
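// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: applications normally never call
// merge(OneMerge) or registerMerge directly; the configured MergeScheduler
// drives them. A typical interaction is tuning the MergePolicy and optionally
// forcing a merge, as sketched below. The tuning values are arbitrary examples.
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class MergeTuningSketch {
  public static void main(String[] args) throws Exception {
    TieredMergePolicy mp = new TieredMergePolicy();
    mp.setMaxMergeAtOnce(8);    // merge at most 8 segments at a time
    mp.setSegmentsPerTier(8.0); // tolerate ~8 segments per tier before merging
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
        new StandardAnalyzer(Version.LUCENE_40));
    conf.setMergePolicy(mp);
    IndexWriter writer = new IndexWriter(
        FSDirectory.open(new File("/tmp/idx")), conf); // placeholder path
    // ... indexing ...
    writer.forceMerge(1); // cascades merges until one segment remains
    writer.close();
  }
}
// ---------------------------------------------------------------------------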
+ */ + final synchronized boolean registerMerge(MergePolicy.OneMerge merge) + throws IOException { + if (merge.registerDone) { return true; } assert merge.segments.size() > 0; - + if (stopMerges) { merge.abort(); - throw new MergePolicy.MergeAbortedException("merge is aborted: " + segString(merge.segments)); + throw new MergePolicy.MergeAbortedException("merge is aborted: " + + segString(merge.segments)); } - + boolean isExternal = false; - for(SegmentInfoPerCommit info : merge.segments) { + for (SegmentInfoPerCommit info : merge.segments) { if (mergingSegments.contains(info)) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "reject merge " + segString(merge.segments) + ": segment " + segString(info) + " is already marked for merge"); + infoStream + .message("IW", "reject merge " + segString(merge.segments) + + ": segment " + segString(info) + + " is already marked for merge"); } return false; } if (!segmentInfos.contains(info)) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "reject merge " + segString(merge.segments) + ": segment " + segString(info) + " does not exist in live infos"); + infoStream.message("IW", "reject merge " + segString(merge.segments) + + ": segment " + segString(info) + + " does not exist in live infos"); } return false; } @@ -3316,18 +3642,20 @@ merge.maxNumSegments = mergeMaxNumSegments; } } - + ensureValidMerge(merge); - + pendingMerges.add(merge); - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "add merge to pendingMerges: " + segString(merge.segments) + " [total " + pendingMerges.size() + " pending]"); + infoStream.message("IW", "add merge to pendingMerges: " + + segString(merge.segments) + " [total " + pendingMerges.size() + + " pending]"); } - + merge.mergeGen = mergeGen; merge.isExternal = isExternal; - + // OK it does not conflict; now record that this merge // is running (while synchronized) to avoid race // condition where two conflicting merges from different @@ -3335,31 +3663,35 @@ if (infoStream.isEnabled("IW")) { StringBuilder builder = new StringBuilder("registerMerge merging= ["); for (SegmentInfoPerCommit info : mergingSegments) { - builder.append(info.info.name).append(", "); + builder.append(info.info.name).append(", "); } builder.append("]"); - // don't call mergingSegments.toString() could lead to ConcurrentModException + // don't call mergingSegments.toString() could lead to + // ConcurrentModException // since merge updates the segments FieldInfos if (infoStream.isEnabled("IW")) { - infoStream.message("IW", builder.toString()); + infoStream.message("IW", builder.toString()); } } - for(SegmentInfoPerCommit info : merge.segments) { + for (SegmentInfoPerCommit info : merge.segments) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "registerMerge info=" + segString(info)); } mergingSegments.add(info); } - + // Merge is now registered merge.registerDone = true; - + return true; } - - /** Does initial setup for a merge, which is fast but holds - * the synchronized lock on IndexWriter instance. */ - final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException { + + /** + * Does initial setup for a merge, which is fast but holds the synchronized + * lock on IndexWriter instance. 
+ */ + final synchronized void mergeInit(MergePolicy.OneMerge merge) + throws IOException { boolean success = false; try { _mergeInit(merge); @@ -3373,44 +3705,48 @@ } } } - - synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException { - + + synchronized private void _mergeInit(MergePolicy.OneMerge merge) + throws IOException { + assert testPoint("startMergeInit"); - + assert merge.registerDone; assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0; - + if (hitOOM) { - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge"); + throw new IllegalStateException( + "this writer hit an OutOfMemoryError; cannot merge"); } - + if (merge.info != null) { // mergeInit already done return; } - + if (merge.isAborted()) { return; } - + // TODO: in the non-pool'd case this is somewhat // wasteful, because we open these readers, close them, - // and then open them again for merging. Maybe we + // and then open them again for merging. Maybe we // could pre-pool them somehow in that case... - + // Lock order: IW -> BD - final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments); - + final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream + .applyDeletes(readerPool, merge.segments); + if (result.anyDeletes) { checkpoint(); } - + if (!keepFullyDeletedSegments && result.allDeleted != null) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "drop 100% deleted segments: " + result.allDeleted); + infoStream.message("IW", "drop 100% deleted segments: " + + result.allDeleted); } - for(SegmentInfoPerCommit info : result.allDeleted) { + for (SegmentInfoPerCommit info : result.allDeleted) { segmentInfos.remove(info); if (merge.segments.contains(info)) { mergingSegments.remove(info); @@ -3420,42 +3756,45 @@ } checkpoint(); } - + // Bind a new segment name here so even with // ConcurrentMergePolicy we keep deterministic segment // names. 
final String mergeSegmentName = newSegmentName(); - SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, -1, false, codec, null, null); - merge.info = new SegmentInfoPerCommit(si, 0, -1L); - + SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, + mergeSegmentName, -1, false, codec, null, null); + merge.info = new SegmentInfoPerCommit(si, 0, -1L, -1L); + // Lock order: IW -> BD bufferedDeletesStream.prune(segmentInfos); - + Map details = new HashMap(); - details.put("mergeMaxNumSegments", ""+merge.maxNumSegments); + details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); details.put("mergeFactor", Integer.toString(merge.segments.size())); setDiagnostics(si, "merge", details); - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "merge seg=" + merge.info.info.name + " " + segString(merge.segments)); + infoStream.message("IW", "merge seg=" + merge.info.info.name + " " + + segString(merge.segments)); } - + assert merge.estimatedMergeBytes == 0; - for(SegmentInfoPerCommit info : merge.segments) { + for (SegmentInfoPerCommit info : merge.segments) { if (info.info.getDocCount() > 0) { final int delCount = numDeletedDocs(info); assert delCount <= info.info.getDocCount(); - final double delRatio = ((double) delCount)/info.info.getDocCount(); + final double delRatio = ((double) delCount) / info.info.getDocCount(); merge.estimatedMergeBytes += info.info.sizeInBytes() * (1.0 - delRatio); } } } - + static void setDiagnostics(SegmentInfo info, String source) { setDiagnostics(info, source, null); } - - private static void setDiagnostics(SegmentInfo info, String source, Map details) { + + private static void setDiagnostics(SegmentInfo info, String source, + Map details) { Map diagnostics = new HashMap(); diagnostics.put("source", source); diagnostics.put("lucene.version", Constants.LUCENE_VERSION); @@ -3469,39 +3808,43 @@ } info.setDiagnostics(diagnostics); } - - /** Does fininishing for a merge, which is fast but holds - * the synchronized lock on IndexWriter instance. */ + + /** + * Does fininishing for a merge, which is fast but holds the synchronized lock + * on IndexWriter instance. + */ final synchronized void mergeFinish(MergePolicy.OneMerge merge) { - + // forceMerge, addIndexes or finishMerges may be waiting // on merges to finish. 
notifyAll(); - + // It's possible we are called twice, eg if there was an // exception inside mergeInit if (merge.registerDone) { final List sourceSegments = merge.segments; - for(SegmentInfoPerCommit info : sourceSegments) { + for (SegmentInfoPerCommit info : sourceSegments) { mergingSegments.remove(info); } merge.registerDone = false; } - + runningMerges.remove(merge); } - - private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException { + + private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, + boolean suppressExceptions) throws IOException { final int numSegments = merge.readers.size(); Throwable th = null; - + boolean drop = !suppressExceptions; for (int i = 0; i < numSegments; i++) { final SegmentReader sr = merge.readers.get(i); if (sr != null) { try { - final ReadersAndLiveDocs rld = readerPool.get(sr.getSegmentInfo(), false); + final ReadersAndLiveDocs rld = readerPool.get(sr.getSegmentInfo(), + false); // We still hold a ref so it should not have been removed: assert rld != null; if (drop) { @@ -3529,71 +3872,79 @@ throw new RuntimeException(th); } } - - /** Does the actual (time-consuming) work of the merge, - * but without holding synchronized lock on IndexWriter - * instance */ + + /** + * Does the actual (time-consuming) work of the merge, but without holding + * synchronized lock on IndexWriter instance + */ private int mergeMiddle(MergePolicy.OneMerge merge) throws IOException { - + merge.checkAborted(directory); - + final String mergedName = merge.info.info.name; - + List sourceSegments = merge.segments; IOContext context = new IOContext(merge.getMergeInfo()); - - final MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory); - final TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory); - - SegmentMerger merger = new SegmentMerger(merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(), checkAbort, - globalFieldNumberMap, context); - + + final MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, + directory); + final TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper( + directory); + + SegmentMerger merger = new SegmentMerger(merge.info.info, infoStream, + dirWrapper, config.getTermIndexInterval(), checkAbort, + globalFieldNumberMap, context); + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "merging " + segString(merge.segments)); } - + merge.readers = new ArrayList(); - + // This is try/finally to make sure merger's readers are // closed: boolean success = false; try { int segUpto = 0; - while(segUpto < sourceSegments.size()) { - + while (segUpto < sourceSegments.size()) { + final SegmentInfoPerCommit info = sourceSegments.get(segUpto); - + // Hold onto the "live" reader; we will use this to // commit merged deletes final ReadersAndLiveDocs rld = readerPool.get(info, true); SegmentReader reader = rld.getMergeReader(context); assert reader != null; - + // Carefully pull the most recent live docs: final Bits liveDocs; final int delCount; - - synchronized(this) { + + synchronized (this) { // Must sync to ensure BufferedDeletesStream // cannot change liveDocs/pendingDeleteCount while // we pull a copy: liveDocs = rld.getReadOnlyLiveDocs(); delCount = rld.getPendingDeleteCount() + info.getDelCount(); - + assert rld.verifyDocCounts(); - + if (infoStream.isEnabled("IW")) { if (rld.getPendingDeleteCount() != 0) { - infoStream.message("IW", "seg=" + segString(info) + " delCount=" + 
info.getDelCount() + " pendingDelCount=" + rld.getPendingDeleteCount()); + infoStream.message("IW", + "seg=" + segString(info) + " delCount=" + info.getDelCount() + + " pendingDelCount=" + rld.getPendingDeleteCount()); } else if (info.getDelCount() != 0) { - infoStream.message("IW", "seg=" + segString(info) + " delCount=" + info.getDelCount()); + infoStream.message("IW", "seg=" + segString(info) + " delCount=" + + info.getDelCount()); } else { - infoStream.message("IW", "seg=" + segString(info) + " no deletes"); + infoStream + .message("IW", "seg=" + segString(info) + " no deletes"); } } } - + // Deletes might have happened after we pulled the merge reader and // before we got a read-only copy of the segment's actual live docs // (taking pending deletes into account). In that case we need to @@ -3601,8 +3952,9 @@ if (reader.numDeletedDocs() != delCount) { // fix the reader's live docs and del count assert delCount > reader.numDeletedDocs(); // beware of zombies - - SegmentReader newReader = new SegmentReader(info, reader.core, liveDocs, info.info.getDocCount() - delCount); + + SegmentReader newReader = new SegmentReader(info, context, + reader.core, liveDocs, info.info.getDocCount() - delCount); boolean released = false; try { rld.release(reader); @@ -3612,20 +3964,23 @@ newReader.decRef(); } } - + reader = newReader; } - + merge.readers.add(reader); - assert delCount <= info.info.getDocCount(): "delCount=" + delCount + " info.docCount=" + info.info.getDocCount() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount(); + assert delCount <= info.info.getDocCount() : "delCount=" + delCount + + " info.docCount=" + info.info.getDocCount() + + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + + " info.getDelCount()=" + info.getDelCount(); if (delCount < info.info.getDocCount()) { merger.add(reader); } segUpto++; } - + merge.checkAborted(directory); - + // This is where all the work happens: MergeState mergeState; boolean success3 = false; @@ -3634,44 +3989,54 @@ success3 = true; } finally { if (!success3) { - synchronized(this) { + synchronized (this) { deleter.refresh(merge.info.info.name); } } } assert mergeState.segmentInfo == merge.info.info; - merge.info.info.setFiles(new HashSet(dirWrapper.getCreatedFiles())); - + merge.info.info + .setFiles(new HashSet(dirWrapper.getCreatedFiles())); + // Record which codec was used to write the segment - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " + - (mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " + - (mergeState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " + - (mergeState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + - (mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + - (mergeState.fieldInfos.hasProx() ? "freqs" : "no freqs")); + infoStream.message("IW", "merge codec=" + + codec + + " docCount=" + + merge.info.info.getDocCount() + + "; merged segment has " + + (mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + + "; " + + (mergeState.fieldInfos.hasNorms() ? "norms" : "no norms") + + "; " + + (mergeState.fieldInfos.hasDocValues() ? "docValues" + : "no docValues") + "; " + + (mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + + (mergeState.fieldInfos.hasProx() ? 
"freqs" : "no freqs")); } - + // Very important to do this before opening the reader // because codec must know if prox was written for // this segment: - //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name); + // System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + + // merge.info.name); boolean useCompoundFile; synchronized (this) { // Guard segmentInfos useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info); } - + if (useCompoundFile) { success = false; - + Collection filesToRemove = merge.info.files(); - + try { - filesToRemove = createCompoundFile(infoStream, directory, checkAbort, merge.info.info, context); + filesToRemove = createCompoundFile(infoStream, directory, checkAbort, + merge.info.info, context, -1); success = true; } catch (IOException ioe) { - synchronized(this) { + synchronized (this) { if (merge.isAborted()) { // This can happen if rollback or close(false) // is called -- fall through to logic below to @@ -3685,38 +4050,43 @@ } finally { if (!success) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "hit exception creating compound file during merge"); + infoStream.message("IW", + "hit exception creating compound file during merge"); } - - synchronized(this) { - deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); - deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + + synchronized (this) { + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", + IndexFileNames.COMPOUND_FILE_EXTENSION)); + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); deleter.deleteNewFiles(merge.info.files()); } } } - + // So that, if we hit exc in deleteNewFiles (next) // or in commitMerge (later), we close the // per-segment readers in the finally clause below: success = false; - - synchronized(this) { - + + synchronized (this) { + // delete new non cfs files directly: they were never // registered with IFD deleter.deleteNewFiles(filesToRemove); - + if (merge.isAborted()) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "abort merge after building CFS"); } - deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); - deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", + IndexFileNames.COMPOUND_FILE_EXTENSION)); + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); return 0; } } - + merge.info.info.setUseCompoundFile(true); } else { // So that, if we hit exc in commitMerge (later), @@ -3724,54 +4094,62 @@ // clause below: success = false; } - - // Have codec write SegmentInfo. Must do this after + + // Have codec write SegmentInfo. 
Must do this after // creating CFS so that 1) .si isn't slurped into CFS, // and 2) .si reflects useCompoundFile=true change // above: boolean success2 = false; try { - codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, merge.info.info, mergeState.fieldInfos, context); + SegmentInfoWriter segmentInfoWriter = codec.segmentInfoFormat() + .getSegmentInfoWriter(); + segmentInfoWriter.write(directory, merge.info.info, + mergeState.fieldInfos, context); success2 = true; } finally { if (!success2) { - synchronized(this) { + synchronized (this) { deleter.deleteNewFiles(merge.info.files()); } } } - + // TODO: ideally we would freeze merge.info here!! // because any changes after writing the .si will be - // lost... - + // lost... + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.)); + infoStream.message("IW", String.format(Locale.ROOT, + "merged segment size=%.3f MB vs estimate=%.3f MB", + merge.info.info.sizeInBytes() / 1024. / 1024., + merge.estimatedMergeBytes / 1024 / 1024.)); } - - final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); - if (poolReaders && mergedSegmentWarmer != null && merge.info.info.getDocCount() != 0) { + + final IndexReaderWarmer mergedSegmentWarmer = config + .getMergedSegmentWarmer(); + if (poolReaders && mergedSegmentWarmer != null + && merge.info.info.getDocCount() != 0) { final ReadersAndLiveDocs rld = readerPool.get(merge.info, true); final SegmentReader sr = rld.getReader(IOContext.READ); try { mergedSegmentWarmer.warm(sr); } finally { - synchronized(this) { + synchronized (this) { rld.release(sr); readerPool.release(rld); } } } - + // Force READ context because we merge deletes onto // this reader: if (!commitMerge(merge)) { // commitMerge will return false if this merge was aborted return 0; } - + success = true; - + } finally { // Readers are already closed in commitMerge if we didn't hit // an exc: @@ -3779,47 +4157,50 @@ closeMergeReaders(merge, true); } } - + return merge.info.info.getDocCount(); } - + synchronized void addMergeException(MergePolicy.OneMerge merge) { assert merge.getException() != null; if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen) { mergeExceptions.add(merge); } } - + // For test purposes. final int getBufferedDeleteTermsSize() { return docWriter.getBufferedDeleteTermsSize(); } - + // For test purposes. final int getNumBufferedDeleteTerms() { return docWriter.getNumBufferedDeleteTerms(); } - + // utility routines for tests synchronized SegmentInfoPerCommit newestSegment() { - return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null; + return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size() - 1) + : null; } - - /** Returns a string description of all segments, for - * debugging. - * - * @lucene.internal */ + + /** + * Returns a string description of all segments, for debugging. + * + * @lucene.internal + */ public synchronized String segString() { return segString(segmentInfos); } - - /** Returns a string description of the specified - * segments, for debugging. - * - * @lucene.internal */ + + /** + * Returns a string description of the specified segments, for debugging. 
+ * + * @lucene.internal + */ public synchronized String segString(Iterable infos) { final StringBuilder buffer = new StringBuilder(); - for(final SegmentInfoPerCommit info : infos) { + for (final SegmentInfoPerCommit info : infos) { if (buffer.length() > 0) { buffer.append(' '); } @@ -3827,15 +4208,17 @@ } return buffer.toString(); } - - /** Returns a string description of the specified - * segment, for debugging. - * - * @lucene.internal */ + + /** + * Returns a string description of the specified segment, for debugging. + * + * @lucene.internal + */ public synchronized String segString(SegmentInfoPerCommit info) { - return info.toString(info.info.dir, numDeletedDocs(info) - info.getDelCount()); + return info.toString(info.info.dir, + numDeletedDocs(info) - info.getDelCount()); } - + private synchronized void doWait() { // NOTE: the callers of this method should in theory // be able to do simply wait(), but, as a defense @@ -3849,120 +4232,128 @@ throw new ThreadInterruptedException(ie); } } - + private boolean keepFullyDeletedSegments; - - /** Only for testing. - * - * @lucene.internal */ + + /** + * Only for testing. + * + * @lucene.internal + */ void keepFullyDeletedSegments() { keepFullyDeletedSegments = true; } - + boolean getKeepFullyDeletedSegments() { return keepFullyDeletedSegments; } - + // called only from assert private boolean filesExist(SegmentInfos toSync) throws IOException { Collection files = toSync.files(directory, false); - for(final String fileName: files) { - assert directory.fileExists(fileName): "file " + fileName + " does not exist"; + for (final String fileName : files) { + assert directory.fileExists(fileName) : "file " + fileName + + " does not exist"; // If this trips it means we are missing a call to // .checkpoint somewhere, because by the time we // are called, deleter should know about every // file referenced by the current head // segmentInfos: - assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName; + assert deleter.exists(fileName) : "IndexFileDeleter doesn't know about file " + + fileName; } return true; } - + // For infoStream output synchronized SegmentInfos toLiveInfos(SegmentInfos sis) { final SegmentInfos newSIS = new SegmentInfos(); - final Map liveSIS = new HashMap(); - for(SegmentInfoPerCommit info : segmentInfos) { + final Map liveSIS = new HashMap(); + for (SegmentInfoPerCommit info : segmentInfos) { liveSIS.put(info, info); } - for(SegmentInfoPerCommit info : sis) { + for (SegmentInfoPerCommit info : sis) { SegmentInfoPerCommit liveInfo = liveSIS.get(info); if (liveInfo != null) { info = liveInfo; } newSIS.add(info); } - + return newSIS; } - - /** Walk through all files referenced by the current - * segmentInfos and ask the Directory to sync each file, - * if it wasn't already. If that succeeds, then we - * prepare a new segments_N file but do not fully commit - * it. */ + + /** + * Walk through all files referenced by the current segmentInfos and ask the + * Directory to sync each file, if it wasn't already. If that succeeds, then + * we prepare a new segments_N file but do not fully commit it. 
+ */ private void startCommit(final SegmentInfos toSync) throws IOException { - + assert testPoint("startStartCommit"); assert pendingCommit == null; - + if (hitOOM) { - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit"); + throw new IllegalStateException( + "this writer hit an OutOfMemoryError; cannot commit"); } - + try { - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "startCommit(): start"); } - - synchronized(this) { - - assert lastCommitChangeCount <= changeCount: "lastCommitChangeCount=" + lastCommitChangeCount + " changeCount=" + changeCount; - + + synchronized (this) { + + assert lastCommitChangeCount <= changeCount : "lastCommitChangeCount=" + + lastCommitChangeCount + " changeCount=" + changeCount; + if (pendingCommitChangeCount == lastCommitChangeCount) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", " skip startCommit(): no changes pending"); + infoStream + .message("IW", " skip startCommit(): no changes pending"); } deleter.decRef(filesToCommit); filesToCommit = null; return; } - + if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "startCommit index=" + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount); + infoStream.message("IW", "startCommit index=" + + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount); } - + assert filesExist(toSync); } - + assert testPoint("midStartCommit"); - + boolean pendingCommitSet = false; - + try { - + assert testPoint("midStartCommit2"); - - synchronized(this) { - + + synchronized (this) { + assert pendingCommit == null; - + assert segmentInfos.getGeneration() == toSync.getGeneration(); - + // Exception here means nothing is prepared // (this method unwinds everything it did on // an exception) toSync.prepareCommit(directory); - //System.out.println("DONE prepareCommit"); - + // System.out.println("DONE prepareCommit"); + pendingCommitSet = true; pendingCommit = toSync; } - + // This call can take a long time -- 10s of seconds - // or more. We do it without syncing on this: + // or more. We do it without syncing on this: boolean success = false; final Collection filesToSync; try { @@ -3976,26 +4367,27 @@ toSync.rollbackCommit(directory); } } - + if (infoStream.isEnabled("IW")) { infoStream.message("IW", "done all syncs: " + filesToSync); } - + assert testPoint("midStartCommitSuccess"); - + } finally { - synchronized(this) { + synchronized (this) { // Have our master segmentInfos record the - // generations we just prepared. We do this + // generations we just prepared. We do this // on error or success so we don't // double-write a segments_N file. segmentInfos.updateGeneration(toSync); - + if (!pendingCommitSet) { if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "hit exception committing segments file"); + infoStream + .message("IW", "hit exception committing segments file"); } - + // Hit exception deleter.decRef(filesToCommit); filesToCommit = null; @@ -4007,54 +4399,60 @@ } assert testPoint("finishStartCommit"); } - + /** - * Returns true iff the index in the named directory is - * currently locked. - * @param directory the directory to check for a lock - * @throws IOException if there is a low-level IO error + * Returns true iff the index in the named directory is currently + * locked. 
+ * + * @param directory + * the directory to check for a lock + * @throws IOException + * if there is a low-level IO error */ public static boolean isLocked(Directory directory) throws IOException { return directory.makeLock(WRITE_LOCK_NAME).isLocked(); } - + /** * Forcibly unlocks the index in the named directory. *
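// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: a sketch of the static lock helpers
// documented here. Per the Caution paragraph that follows, unlock(Directory)
// should only ever be used by failure-recovery code that knows no other
// writer or thread is accessing the index. The index path is a placeholder.
import java.io.File;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class LockRecoverySketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    if (IndexWriter.isLocked(dir)) {
      // Only safe because we "know" the previous writer process died without
      // releasing the write lock:
      IndexWriter.unlock(dir);
    }
    dir.close();
  }
}
// ---------------------------------------------------------------------------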

    - * Caution: this should only be used by failure recovery code, - * when it is known that no other process nor thread is in fact - * currently accessing this index. + * Caution: this should only be used by failure recovery code, when it is + * known that no other process nor thread is in fact currently accessing this + * index. */ public static void unlock(Directory directory) throws IOException { directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); } - - /** If {@link DirectoryReader#open(IndexWriter,boolean)} has - * been called (ie, this writer is in near real-time - * mode), then after a merge completes, this class can be - * invoked to warm the reader on the newly merged - * segment, before the merge commits. This is not - * required for near real-time search, but will reduce - * search latency on opening a new near real-time reader - * after a merge completes. - * + + /** + * If {@link DirectoryReader#open(IndexWriter,boolean)} has been called (ie, + * this writer is in near real-time mode), then after a merge completes, this + * class can be invoked to warm the reader on the newly merged segment, before + * the merge commits. This is not required for near real-time search, but will + * reduce search latency on opening a new near real-time reader after a merge + * completes. + * * @lucene.experimental - * - *

    NOTE: warm is called before any deletes have - * been carried over to the merged segment. */ + * + *

    + * NOTE: warm is called before any deletes have + * been carried over to the merged segment. + */ public static abstract class IndexReaderWarmer { - - /** Sole constructor. (For invocation by subclass - * constructors, typically implicit.) */ - protected IndexReaderWarmer() { - } - - /** Invoked on the {@link AtomicReader} for the newly - * merged segment, before that segment is made visible - * to near-real-time readers. */ + + /** + * Sole constructor. (For invocation by subclass constructors, typically + * implicit.) + */ + protected IndexReaderWarmer() {} + + /** + * Invoked on the {@link AtomicReader} for the newly merged segment, before + * that segment is made visible to near-real-time readers. + */ public abstract void warm(AtomicReader reader) throws IOException; } - + private void handleOOM(OutOfMemoryError oom, String location) { if (infoStream.isEnabled("IW")) { infoStream.message("IW", "hit OutOfMemoryError inside " + location); @@ -4062,67 +4460,73 @@ hitOOM = true; throw oom; } - - // Used only by assert for testing. Current points: - // startDoFlush - // startCommitMerge - // startStartCommit - // midStartCommit - // midStartCommit2 - // midStartCommitSuccess - // finishStartCommit - // startCommitMergeDeletes - // startMergeInit - // DocumentsWriter.ThreadState.init start + + // Used only by assert for testing. Current points: + // startDoFlush + // startCommitMerge + // startStartCommit + // midStartCommit + // midStartCommit2 + // midStartCommitSuccess + // finishStartCommit + // startCommitMergeDeletes + // startMergeInit + // DocumentsWriter.ThreadState.init start boolean testPoint(String name) { return true; } - + synchronized boolean nrtIsCurrent(SegmentInfos infos) { - //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any())); + // System.out.println("IW.nrtIsCurrent " + (infos.version == + // segmentInfos.version && !docWriter.anyChanges() && + // !bufferedDeletesStream.any())); ensureOpen(); if (infoStream.isEnabled("IW")) { - infoStream.message("IW", "nrtIsCurrent: infoVersion matches: " + (infos.version == segmentInfos.version) + " DW changes: " + docWriter.anyChanges() + " BD changes: "+bufferedDeletesStream.any()); - + infoStream.message("IW", + "nrtIsCurrent: infoVersion matches: " + + (infos.version == segmentInfos.version) + " DW changes: " + + docWriter.anyChanges() + " BD changes: " + + bufferedDeletesStream.any()); + } - return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any(); + return infos.version == segmentInfos.version && !docWriter.anyChanges() + && !bufferedDeletesStream.any(); } - + synchronized boolean isClosed() { return closed; } - - /** Expert: remove any index files that are no longer - * used. - * - *
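// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: a sketch of installing a merged-segment
// warmer for near-real-time readers, as described by the IndexReaderWarmer
// javadoc above. The warm-up work (touching the terms of a "body" field) is
// only an illustration; a real warmer would load whatever the application's
// first searches need.
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;

public class WarmerSketch {
  static void installWarmer(IndexWriterConfig conf) {
    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
      @Override
      public void warm(AtomicReader reader) throws IOException {
        // Touch per-segment data structures so the first NRT search on the
        // newly merged segment does not pay the warm-up cost.
        reader.terms("body"); // "body" is a placeholder field name
      }
    });
  }
}
// ---------------------------------------------------------------------------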

-   * IndexWriter normally deletes unused files itself,
-   * during indexing. However, on Windows, which disallows
-   * deletion of open files, if there is a reader open on
-   * the index then those files cannot be deleted. This is
-   * fine, because IndexWriter will periodically retry
-   * the deletion.
-   *
-   * However, IndexWriter doesn't try that often: only
-   * on open, close, flushing a new segment, and finishing
-   * a merge. If you don't do any of these actions with your
-   * IndexWriter, you'll see the unused files linger. If
-   * that's a problem, call this method to delete them
-   * (once you've closed the open readers that were
-   * preventing their deletion).
-   *
-   * In addition, you can call this method to delete
-   * unreferenced index commits. This might be useful if you
-   * are using an {@link IndexDeletionPolicy} which holds
-   * onto index commits until some criteria are met, but those
-   * commits are no longer needed. Otherwise, those commits will
-   * be deleted the next time commit() is called.
+
+  /**
+   * Expert: remove any index files that are no longer used.
+   *
+   * IndexWriter normally deletes unused files itself, during indexing. However,
+   * on Windows, which disallows deletion of open files, if there is a reader
+   * open on the index then those files cannot be deleted. This is fine, because
+   * IndexWriter will periodically retry the deletion.
+   *
+   * However, IndexWriter doesn't try that often: only on open, close, flushing
+   * a new segment, and finishing a merge. If you don't do any of these actions
+   * with your IndexWriter, you'll see the unused files linger. If that's a
+   * problem, call this method to delete them (once you've closed the open
+   * readers that were preventing their deletion).
+   *
    + * In addition, you can call this method to delete unreferenced index commits. + * This might be useful if you are using an {@link IndexDeletionPolicy} which + * holds onto index commits until some criteria are met, but those commits are + * no longer needed. Otherwise, those commits will be deleted the next time + * commit() is called. */ public synchronized void deleteUnusedFiles() throws IOException { ensureOpen(false); deleter.deletePendingFiles(); deleter.revisitPolicy(); } - + // Called by DirectoryReader.doClose synchronized void deletePendingFiles() throws IOException { deleter.deletePendingFiles(); @@ -4132,25 +4536,50 @@ * NOTE: this method creates a compound file for all files returned by * info.files(). While, generally, this may include separate norms and * deletion files, this SegmentInfo must not reference such files when this - * method is called, because they are not allowed within a compound file. + * method is called, because they are not allowed within a compound file. The + * value of updateGen for a base segment must be negative. */ - static final Collection createCompoundFile(InfoStream infoStream, Directory directory, CheckAbort checkAbort, final SegmentInfo info, IOContext context) - throws IOException { - - final String fileName = IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + static final Collection createCompoundFile(InfoStream infoStream, + Directory directory, CheckAbort checkAbort, final SegmentInfo info, + IOContext context, long updateGen) throws IOException { + + String fileName = IndexFileNames.fileNameFromGeneration(info.name, + IndexFileNames.COMPOUND_FILE_EXTENSION, updateGen, true); + if (fileName == null) { + fileName = IndexFileNames.segmentFileName(info.name, "", + IndexFileNames.COMPOUND_FILE_EXTENSION); + } if (infoStream.isEnabled("IW")) { infoStream.message("IW", "create compound file " + fileName); } // Now merge all added files - Collection files = info.files(); - CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); + String prefix = info.name; + Collection files = null; + if (updateGen < 0) { + files = info.files(); + } else { + // TODO : quick and dirty, better solve by aggregating files in advance + files = new ArrayList(); + prefix = IndexFileNames.fileNameFromGeneration(info.name, "", updateGen, + true); + String[] allFiles = directory.listAll(); + for (int i = 0; i < allFiles.length; i++) { + if (allFiles[i].startsWith(prefix)) { + files.add(allFiles[i]); + } + } + } + final String cfeFileName = IndexFileNames.segmentFileName(prefix, "", + IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); + CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, + fileName, context, true); IOException prior = null; try { for (String file : files) { directory.copy(cfsDir, file, file, context); checkAbort.work(directory.fileLength(file)); } - } catch(IOException ex) { + } catch (IOException ex) { prior = ex; } finally { boolean success = false; @@ -4161,38 +4590,44 @@ if (!success) { try { directory.deleteFile(fileName); - } catch (Throwable t) { - } + } catch (Throwable t) {} try { - directory.deleteFile(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); - } catch (Throwable t) { - } + directory.deleteFile(cfeFileName); + } catch (Throwable t) {} } } } - + // Replace all previous files with the CFS/CFE files: Set siFiles = new HashSet(); + siFiles.addAll(info.files()); + siFiles.removeAll(files); 
siFiles.add(fileName); - siFiles.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + siFiles.add(cfeFileName); info.setFiles(siFiles); - + return files; } /** * Tries to delete the given files if unreferenced - * @param files the files to delete - * @throws IOException if an {@link IOException} occurs + * + * @param files + * the files to delete + * @throws IOException + * if an {@link IOException} occurs * @see IndexFileDeleter#deleteNewFiles(Collection) */ - synchronized final void deleteNewFiles(Collection files) throws IOException { + synchronized final void deleteNewFiles(Collection files) + throws IOException { deleter.deleteNewFiles(files); } /** - * Cleans up residuals from a segment that could not be entirely flushed due to an error - * @see IndexFileDeleter#refresh(String) + * Cleans up residuals from a segment that could not be entirely flushed due + * to an error + * + * @see IndexFileDeleter#refresh(String) */ synchronized final void flushFailed(SegmentInfo info) throws IOException { deleter.refresh(info.name); Index: lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java (working copy) @@ -71,7 +71,7 @@ if (consumer == null) { assert fieldInfo.getNormType() == null || fieldInfo.getNormType() == type; fieldInfo.setNormValueType(type); - consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(""), fieldInfo, type); + consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(), fieldInfo, type); this.initType = type; } if (initType != type) { Index: lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java (working copy) @@ -20,8 +20,10 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; import org.apache.lucene.util.MutableBits; @@ -63,6 +65,8 @@ // liveDocs vs when we loaded it or last wrote it: private int pendingDeleteCount; + private UpdatedSegmentData liveUpdates; + // True if the current liveDocs is referenced by an // external NRT reader: private boolean shared; @@ -207,7 +211,7 @@ } shared = true; if (liveDocs != null) { - return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount); + return new SegmentReader(reader.getSegmentInfo(), context, reader.core, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount); } else { assert reader.getLiveDocs() == liveDocs; reader.incRef(); @@ -290,6 +294,25 @@ } } + public synchronized void setLiveUpdates(UpdatedSegmentData updatedSegmentData) { + assert liveUpdates == null; + liveUpdates = updatedSegmentData; + } + + public synchronized boolean writeLiveUpdates(Directory directory, Codec codec) + throws IOException { + if (liveUpdates == null || !liveUpdates.hasUpdates()) { + return false; + } + 
IOContext context = new IOContext(new FlushInfo(info.info.getDocCount(), + info.info.sizeInBytes())); + writer.writeSegmentUpdates(info, liveUpdates, context); + + liveUpdates = null; + + return true; + } + @Override public String toString() { return "ReadersAndLiveDocs(seg=" + info + " pendingDeleteCount=" + pendingDeleteCount + " shared=" + shared + ")"; Index: lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java (working copy) @@ -80,29 +80,41 @@ private final Set coreClosedListeners = Collections.synchronizedSet(new LinkedHashSet()); - SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentInfoPerCommit si, IOContext context, int termsIndexDivisor) throws IOException { + SegmentCoreReaders(SegmentReader owner, SegmentInfoPerCommit si, long updageGen, IOContext context, int termsIndexDivisor) throws IOException { if (termsIndexDivisor == 0) { throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } + + final SegmentInfo info; + final String infoName; + if (updageGen == -1) { + info = si.info; + infoName = info.name; + } else { + info = new SegmentInfo(si.info, updageGen); + infoName = IndexFileNames.fileNameFromGeneration(si.info.name, "", updageGen, true); + } - final Codec codec = si.info.getCodec(); + Directory dir = info.dir; + + final Codec codec = info.getCodec(); final Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. boolean success = false; try { - if (si.info.getUseCompoundFile()) { - cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(si.info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); + if (info.getUseCompoundFile()) { + cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(infoName, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; } - fieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info.name, IOContext.READONCE); + fieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, infoName, IOContext.READONCE); this.termsIndexDivisor = termsIndexDivisor; final PostingsFormat format = codec.postingsFormat(); - final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, fieldInfos, context, termsIndexDivisor); + final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, info, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields fields = format.fieldsProducer(segmentReadState); assert fields != null; @@ -112,10 +124,10 @@ norms = codec.normsFormat().docsProducer(segmentReadState); perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState); - fieldsReaderOrig = si.info.getCodec().storedFieldsFormat().fieldsReader(cfsDir, si.info, fieldInfos, context); + fieldsReaderOrig = info.getCodec().storedFieldsFormat().fieldsReader(cfsDir, info, fieldInfos, context); if (fieldInfos.hasVectors()) { // open term vector files only as needed - termVectorsReaderOrig = si.info.getCodec().termVectorsFormat().vectorsReader(cfsDir, si.info, fieldInfos, context); + termVectorsReaderOrig = info.getCodec().termVectorsFormat().vectorsReader(cfsDir, info, fieldInfos, context); } else { termVectorsReaderOrig = 
null; } Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -100,6 +100,17 @@ this.attributes = attributes; } + SegmentInfo(SegmentInfo info, long gen) { + this.dir = info.dir; + this.version = info.version; + this.name = IndexFileNames.updatedSegmentFileNameFromGeneration(info.name, gen); + this.docCount = info.docCount; + this.isCompoundFile = info.isCompoundFile; + this.codec = info.codec; + this.diagnostics = info.diagnostics; + this.attributes = info.attributes; + } + /** * Returns total size in bytes of all of files used by * this segment. Note that this will not include any live @@ -273,6 +284,14 @@ sizeInBytes = -1; } + /** Remove this file from the set of files written for this + * segment. */ + public void removeFile(String file) { + checkFileNames(Collections.singleton(file)); + setFiles.remove(file); + sizeInBytes = -1; + } + private void checkFileNames(Collection files) { Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher(""); for (String file : files) { Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (working copy) @@ -40,6 +40,9 @@ // are no deletes yet): private long delGen; + // Generation number of updates (-1 if there are no updates yet): + private long updateGen; + private volatile long sizeInBytes = -1; /** Sole constructor. @@ -48,10 +51,11 @@ * @param delGen deletion generation number (used to name deletion files) **/ - public SegmentInfoPerCommit(SegmentInfo info, int delCount, long delGen) { + public SegmentInfoPerCommit(SegmentInfo info, int delCount, long delGen, long updateGen) { this.info = info; this.delCount = delCount; this.delGen = delGen; + this.updateGen = updateGen; } void advanceDelGen() { @@ -63,6 +67,15 @@ sizeInBytes = -1; } + void advanceUpdateGen() { + if (updateGen == -1) { + updateGen = 1; + } else { + updateGen++; + } + sizeInBytes = -1; + } + /** Returns total size in bytes of all files for this * segment. */ public long sizeInBytes() throws IOException { @@ -85,6 +98,9 @@ // Must separately add any live docs files: info.getCodec().liveDocsFormat().files(this, files); + // Must separately add any generation replacement files: + info.getCodec().generationReplacementsFormat().files(this, info.dir, files); + return files; } @@ -106,6 +122,7 @@ sizeInBytes = -1; } + /** * Sets the generation number of the live docs file. * @see #getDelGen() @@ -117,14 +134,14 @@ /** Returns true if there are any deletions for the * segment at this commit. */ - public boolean hasDeletions() { + public boolean hasDeletions() { return delGen != -1; } - /** - * Returns the next available generation number - * of the live docs file. - */ + /** + * Returns the next available generation number + * of the live docs file. 
+ */ public long getNextDelGen() { if (delGen == -1) { return 1; @@ -164,11 +181,39 @@ if (delGen != -1) { s += ":delGen=" + delGen; } + if (updateGen != -1) { + s += ":updateGen=" + updateGen; + } return s; } @Override public SegmentInfoPerCommit clone() { - return new SegmentInfoPerCommit(info, delCount, delGen); + return new SegmentInfoPerCommit(info, delCount, delGen, updateGen); } + + public void setUpdateGen(long updateGen) { + this.updateGen = updateGen; + sizeInBytes = -1; + } + + public boolean hasUpdates() { + return updateGen != -1; + } + + public long getNextUpdateGen() { + if (updateGen == -1) { + return 1; + } + return updateGen + 1; + } + + public long getUpdateGen() { + return updateGen; + } + + void clearUpdateGen() { + updateGen = -1; + sizeInBytes = -1; + } } Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (revision 1420477) +++ lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -203,7 +203,7 @@ public static String getLastCommitSegmentsFileName(String[] files) { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - getLastCommitGeneration(files)); + getLastCommitGeneration(files), false); } /** @@ -215,7 +215,7 @@ public static String getLastCommitSegmentsFileName(Directory directory) throws IOException { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - getLastCommitGeneration(directory)); + getLastCommitGeneration(directory), false); } /** @@ -224,7 +224,7 @@ public String getSegmentsFileName() { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - lastGeneration); + lastGeneration, false); } /** @@ -256,7 +256,7 @@ } return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - nextGeneration); + nextGeneration, false); } /** @@ -305,7 +305,8 @@ if (delCount < 0 || delCount > info.getDocCount()) { throw new CorruptIndexException("invalid deletion count: " + delCount + " (resource: " + input + ")"); } - add(new SegmentInfoPerCommit(info, delCount, delGen)); + long updateGen = input.readLong(); + add(new SegmentInfoPerCommit(info, delCount, delGen, updateGen)); } userData = input.readStringStringMap(); @@ -373,6 +374,7 @@ segnOutput.writeString(si.getCodec().getName()); segnOutput.writeLong(siPerCommit.getDelGen()); segnOutput.writeInt(siPerCommit.getDelCount()); + segnOutput.writeLong(siPerCommit.getUpdateGen()); assert si.dir == directory; assert siPerCommit.getDelCount() <= si.getDocCount(); @@ -662,7 +664,7 @@ segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - gen); + gen, false); try { Object v = doBody(segmentFileName); @@ -690,7 +692,7 @@ // try it if so: String prevSegmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - gen-1); + gen-1, false); final boolean prevExists; prevExists = directory.fileExists(prevSegmentFileName); @@ -742,7 +744,7 @@ // since lastGeneration isn't incremented: final String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - generation); + generation, false); // Suppress so we keep throwing the original exception // in our caller IOUtils.deleteFilesIgnoringExceptions(dir, segmentFileName); @@ -832,7 +834,7 @@ // logic in SegmentInfos to kick in and load the last // good (previous) segments_N-1 file. 
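Not part of the patch: a minimal, self-contained sketch of the update-generation bookkeeping that SegmentInfoPerCommit gains above (-1 means no updates yet, the first update becomes generation 1, later updates increment the counter), together with an assumed generation-suffixed file-naming helper standing in for the extended IndexFileNames methods. The "_u<gen>" suffix is an illustration only, not the real Lucene file-name format.

import java.util.Locale;

class UpdateGenSketch {
  private long updateGen = -1;            // -1: no updates yet, matching the patch

  long getNextUpdateGen() {
    return updateGen == -1 ? 1 : updateGen + 1;
  }

  void advanceUpdateGen() {
    updateGen = getNextUpdateGen();
  }

  // e.g. ("_0", "cfs", 2) -> "_0_u2.cfs" under this assumed naming scheme
  static String updateFileName(String segmentName, String ext, long gen) {
    if (gen <= 0) {
      return segmentName + "." + ext;     // base segment: no generation suffix
    }
    return String.format(Locale.ROOT, "%s_u%d.%s", segmentName, gen, ext);
  }

  public static void main(String[] args) {
    UpdateGenSketch s = new UpdateGenSketch();
    s.advanceUpdateGen();                                          // first update -> generation 1
    System.out.println(updateFileName("_0", "cfs", s.updateGen));  // _0_u1.cfs
  }
}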
- final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation); + final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation, false); success = false; try { dir.sync(Collections.singleton(fileName)); Index: lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -31,7 +31,6 @@ import org.apache.lucene.codecs.TermVectorsWriter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; @@ -103,7 +102,7 @@ assert numMerged == mergeState.segmentInfo.getDocCount(); final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo, - mergeState.fieldInfos, termIndexInterval, null, context); + 0, mergeState.fieldInfos, termIndexInterval, null, null, context); mergeTerms(segmentWriteState); mergePerDoc(segmentWriteState); Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -18,48 +18,70 @@ */ import java.io.IOException; +import java.util.HashMap; +import java.util.Map; -import org.apache.lucene.store.Directory; +import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; -import org.apache.lucene.search.FieldCache; // javadocs +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.store.CompoundFileDirectory; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; +// javadocs + /** - * IndexReader implementation over a single segment. + * IndexReader implementation over a single segment. *
    - * Instances pointing to the same segment (but with different deletes, etc) - * may share the same core data. + * Instances pointing to the same segment (but with different deletes, etc) may + * share the same core data. + * * @lucene.experimental */ public final class SegmentReader extends AtomicReader { - + private final SegmentInfoPerCommit si; private final Bits liveDocs; - + // Normally set to si.docCount - si.delDocCount, unless we // were created as an NRT reader from IW, in which case IW // tells us the docCount: private final int numDocs; - + final SegmentCoreReaders core; - + + private SegmentCoreReaders[] updates; + private final IOContext context; + private Fields fields; + private FieldInfos fieldInfos; + private StoredFieldsReader fieldsReader; + private TermVectorsReader termVectorsReader; + private Map replacementsMap; + /** * Constructs a new SegmentReader with a new core. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error */ // TODO: why is this public? - public SegmentReader(SegmentInfoPerCommit si, int termInfosIndexDivisor, IOContext context) throws IOException { + public SegmentReader(SegmentInfoPerCommit si, int termInfosIndexDivisor, + IOContext context) throws IOException { this.si = si; - core = new SegmentCoreReaders(this, si.info.dir, si, context, termInfosIndexDivisor); + this.context = context; + core = new SegmentCoreReaders(this, si, -1, context, termInfosIndexDivisor); + initUpdates(si, termInfosIndexDivisor, context); boolean success = false; try { if (si.hasDeletions()) { // NOTE: the bitvector is stored using the regular directory, not cfs - liveDocs = si.info.getCodec().liveDocsFormat().readLiveDocs(directory(), si, new IOContext(IOContext.READ, true)); + liveDocs = si.info.getCodec().liveDocsFormat() + .readLiveDocs(directory(), si, new IOContext(IOContext.READ, true)); } else { assert si.getDelCount() == 0; liveDocs = null; @@ -68,125 +90,299 @@ success = true; } finally { // With lock-less commits, it's entirely possible (and - // fine) to hit a FileNotFound exception above. In + // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { core.decRef(); + if (updates != null) { + for (int i = 0; i < updates.length; i++) { + updates[i].decRef(); + } + } } } } - - /** Create new SegmentReader sharing core from a previous - * SegmentReader and loading new live docs from a new - * deletes file. Used by openIfChanged. */ - SegmentReader(SegmentInfoPerCommit si, SegmentCoreReaders core, IOContext context) throws IOException { - this(si, core, - si.info.getCodec().liveDocsFormat().readLiveDocs(si.info.dir, si, context), - si.info.getDocCount() - si.getDelCount()); + + /** + * Create new SegmentReader sharing core from a previous SegmentReader and + * loading new live docs from a new deletes file. Used by openIfChanged. + */ + SegmentReader(SegmentInfoPerCommit si, SegmentCoreReaders core, + IOContext context) throws IOException { + this(si, context, core, si.info.getCodec().liveDocsFormat() + .readLiveDocs(si.info.dir, si, context), si.info.getDocCount() + - si.getDelCount()); } - - /** Create new SegmentReader sharing core from a previous - * SegmentReader and using the provided in-memory - * liveDocs. 
Used by IndexWriter to provide a new NRT - * reader */ - SegmentReader(SegmentInfoPerCommit si, SegmentCoreReaders core, Bits liveDocs, int numDocs) { + + /** + * Create new SegmentReader sharing core from a previous SegmentReader and + * using the provided in-memory liveDocs. Used by IndexWriter to provide a new + * NRT reader + */ + SegmentReader(SegmentInfoPerCommit si, IOContext context, + SegmentCoreReaders core, Bits liveDocs, int numDocs) { this.si = si; + this.context = context; this.core = core; core.incRef(); - + this.updates = null; + // TODO : handle NRT updates, add field liveUpdates + assert liveDocs != null; this.liveDocs = liveDocs; - + this.numDocs = numDocs; } - + + private void initUpdates(SegmentInfoPerCommit si, int termInfosIndexDivisor, + IOContext context) throws IOException { + if (si.hasUpdates()) { + updates = new SegmentCoreReaders[(int) si.getUpdateGen()]; + for (int i = 0; i < updates.length; i++) { + updates[i] = new SegmentCoreReaders(this, si, i + 1, context, + termInfosIndexDivisor); + } + return; + } + updates = null; + } + @Override public Bits getLiveDocs() { ensureOpen(); return liveDocs; } - + @Override protected void doClose() throws IOException { - //System.out.println("SR.close seg=" + si); + // System.out.println("SR.close seg=" + si); core.decRef(); + if (updates != null) { + for (int i = 0; i < updates.length; i++) { + updates[i].decRef(); + } + } } - + @Override public boolean hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return liveDocs != null; } - + @Override public FieldInfos getFieldInfos() { ensureOpen(); - return core.fieldInfos; + if (updates == null) { + return core.fieldInfos; + } + + // need to create FieldInfos combining core and updates infos + final FieldInfos.Builder builder = new FieldInfos.Builder(); + builder.add(core.fieldInfos); + for (final SegmentCoreReaders update : updates) { + builder.add(update.fieldInfos); + } + fieldInfos = builder.finish(); + return fieldInfos; } - - /** Expert: retrieve thread-private {@link - * StoredFieldsReader} - * @lucene.internal */ - public StoredFieldsReader getFieldsReader() { + + /** + * Expert: retrieve thread-private {@link StoredFieldsReader} + * + * @lucene.internal + */ + public StoredFieldsReader getFieldsReader() throws IOException { ensureOpen(); - return core.fieldsReaderLocal.get(); + if (updates == null) { + return core.fieldsReaderLocal.get(); + } + + synchronized (updates) { + if (fieldsReader == null) { + // generate readers array + StoredFieldsReader[] allReaders = new StoredFieldsReader[updates.length + 1]; + allReaders[0] = core.fieldsReaderLocal.get(); + for (int i = 0; i < updates.length; i++) { + allReaders[i + 1] = updates[i].fieldsReaderLocal.get(); + } + + // generate replacements map + if (replacementsMap == null) { + generateReplacementsMap(); + } + + fieldsReader = new StackedStoredFieldsReader(allReaders, + replacementsMap); + } + } + + return fieldsReader; } + private synchronized void generateReplacementsMap() throws IOException { + if (replacementsMap == null) { + replacementsMap = new HashMap(); + boolean found = addReplacements(core.fields, core.cfsReader); + for (int i = 0; i < updates.length; i++) { + if (addReplacements(updates[i].fields, updates[i].cfsReader)) { + found = true; + } + } + if (!found) { + // no replacements + replacementsMap.clear(); + } + } + } + + private boolean addReplacements(FieldsProducer fields, + CompoundFileDirectory cfsReader) throws IOException { + boolean found = false; + for (String field : 
fields) { + if (!replacementsMap.containsKey(field)) { + final FieldGenerationReplacements replacements = si.info.getCodec() + .generationReplacementsFormat() + .readGenerationReplacements(field, si, context); + replacementsMap.put(field, replacements); + if (replacements != null) { + found = true; + } + } + } + return found; + } + @Override - public void document(int docID, StoredFieldVisitor visitor) throws IOException { + public void document(int docID, StoredFieldVisitor visitor) + throws IOException { checkBounds(docID); - getFieldsReader().visitDocument(docID, visitor); + getFieldsReader().visitDocument(docID, visitor, null); } - + @Override - public Fields fields() { + public Fields fields() throws IOException { ensureOpen(); - return core.fields; + if (fields == null) { + if (updates == null || updates.length == 0) { + return core.fields; + } + + // generate fields array + Fields[] fieldsArray = new Fields[updates.length + 1]; + fieldsArray[0] = core.fields; + for (int i = 0; i < updates.length; i++) { + fieldsArray[i + 1] = updates[i].fields; + } + + // generate replacements map + if (replacementsMap == null) { + generateReplacementsMap(); + } + + fields = new StackedFields(fieldsArray, replacementsMap, -1); + } + return fields; } - + @Override public int numDocs() { // Don't call ensureOpen() here (it could affect performance) return numDocs; } - + @Override public int maxDoc() { // Don't call ensureOpen() here (it could affect performance) return si.info.getDocCount(); } - - /** Expert: retrieve thread-private {@link - * TermVectorsReader} - * @lucene.internal */ - public TermVectorsReader getTermVectorsReader() { + + /** + * Expert: retrieve thread-private {@link TermVectorsReader} + * + * @lucene.internal + */ + public TermVectorsReader getTermVectorsReader() throws IOException { ensureOpen(); - return core.termVectorsLocal.get(); + if (updates == null) { + return core.termVectorsLocal.get(); + } + if (termVectorsReader == null) { + setStackedTermVectorsReader(); + } + + return termVectorsReader; } - - @Override - public Fields getTermVectors(int docID) throws IOException { - TermVectorsReader termVectorsReader = getTermVectorsReader(); + + private synchronized void setStackedTermVectorsReader() throws IOException { if (termVectorsReader == null) { - return null; + // generate readers array + TermVectorsReader[] tvReaders = new TermVectorsReader[updates.length + 1]; + tvReaders[0] = core.termVectorsLocal.get(); + for (int i = 0; i < updates.length; i++) { + tvReaders[i + 1] = updates[i].termVectorsLocal.get(); + } + + // generate replacements map + if (replacementsMap == null) { + generateReplacementsMap(); + } + + termVectorsReader = new StackedTermVectorsReader(tvReaders, + replacementsMap, -1); } - checkBounds(docID); - return termVectorsReader.get(docID); } private void checkBounds(int docID) { - if (docID < 0 || docID >= maxDoc()) { - throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")"); + if (docID < 0 || docID >= maxDoc()) { + throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + + maxDoc() + " (got docID=" + docID + ")"); } } - + @Override + public Fields getTermVectors(int docID) throws IOException { + ensureOpen(); + if (updates == null) { + // no updates, delegate to core + checkBounds(docID); + final TermVectorsReader coreReader = core.termVectorsLocal.get(); + if (coreReader == null) { + return null; + } + return coreReader.get(docID); + } + // generate fields array, only fields of the 
given docID + Fields[] fields = new Fields[updates.length + 1]; + final TermVectorsReader coreReader = core.termVectorsLocal.get(); + if (coreReader != null) { + checkBounds(docID); + fields[0] = coreReader.get(docID); + } + for (int i = 0; i < updates.length; i++) { + final TermVectorsReader updateReader = updates[i].termVectorsLocal.get(); + if (updateReader != null) { + checkBounds(docID); + fields[i + 1] = updateReader.get(docID); + } + } + + // generate replacements map + if (replacementsMap == null) { + generateReplacementsMap(); + } + + return new StackedFields(fields, replacementsMap, docID); + } + + @Override public String toString() { // SegmentInfo.toString takes dir and number of // *pending* deletions; so we reverse compute that here: - return si.toString(si.info.dir, si.info.getDocCount() - numDocs - si.getDelCount()); + return si.toString(si.info.dir, + si.info.getDocCount() - numDocs - si.getDelCount()); } /** @@ -202,7 +398,7 @@ SegmentInfoPerCommit getSegmentInfo() { return si; } - + /** Returns the directory this index resides in. */ public Directory directory() { // Don't ensureOpen here -- in certain cases, when a @@ -210,22 +406,24 @@ // this method on the closed original reader return si.info.dir; } - + // This is necessary so that cloned SegmentReaders (which // share the underlying postings data) will map to the - // same entry in the FieldCache. See LUCENE-1579. + // same entry in the FieldCache. See LUCENE-1579. @Override public Object getCoreCacheKey() { return core; } - + @Override public Object getCombinedCoreAndDeletesKey() { return this; } - - /** Returns term infos index divisor originally passed to - * {@link #SegmentReader(SegmentInfoPerCommit, int, IOContext)}. */ + + /** + * Returns term infos index divisor originally passed to + * {@link #SegmentReader(SegmentInfoPerCommit, int, IOContext)}. + */ public int getTermInfosIndexDivisor() { return core.termsIndexDivisor; } @@ -233,40 +431,58 @@ @Override public DocValues docValues(String field) throws IOException { ensureOpen(); - final PerDocProducer perDoc = core.perDocProducer; - if (perDoc == null) { - return null; + DocValues docValues = internalGetDocValues(core.perDocProducer, field); + if (updates != null) { + // if no norms for core, try using the first producer available in updates + for (int i = 0; docValues == null && i < updates.length; i++) { + if (updates[i] != null) { + docValues = internalGetDocValues(updates[i].perDocProducer, field); + } + } } - return perDoc.docValues(field); + return docValues; } @Override public DocValues normValues(String field) throws IOException { ensureOpen(); - final PerDocProducer perDoc = core.norms; + DocValues normValues = internalGetDocValues(core.norms, field); + if (updates != null) { + // if no norms for core, try using the first norms available in updates + for (int i = 0; normValues == null && i < updates.length; i++) { + if (updates[i] != null) { + normValues = internalGetDocValues(updates[i].norms, field); + } + } + } + return normValues; + } + + private DocValues internalGetDocValues(PerDocProducer perDoc, String field) + throws IOException { if (perDoc == null) { return null; } - return perDoc.docValues(field); + final DocValues docValues = perDoc.docValues(field); + return docValues; } - /** - * Called when the shared core for this SegmentReader - * is closed. + * Called when the shared core for this SegmentReader is closed. *
    - * This listener is called only once all SegmentReaders - * sharing the same core are closed. At this point it - * is safe for apps to evict this reader from any caches - * keyed on {@link #getCoreCacheKey}. This is the same - * interface that {@link FieldCache} uses, internally, - * to evict entries.
    + * This listener is called only once all SegmentReaders sharing the same core + * are closed. At this point it is safe for apps to evict this reader from any + * caches keyed on {@link #getCoreCacheKey}. This is the same interface that + * {@link FieldCache} uses, internally, to evict entries. + *
    * * @lucene.experimental */ public static interface CoreClosedListener { - /** Invoked when the shared core of the provided {@link - * SegmentReader} has closed. */ + /** + * Invoked when the shared core of the provided {@link SegmentReader} has + * closed. + */ public void onClose(SegmentReader owner); } Index: lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy) @@ -39,6 +39,7 @@ /** {@link SegmentInfo} describing this segment. */ public final SegmentInfo segmentInfo; + public final int updateGen; /** {@link FieldInfos} describing all fields in this * segment. */ @@ -55,10 +56,20 @@ * deleted. */ public final BufferedDeletes segDeletes; + /** Updates to apply while we are flushing the segment. A + * Term is enrolled in here if it was used in update at one + * point, and it's mapped to the docIDUpto, meaning any + * docID < docIDUpto containing this term should be + * deleted. */ + public final BufferedUpdates segUpdates; + /** {@link MutableBits} recording live documents; this is * only set if there is one or more deleted documents. */ public MutableBits liveDocs; + // Lazily created: + public UpdatedSegmentData liveUpdates; + /** Unique suffix for any postings files written for this * segment. {@link PerFieldPostingsFormat} sets this for * each of the postings formats it wraps. If you create @@ -79,12 +90,16 @@ public final IOContext context; /** Sole constructor. */ - public SegmentWriteState(InfoStream infoStream, Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, - int termIndexInterval, BufferedDeletes segDeletes, IOContext context) { + public SegmentWriteState(InfoStream infoStream, Directory directory, + SegmentInfo segmentInfo, int updateGen, FieldInfos fieldInfos, + int termIndexInterval, BufferedDeletes segDeletes, + BufferedUpdates segUpdates, IOContext context) { this.infoStream = infoStream; this.segDeletes = segDeletes; + this.segUpdates = segUpdates; this.directory = directory; this.segmentInfo = segmentInfo; + this.updateGen = updateGen; this.fieldInfos = fieldInfos; this.termIndexInterval = termIndexInterval; segmentSuffix = ""; @@ -98,11 +113,13 @@ infoStream = state.infoStream; directory = state.directory; segmentInfo = state.segmentInfo; + updateGen = state.updateGen; fieldInfos = state.fieldInfos; termIndexInterval = state.termIndexInterval; context = state.context; this.segmentSuffix = segmentSuffix; segDeletes = state.segDeletes; + segUpdates = state.segUpdates; delCountOnFlush = state.delCountOnFlush; } } Index: lucene/core/src/java/org/apache/lucene/index/SortedFieldsUpdates.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SortedFieldsUpdates.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/SortedFieldsUpdates.java (working copy) @@ -0,0 +1,25 @@ +package org.apache.lucene.index; + +import java.util.SortedSet; +import java.util.TreeMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class SortedFieldsUpdates extends TreeMap> { + +} Index: lucene/core/src/java/org/apache/lucene/index/StackedDocsEnum.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedDocsEnum.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/StackedDocsEnum.java (working copy) @@ -0,0 +1,232 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Map; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PriorityQueue; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class StackedDocsEnum extends DocsAndPositionsEnum { + + /** + * A list containing the active enumerations, ordered from. + */ + final private LinkedList active; + + /** + * A queue containing non-active enums, ordered by doc ID. + */ + final private PriorityQueue queueByDocId; + + /** + * A queue for ordering active enums by decreasing enum index. + */ + final private PriorityQueue queueByIndex; + + /** + * Field generation replacements for the enclosing field. + */ + final private FieldGenerationReplacements replacements; + + /** + * Current doc ID. + */ + private int docId; + + /** + * An iterator over active enums (for positions). + */ + private Iterator activeIterator; + + /** + * Current positions enum. + */ + private DocsEnumWithIndex positionsEnum; + + /** + * Number of positions left in positionsEnum. 
+ */ + private int positionsLeft; + + private static final FieldGenerationReplacements NO_REPLACEMENTS = new FieldGenerationReplacements(); + + public StackedDocsEnum(Map activeMap, + FieldGenerationReplacements replacements) { + active = new LinkedList(); + for (DocsEnum docsEnum : activeMap.keySet()) { + active.add(new DocsEnumWithIndex(docsEnum, activeMap.get(docsEnum))); + } + + queueByDocId = new DocsEnumDocIdPriorityQueue(activeMap.size()); + queueByIndex = new DocsEnumIndexPriorityQueue(activeMap.size()); + if (replacements == null) { + this.replacements = NO_REPLACEMENTS; + } else { + this.replacements = replacements; + } + } + + @Override + public int nextDoc() throws IOException { + // advance all enums that were active in last docId, and put in queue + for (DocsEnumWithIndex docsEnum : active) { + if (docsEnum.docsEnum.nextDoc() != NO_MORE_DOCS) { + queueByDocId.add(docsEnum); + } + } + active.clear(); + + actualNextDoc(); + return docId; + } + + @Override + public int advance(int target) throws IOException { + // advance all enums, and put in queue + for (DocsEnumWithIndex docsEnum : active) { + if (docsEnum.docsEnum.advance(target) != NO_MORE_DOCS) { + queueByDocId.add(docsEnum); + } + } + active.clear(); + + actualNextDoc(); + return docId; + } + + private void actualNextDoc() throws IOException { + positionsEnum = null; + while (queueByDocId.size() > 0) { + // put all enums with minimal docId in active list + docId = queueByDocId.top().docsEnum.docID(); + while (queueByDocId.size() > 0 + && docId == queueByDocId.top().docsEnum.docID()) { + queueByIndex.add(queueByDocId.pop()); + } + + // make sure non-replaced fields exist + while (queueByIndex.size() > 0 + && queueByIndex.top().index >= replacements.get(docId)) { + active.addFirst(queueByIndex.pop()); + } + // put replaced fields back in the queue + while (queueByIndex.size() > 0) { + DocsEnumWithIndex docsEnum = queueByIndex.pop(); + if (docsEnum.docsEnum.nextDoc() != NO_MORE_DOCS) { + queueByDocId.add(docsEnum); + } + } + if (!active.isEmpty()) { + return; + } + } + + docId = NO_MORE_DOCS; + } + + @Override + public int docID() { + return docId; + } + + @Override + public int freq() throws IOException { + int freq = 0; + for (DocsEnumWithIndex docsEnum : active) { + freq += docsEnum.docsEnum.freq(); + } + return freq; + } + + @Override + public int nextPosition() throws IOException { + if (positionsEnum == null) { + activeIterator = active.iterator(); + positionsLeft = 0; + } + + if (positionsLeft == 0) { + positionsEnum = activeIterator.next(); + positionsLeft = positionsEnum.docsEnum.freq(); + } + + positionsLeft--; + return ((DocsAndPositionsEnum) positionsEnum.docsEnum).nextPosition(); + } + + @Override + public int startOffset() throws IOException { + return ((DocsAndPositionsEnum) positionsEnum.docsEnum).startOffset(); + } + + @Override + public int endOffset() throws IOException { + return ((DocsAndPositionsEnum) positionsEnum.docsEnum).endOffset(); + } + + @Override + public BytesRef getPayload() throws IOException { + return ((DocsAndPositionsEnum) positionsEnum.docsEnum).getPayload(); + } + + protected class DocsEnumWithIndex { + + DocsEnum docsEnum; + int index; + + public DocsEnumWithIndex(DocsEnum docsEnum, int index) { + this.docsEnum = docsEnum; + this.index = index; + } + + } + + private class DocsEnumDocIdPriorityQueue extends + PriorityQueue { + + public DocsEnumDocIdPriorityQueue(int maxSize) { + super(maxSize); + } + + @Override + protected boolean lessThan(DocsEnumWithIndex a, DocsEnumWithIndex b) { + 
return a.docsEnum.docID() < b.docsEnum.docID(); + } + + } + + private class DocsEnumIndexPriorityQueue extends + PriorityQueue { + + public DocsEnumIndexPriorityQueue(int maxSize) { + super(maxSize); + } + + @Override + protected boolean lessThan(DocsEnumWithIndex a, DocsEnumWithIndex b) { + // bigger index should be first + return a.index < b.index; + } + + } + +} Index: lucene/core/src/java/org/apache/lucene/index/StackedFields.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedFields.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/StackedFields.java (working copy) @@ -0,0 +1,91 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * {@link Fields} of a segment with updates. 
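Not part of the patch: the stacked enums above consult FieldGenerationReplacements to decide which generations may still contribute postings for a given document, i.e. only generations at or above the doc's replacement generation; older generations were superseded for that document. A toy stand-in (a plain HashMap instead of the real class) for that per-document filter; the default of 0 for unmapped docs is an assumption meaning "nothing was replaced, everything including the base segment contributes".

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class ReplacementFilterSketch {
  // docID -> first generation whose values are still current (0 = base segment)
  private final Map<Integer, Integer> replacements = new HashMap<>();

  void set(int doc, int generation) {
    replacements.put(doc, generation);
  }

  // Generation indexes (0..maxGen) that should still be read for this doc.
  List<Integer> activeGenerations(int doc, int maxGen) {
    int first = replacements.getOrDefault(doc, 0);   // no entry: nothing was replaced
    List<Integer> active = new ArrayList<>();
    for (int gen = first; gen <= maxGen; gen++) {
      active.add(gen);
    }
    return active;
  }

  public static void main(String[] args) {
    ReplacementFilterSketch r = new ReplacementFilterSketch();
    r.set(3, 2);                                     // doc 3's field was rewritten in generation 2
    System.out.println(r.activeGenerations(3, 2));   // [2]
    System.out.println(r.activeGenerations(7, 2));   // [0, 1, 2]
  }
}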
+ */ +public class StackedFields extends Fields { + + final Map fields; + + public StackedFields(Fields[] fieldsArray, + Map replacementsMap, int doc) + throws IOException { + fields = new TreeMap(); + final Set ignoreFields = new HashSet(); + + for (int i = fieldsArray.length - 1; i >= 0; i--) { + if (fieldsArray[i] != null) { + final Iterator iterator = fieldsArray[i].iterator(); + while (iterator.hasNext()) { + // handle single field + String field = iterator.next(); + if (!ignoreFields.contains(field)) { + Terms terms = fieldsArray[i].terms(field); + if (terms != null) { + StackedTerms stackedTerms = (StackedTerms) fields.get(field); + if (stackedTerms == null) { + stackedTerms = new StackedTerms(fieldsArray.length, + replacementsMap.get(field)); + fields.put(field, stackedTerms); + } + stackedTerms.addTerms(terms, i); + } + } + } + } + + if (doc >= 0) { + // ignore fields according to replacements for this document + for (Entry entry : replacementsMap + .entrySet()) { + if (!ignoreFields.contains(entry.getKey()) + && entry.getValue() != null && entry.getValue().get(doc) == i) { + ignoreFields.add(entry.getKey()); + } + } + } + } + } + + @Override + public Iterator iterator() { + return Collections.unmodifiableSet(fields.keySet()).iterator(); + } + + @Override + public Terms terms(String field) throws IOException { + return fields.get(field); + } + + @Override + public int size() { + return fields.size(); + } + +} Index: lucene/core/src/java/org/apache/lucene/index/StackedStoredFieldsReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedStoredFieldsReader.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/StackedStoredFieldsReader.java (working copy) @@ -0,0 +1,76 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.lucene.codecs.StoredFieldsReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
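Not part of the patch: StackedFields above collects the per-generation Terms of each field, and the StackedTerms/StackedTermsEnum classes added further down merge those into one sorted, duplicate-free term stream (lazily, via a queue of per-generation cursors). A toy eager version of that merge, using Strings instead of BytesRef/TermsEnum to stay self-contained:

import java.util.List;
import java.util.TreeSet;

class MergedTermsSketch {
  static TreeSet<String> mergeTerms(List<List<String>> perGenerationTerms) {
    TreeSet<String> merged = new TreeSet<>();        // sorted and de-duplicated
    for (List<String> terms : perGenerationTerms) {
      merged.addAll(terms);
    }
    return merged;
  }

  public static void main(String[] args) {
    List<List<String>> gens = List.of(
        List.of("apache", "lucene"),                 // base segment
        List.of("lucene", "update"));                // update generation 1
    System.out.println(mergeTerms(gens));            // [apache, lucene, update]
  }
}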
+ */ + +public class StackedStoredFieldsReader extends StoredFieldsReader { + + private StoredFieldsReader[] allReaders; + private Map replacementsMap; + + public StackedStoredFieldsReader(StoredFieldsReader[] allReaders, + Map replacements) { + this.allReaders = allReaders; + this.replacementsMap = replacements; + } + + @Override + public void close() throws IOException { + for (StoredFieldsReader reader : allReaders) { + reader.close(); + } + } + + @Override + public void visitDocument(int n, StoredFieldVisitor visitor, + Set ignoreFields) throws IOException { + ignoreFields = new HashSet(); + // go over stacked segments from top to bottom + for (int i = allReaders.length - 1; i > 0; i--) { + // visit current stacked segment + allReaders[i].visitDocument(n, visitor, ignoreFields); + // accumulate fields to ignore in lower stacked segments + for (Entry entry : replacementsMap + .entrySet()) { + if (!ignoreFields.contains(entry.getKey()) && entry.getValue() != null + && entry.getValue().get(n) == i) { + ignoreFields.add(entry.getKey()); + } + } + } + // now visit core + allReaders[0].visitDocument(n, visitor, ignoreFields); + } + + @Override + public StoredFieldsReader clone() { + StoredFieldsReader[] newReaders = new StoredFieldsReader[allReaders.length]; + for (int i = 0; i < newReaders.length; i++) { + newReaders[i] = allReaders[i].clone(); + } + return new StackedStoredFieldsReader(newReaders, replacementsMap); + } + +} Index: lucene/core/src/java/org/apache/lucene/index/StackedTermVectorsReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedTermVectorsReader.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/StackedTermVectorsReader.java (working copy) @@ -0,0 +1,64 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.codecs.TermVectorsReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
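Not part of the patch: a toy version of the stacked stored-fields walk that StackedStoredFieldsReader.visitDocument performs above. Generations are visited from newest to oldest and, once a field has been taken from a newer generation, it is ignored in all older ones. The real reader decides what to ignore from FieldGenerationReplacements rather than from mere presence, and works through StoredFieldVisitor; plain maps stand in for that machinery here.

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

class StackedStoredFieldsSketch {
  // perGeneration.get(i) = stored fields of one document in generation i (0 = base segment)
  static Map<String, String> visit(List<Map<String, String>> perGeneration) {
    Map<String, String> result = new HashMap<>();
    Set<String> ignore = new HashSet<>();
    for (int gen = perGeneration.size() - 1; gen >= 0; gen--) {   // newest generation first
      for (Map.Entry<String, String> field : perGeneration.get(gen).entrySet()) {
        if (ignore.add(field.getKey())) {                         // newest value wins
          result.put(field.getKey(), field.getValue());
        }
      }
    }
    return result;
  }

  public static void main(String[] args) {
    List<Map<String, String>> gens = List.of(
        Map.of("title", "old title", "body", "some body"),        // base segment
        Map.of("title", "new title"));                            // update generation 1
    System.out.println(visit(gens));   // title=new title, body=some body (map order unspecified)
  }
}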
+ */ + +public class StackedTermVectorsReader extends TermVectorsReader { + + private final TermVectorsReader[] allReaders; + private final Map replacementsMap; + private final int docId; + + public StackedTermVectorsReader(TermVectorsReader[] allReaders, + Map replacementsMap, int docId) { + this.allReaders = allReaders; + this.replacementsMap = replacementsMap; + this.docId = docId; + } + + @Override + public void close() throws IOException {} + + @Override + public Fields get(int doc) throws IOException { + // in case docId != -1, we need to create special fields, where document + // docId is returned as doc 0, and all others ignored + if (docId != -1) { + if (doc != 0) { + return null; + } + } + // generate fields array + Fields[] fieldsArray = new Fields[allReaders.length]; + for (int i = 0; i < fieldsArray.length; i++) { + if (allReaders[i] != null) { + fieldsArray[i] = allReaders[i].get(doc); + } + } + return new StackedFields(fieldsArray, replacementsMap, doc); + } + + @Override + public TermVectorsReader clone() { + return new StackedTermVectorsReader(allReaders, replacementsMap, docId); + } +} Index: lucene/core/src/java/org/apache/lucene/index/StackedTerms.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedTerms.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/StackedTerms.java (working copy) @@ -0,0 +1,136 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
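Not part of the patch: StackedTermVectorsReader above asks each generation's TermVectorsReader for the document's vector fields and stacks them, with the per-document replacement generation deciding which generation's vector is current for each field. A toy stand-in using Strings for the per-field vectors:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

class StackedTermVectorsSketch {
  static Map<String, String> vectorsForDoc(
      List<Map<String, String>> perGenerationVectors,   // index 0 = base segment
      Map<String, Integer> replacementGenForDoc) {      // field -> winning generation
    Map<String, String> result = new HashMap<>();
    for (int gen = 0; gen < perGenerationVectors.size(); gen++) {
      for (Map.Entry<String, String> e : perGenerationVectors.get(gen).entrySet()) {
        int winner = replacementGenForDoc.getOrDefault(e.getKey(), 0);
        if (gen >= winner) {               // older, replaced vectors are skipped
          result.put(e.getKey(), e.getValue());
        }
      }
    }
    return result;
  }

  public static void main(String[] args) {
    List<Map<String, String>> gens = List.of(
        Map.of("title", "vector@base", "body", "vector@base"),
        Map.of("title", "vector@gen1"));
    Map<String, Integer> repl = Map.of("title", 1);      // title replaced in generation 1
    System.out.println(vectorsForDoc(gens, repl));       // title -> vector@gen1, body -> vector@base
  }
}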
+ */ + +public class StackedTerms extends Terms { + + private final FieldGenerationReplacements replacements; + private final Terms[] subTerms; + private Comparator comparator; + private boolean hasOffsets; + private boolean hasPositions; + private boolean hasPayloads; + + public StackedTerms(int maxTerms, + FieldGenerationReplacements fieldGenerationReplacements) { + this.replacements = fieldGenerationReplacements; + subTerms = new Terms[maxTerms]; + + hasOffsets = false; + hasPositions = false; + hasPayloads = false; + } + + public void addTerms(Terms terms, int generation) throws IOException { + if (terms != null) { + subTerms[generation] = terms; + hasOffsets |= terms.hasOffsets(); + hasPositions |= terms.hasPositions(); + hasPayloads |= terms.hasPayloads(); + if (comparator == null) { + comparator = terms.getComparator(); + } else if (!comparator.equals(terms.getComparator())) { + throw new IllegalStateException( + "sub-readers have different BytesRef.Comparators; cannot merge"); + } + } + } + + @Override + public TermsEnum iterator(TermsEnum reuse) throws IOException { + return new StackedTermsEnum(subTerms, replacements, comparator); + } + + @Override + public Comparator getComparator() throws IOException { + if (comparator == null) { + for (int i = 0; i < subTerms.length; i++) { + if (subTerms[i] != null) { + comparator = subTerms[i].getComparator(); + if (comparator != null) { + return comparator; + } + } + } + } + return comparator; + } + + @Override + public long size() throws IOException { + final TermsEnum iterator = iterator(null); + int size = 0; + while (iterator.next() != null) { + size++; + } + return size; + } + + @Override + public long getSumTotalTermFreq() throws IOException { + long sum = 0; + final TermsEnum iterator = iterator(null); + while(iterator.next() != null) { + sum += iterator.totalTermFreq(); + } + if (sum == 0) { + return -1; + } + return sum; + } + + @Override + public long getSumDocFreq() throws IOException { + long sum = 0; + final TermsEnum iterator = iterator(null); + while(iterator.next() != null) { + sum += iterator.docFreq(); + } + if (sum == 0) { + return -1; + } + return sum; + } + + @Override + public int getDocCount() throws IOException { + // TODO: SY: can we actually compute this + return -1; + } + + @Override + public boolean hasOffsets() { + return hasOffsets; + } + + @Override + public boolean hasPositions() { + return hasPositions; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } + +} Index: lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java (working copy) @@ -0,0 +1,246 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeSet; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * TermsEnum for stacked segments (with updates). At the term level the terms + * are merged without taking into consideration fields replacements, so terms + * with no occurrences might return. Given a certain term, all the enumerations + * take into account fields replacements. + */ +public class StackedTermsEnum extends TermsEnum { + + private final Terms[] subTerms; + private final FieldGenerationReplacements replacements; + private Comparator comparator; + private TreeSet activeEnums; + + public StackedTermsEnum(Terms[] subTerms, + FieldGenerationReplacements replacements, Comparator comparator) + throws IOException { + this.subTerms = subTerms; + this.replacements = replacements; + this.comparator = comparator; + } + + @Override + public Comparator getComparator() { + return comparator; + } + + @Override + public BytesRef next() throws IOException { + if (activeEnums == null) { + init(); + return headTerm(); + } + + // get the current term (queue head) + BytesRef headTerm = headTerm(); + final BytesRef currentHead = BytesRef.deepCopyOf(headTerm); + + // advance all enums with same term + while (currentHead.equals(headTerm)) { + if (activeEnums.isEmpty()) { + return null; + } else { + final InnerTermsEnum polled = activeEnums.pollFirst(); + if (polled.advance()) { + activeEnums.add(polled); + } + if (activeEnums.isEmpty()) { + // done, return null + headTerm = null; + } else { + // still active, move to next enum + headTerm = headTerm(); + } + } + } + + return headTerm; + } + + private void init() throws IOException { + activeEnums = new TreeSet(); + for (int i = 0; i < subTerms.length; i++) { + if (subTerms[i] != null) { + final TermsEnum termsEnum = subTerms[i].iterator(null); + final BytesRef term = termsEnum.next(); + if (term != null) { + activeEnums.add(new InnerTermsEnum(i, termsEnum, term)); + } + } + } + } + + @Override + public BytesRef term() throws IOException { + return headTerm(); + } + + private BytesRef headTerm() { + final InnerTermsEnum head = activeEnums.first(); + if (head == null) { + return null; + } + return head.getTerm(); + } + + @Override + public SeekStatus seekCeil(BytesRef text, boolean useCache) + throws IOException { + // reset active enums + if (activeEnums == null) { + activeEnums = new TreeSet(); + } else { + activeEnums.clear(); + } + + // do seekCeil on all non-null subTerms + SeekStatus status = SeekStatus.END; + for (int i = 0; i < subTerms.length; i++) { + if (subTerms[i] != null) { + final TermsEnum termsEnum = subTerms[i].iterator(null); + final SeekStatus tempStatus = termsEnum.seekCeil(text, useCache); + if (tempStatus != SeekStatus.END) { + // put in new queue + activeEnums.add(new InnerTermsEnum(i, termsEnum, termsEnum.term())); + + // update status if needed + if (tempStatus == SeekStatus.FOUND) { + status = SeekStatus.FOUND; + } else if (status == SeekStatus.END) { + status = SeekStatus.NOT_FOUND; + } + } + } + } + return status; + } + + @Override + public long ord() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void seekExact(long ord) throws IOException { + throw 
new UnsupportedOperationException(); + } + + @Override + public int docFreq() throws IOException { + final DocsEnum docs = docs(null, null, 0); + int docFreq = 0; + while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + docFreq++; + } + return docFreq; + } + + @Override + public long totalTermFreq() throws IOException { + final DocsEnum docsEnum = docs(null, null); + int totalTermFreq = 0; + if (docsEnum != null) { + while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + totalTermFreq += docsEnum.freq(); + } + } + return totalTermFreq; + } + + @Override + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) + throws IOException { + // build map of active enums with indexes + Map activeMap = new HashMap(); + for (InnerTermsEnum inner : activeEnums.headSet(activeEnums.first(), true)) { + final DocsEnum docs = inner.termsEnum.docs(liveDocs, reuse, flags); + if (docs != null) { + activeMap.put(docs, inner.getIndex()); + } + } + + return new StackedDocsEnum(activeMap, replacements); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, + DocsAndPositionsEnum reuse, int flags) throws IOException { + // build map of active enums with indexes + Map activeMap = new HashMap(); + for (InnerTermsEnum inner : activeEnums.headSet(activeEnums.first(), true)) { + final DocsAndPositionsEnum docsAndPositions = inner.termsEnum + .docsAndPositions(liveDocs, reuse, flags); + if (docsAndPositions != null) { + activeMap.put(docsAndPositions, inner.getIndex()); + } + } + + if (activeMap.isEmpty()) { + return null; + } + + return new StackedDocsEnum(activeMap, replacements); + } + + private class InnerTermsEnum implements Comparable { + + private int index; + private TermsEnum termsEnum; + private BytesRef term; + + public InnerTermsEnum(int index, TermsEnum termsEnum, BytesRef term) { + this.index = index; + this.termsEnum = termsEnum; + this.term = term; + } + + public int getIndex() { + return index; + } + + public BytesRef getTerm() { + return term; + } + + public boolean advance() throws IOException { + term = termsEnum.next(); + return term != null; + } + + @Override + public int compareTo(InnerTermsEnum o) { + return comparator.compare(this.term, o.term); + } + + } + +} Index: lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (revision 1416361) +++ lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (working copy) @@ -112,7 +112,7 @@ assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID; lastDocID++; - + termsHash.reset(); reset(); assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end"); Index: lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (working copy) @@ -0,0 +1,288 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.apache.lucene.analysis.Analyzer; +import 
org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.FieldsUpdate.Operation;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Holds updates data for a certain segment.
+ */
+class UpdatedSegmentData {
+
+  /** Updates mapped by doc ID; for each doc, a sorted list of updates. */
+  private TreeMap<Integer,SortedSet<FieldsUpdate>> updatesMap;
+
+  public long generation;
+
+  private Map<String,FieldGenerationReplacements> fieldGenerationReplacments;
+
+  private Iterator<Entry<Integer,SortedSet<FieldsUpdate>>> updatesIterator;
+  private int currDocID;
+  private int nextDocID;
+  private int numDocs;
+  private SortedSet<FieldsUpdate> nextUpdate;
+  private Analyzer analyzer;
+
+  UpdatedSegmentData() {
+    updatesMap = new TreeMap<Integer,SortedSet<FieldsUpdate>>();
+  }
+
+  void addUpdate(int docID, FieldsUpdate update) {
+    SortedSet<FieldsUpdate> prevUpdates = updatesMap.get(docID);
+    if (prevUpdates == null) {
+      prevUpdates = new TreeSet<FieldsUpdate>();
+      updatesMap.put(docID, prevUpdates);
+    }
+    prevUpdates.add(update);
+  }
+
+  void addUpdates(int docID, FieldsUpdate[] updatesArray) {
+    SortedSet<FieldsUpdate> prevUpdates = updatesMap.get(docID);
+    if (prevUpdates == null) {
+      prevUpdates = new TreeSet<FieldsUpdate>();
+      updatesMap.put(docID, prevUpdates);
+    }
+    for (int i = 0; i < updatesArray.length; i++) {
+      prevUpdates.add(updatesArray[i]);
+    }
+  }
+
+  boolean hasUpdates() {
+    return !updatesMap.isEmpty();
+  }
+
+  /**
+   * Start writing updates to the updates index.
+   *
+   * @param generation
+   *          The updates generation.
+   * @param numDocs
+   *          number of documents in the base segment
+   */
+  void startWriting(long generation, int numDocs) {
+    this.generation = generation;
+    this.numDocs = numDocs;
+    updatesIterator = updatesMap.entrySet().iterator();
+    currDocID = 0;
+    // fetch the first actual updates document if one exists
+    nextDocUpdate();
+  }
+
+  /**
+   * Fetch the next update and set the iteration fields appropriately.
+   */
+  private void nextDocUpdate() {
+    if (updatesIterator.hasNext()) {
+      Entry<Integer,SortedSet<FieldsUpdate>> docUpdates = updatesIterator
+          .next();
+      nextDocID = docUpdates.getKey();
+      nextUpdate = docUpdates.getValue();
+    } else {
+      // no more updates
+      nextDocID = numDocs;
+    }
+  }
+
+  /**
+   * Get the next document to put in the updates index; it could be an empty
+   * document. Also updates the analyzer.
+   *
+   * @throws IOException
+   *           If different analyzers were assigned to field updates affecting
+   *           the next document.
+   */
+  IndexDocument nextDocument() throws IOException {
+    IndexDocument toReturn = null;
+    if (currDocID < nextDocID) {
+      // empty document required
+      if (currDocID == numDocs - 1) {
+        // add a document with a stored field so the segment reports the right
+        // size when reading stored documents
+        toReturn = STORED_FIELD_DOCUMENT;
+      } else {
+        toReturn = EMPTY_DOCUMENT;
+      }
+    } else if (currDocID < numDocs) {
+      // return an actual updates document...
+      toReturn = new UpdatesIndexDocument(nextUpdate);
+      // ...
and fetch the next one if exists + nextDocUpdate(); + } else { + // no more documents required + return null; + } + + currDocID++; + return toReturn; + } + + Analyzer getAnalyzer() { + return analyzer; + } + + Map getFieldGenerationReplacments() { + return fieldGenerationReplacments; + } + + /** + * An {@link IndexDocument} containing all the updates to a certain document + * in a stacked segment, taking into account replacements. + *

+   * Constructing an {@link UpdatesIndexDocument} also updates the containing
+   * {@link UpdatedSegmentData}'s analyzer and its
+   * {@link FieldGenerationReplacements} vectors for the relevant fields.
+   */
+  private class UpdatesIndexDocument implements IndexDocument {
+
+    Map<String,List<IndexableField>> indexablesByField = new HashMap<String,List<IndexableField>>();
+    Map<String,List<StorableField>> storablesByField = new HashMap<String,List<StorableField>>();
+
+    public UpdatesIndexDocument(SortedSet<FieldsUpdate> fieldsUpdates)
+        throws IOException {
+      boolean setAnalyzer = true;
+      analyzer = null;
+      for (FieldsUpdate fieldsUpdate : fieldsUpdates) {
+        // set analyzer and check for analyzer conflict
+        if (setAnalyzer) {
+          analyzer = fieldsUpdate.analyzer;
+          setAnalyzer = false;
+        } else if (analyzer != fieldsUpdate.analyzer) {
+          throw new IOException(
+              "two analyzers assigned to one updated document");
+        }
+
+        if (fieldsUpdate.operation == Operation.REPLACE_FIELDS) {
+          // handle fields replacement
+          for (IndexableField field : fieldsUpdate.fields.indexableFields()) {
+            replaceField(field.name());
+          }
+          for (StorableField field : fieldsUpdate.fields.storableFields()) {
+            replaceField(field.name());
+          }
+        }
+
+        // add new fields
+        for (IndexableField field : fieldsUpdate.fields.indexableFields()) {
+          List<IndexableField> fieldList = indexablesByField.get(field.name());
+          if (fieldList == null) {
+            fieldList = new ArrayList<IndexableField>();
+            indexablesByField.put(field.name(), fieldList);
+          }
+          fieldList.add(field);
+        }
+        for (StorableField field : fieldsUpdate.fields.storableFields()) {
+          List<StorableField> fieldList = storablesByField.get(field.name());
+          if (fieldList == null) {
+            fieldList = new ArrayList<StorableField>();
+            storablesByField.put(field.name(), fieldList);
+          }
+          fieldList.add(field);
+        }
+      }
+    }
+
+    private void replaceField(String fieldName) {
+      // remove previous fields
+      indexablesByField.remove(fieldName);
+      storablesByField.remove(fieldName);
+
+      // update field generation replacement vector
+      if (fieldGenerationReplacments == null) {
+        fieldGenerationReplacments = new HashMap<String,FieldGenerationReplacements>();
+      }
+      FieldGenerationReplacements fieldReplacement = fieldGenerationReplacments
+          .get(fieldName);
+      if (fieldReplacement == null) {
+        fieldReplacement = new FieldGenerationReplacements();
+        fieldGenerationReplacments.put(fieldName, fieldReplacement);
+      }
+      fieldReplacement.set(currDocID, generation);
+    }
+
+    @Override
+    public Iterable<? extends IndexableField> indexableFields() {
+      List<IndexableField> indexableFields = new ArrayList<IndexableField>();
+      for (List<IndexableField> byField : indexablesByField.values()) {
+        indexableFields.addAll(byField);
+      }
+      return indexableFields;
+    }
+
+    @Override
+    public Iterable<? extends StorableField> storableFields() {
+      List<StorableField> storableFields = new ArrayList<StorableField>();
+      for (List<StorableField> byField : storablesByField.values()) {
+        storableFields.addAll(byField);
+      }
+      return storableFields;
+    }
+
+  }
+
+  /**
+   * An empty document to be used as filler to maintain doc IDs in stacked
+   * segments.
+   */
+  private static final IndexDocument EMPTY_DOCUMENT = new IndexDocument() {
+    @Override
+    public Iterable<? extends StorableField> storableFields() {
+      return Collections.emptyList();
+    }
+
+    @Override
+    public Iterable<? extends IndexableField> indexableFields() {
+      return Collections.emptyList();
+    }
+  };
+
+  private static final ArrayList<StorableField> STORED_FIELD_LIST = new ArrayList<StorableField>(
+      1);
+  static {
+    STORED_FIELD_LIST.add(new StoredField("dummy", ""));
+  }
+
+  /**
+   * A document containing only one stored field, used as the last document
+   * in stacked segments.
+ */ + private static final IndexDocument STORED_FIELD_DOCUMENT = new IndexDocument() { + @Override + public Iterable storableFields() { + return STORED_FIELD_LIST; + } + + @Override + public Iterable indexableFields() { + return Collections.emptyList(); + } + }; +} Index: lucene/core/src/test/org/apache/lucene/index/TestCodecs.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (revision 1420477) +++ lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (working copy) @@ -621,7 +621,8 @@ final int termIndexInterval = _TestUtil.nextInt(random(), 13, 27); final Codec codec = Codec.getDefault(); final SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null, null); - final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, termIndexInterval, null, newIOContext(random())); + final SegmentWriteState state = + new SegmentWriteState(InfoStream.getDefault(), dir, si, 0, fieldInfos, termIndexInterval, null, null, newIOContext(random())); final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); Arrays.sort(fields); Index: lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java (revision 1420478) +++ lucene/core/src/test/org/apache/lucene/index/TestDeletionPolicy.java (working copy) @@ -258,7 +258,7 @@ String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - gen); + gen, false); dir.deleteFile(IndexFileNames.SEGMENTS_GEN); boolean oneSecondResolution = true; @@ -269,7 +269,7 @@ reader.close(); fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - gen); + gen, false); // if we are on a filesystem that seems to have only // 1 second resolution, allow +1 second in commit @@ -286,7 +286,7 @@ break; } - dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen, false)); gen--; } @@ -370,7 +370,7 @@ while(gen > 0) { IndexReader reader = DirectoryReader.open(dir); reader.close(); - dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen, false)); gen--; if (gen > 0) { @@ -602,7 +602,7 @@ } } if (i < N) { - dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen, false)); } gen--; } @@ -719,7 +719,7 @@ } } if (i < N) { - dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen, false)); } gen--; } Index: lucene/core/src/test/org/apache/lucene/index/TestDoc.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestDoc.java (revision 1416361) +++ lucene/core/src/test/org/apache/lucene/index/TestDoc.java (working copy) @@ -232,14 +232,14 @@ info.setFiles(new HashSet(trackingDir.getCreatedFiles())); if (useCompoundFile) { - Collection filesToDelete = IndexWriter.createCompoundFile(InfoStream.getDefault(), dir, MergeState.CheckAbort.NONE, info, 
newIOContext(random())); + Collection filesToDelete = IndexWriter.createCompoundFile(InfoStream.getDefault(), dir, MergeState.CheckAbort.NONE, info, newIOContext(random()), -1); info.setUseCompoundFile(true); for (final String fileToDelete : filesToDelete) { si1.info.dir.deleteFile(fileToDelete); } } - return new SegmentInfoPerCommit(info, 0, -1L); + return new SegmentInfoPerCommit(info, 0, -1L, -1L); } Index: lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java (revision 1420477) +++ lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java (working copy) @@ -57,14 +57,14 @@ if (random().nextInt(20) == 0 || j == ids.length - 1) { queue.updateSlice(slice1); assertTrue(slice1.isTailItem(term)); - slice1.apply(bd1, j); + slice1.apply(bd1, null, j); assertAllBetween(last1, j, bd1, ids); last1 = j + 1; } if (random().nextInt(10) == 5 || j == ids.length - 1) { queue.updateSlice(slice2); assertTrue(slice2.isTailItem(term)); - slice2.apply(bd2, j); + slice2.apply(bd2, null, j); assertAllBetween(last2, j, bd2, ids); last2 = j + 1; } @@ -167,7 +167,7 @@ queue.tryApplyGlobalSlice(); assertTrue("changes in global buffer", queue.anyChanges()); FrozenBufferedDeletes freezeGlobalBuffer = queue.freezeGlobalBuffer(null); - assertTrue(freezeGlobalBuffer.any()); + assertTrue(freezeGlobalBuffer.anyDeletes()); assertEquals(1, freezeGlobalBuffer.termCount); assertFalse("all changes applied", queue.anyChanges()); } @@ -198,7 +198,7 @@ DeleteSlice slice = updateThread.slice; queue.updateSlice(slice); BufferedDeletes deletes = updateThread.deletes; - slice.apply(deletes, BufferedDeletes.MAX_INT); + slice.apply(deletes, null, BufferedDeletes.MAX_INT); assertEquals(uniqueValues, deletes.terms.keySet()); } queue.tryApplyGlobalSlice(); @@ -243,9 +243,9 @@ int i = 0; while ((i = index.getAndIncrement()) < ids.length) { Term term = new Term("id", ids[i].toString()); - queue.add(term, slice); + queue.add(term, slice, null); assertTrue(slice.isTailItem(term)); - slice.apply(deletes, BufferedDeletes.MAX_INT); + slice.apply(deletes, null, BufferedDeletes.MAX_INT); } } } Index: lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (working copy) @@ -0,0 +1,353 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldsUpdate.Operation; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + +public class TestFieldReplacements extends LuceneTestCase { + private Directory dir; + + private static String[] fieldNames = null; + private static String[][] fieldTokens = null; + private static String loremIpsum = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, " + + "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. " + + "Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis " + + "nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in hendrerit " + + "in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis " + + "at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril " + + "delenit augue duis dolore te feugait nulla facilisi. Nam liber tempor cum soluta nobis " + + "eleifend option congue nihil imperdiet doming id quod mazim placerat facer possim assum. " + + "Typi non habent claritatem insitam; est usus legentis in iis qui facit eorum claritatem. " + + "Investigationes demonstraverunt lectores legere me lius quod ii legunt saepius. Claritas " + + "est etiam processus dynamicus, qui sequitur mutationem consuetudium lectorum. Mirum est " + + "notare quam littera gothica, quam nunc putamus parum claram, anteposuerit litterarum " + + "formas humanitatis per seacula quarta decima et quinta decima. 
Eodem modo typi, qui nunc " + + "nobis videntur parum clari, fiant sollemnes in futurum."; + + private final static boolean VERBOSE_FIELD_REPLACEMENTS = false; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + + // init fields data structures + int numFields = 4 + random().nextInt(4); + fieldNames = new String[numFields]; + fieldTokens = new String[numFields][]; + for (int i = 0; i < numFields; i++) { + fieldNames[i] = "f" + i; + ArrayList tokens = new ArrayList(); + final String[] allTokens = loremIpsum.split("\\s"); + for (int index = random().nextInt(2 + i); index < allTokens.length; index += 1 + random() + .nextInt(2 + i)) { + tokens.add(allTokens[index].toLowerCase()); + } + fieldTokens[i] = tokens.toArray(new String[tokens.size()]); + } + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + public void testEmptyIndex() throws IOException { + // test performing fields addition and replace on an empty index + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + + HashSet usedTerms = new HashSet(); + + Operation operation = Operation.REPLACE_FIELDS; + writer.updateFields(operation, getOperationTerm(usedTerms), + getFields(usedTerms)); + + operation = Operation.ADD_FIELDS; + writer.updateFields(operation, getOperationTerm(usedTerms), + getFields(usedTerms)); + + writer.close(); + + DirectoryReader directoryReader = DirectoryReader.open(dir); + assertEquals("Index should be empty", 0, directoryReader.maxDoc()); + directoryReader.close(); + } + + private void addDocuments() throws IOException { + + HashSet usedTerms = new HashSet(); + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + + // add random documents + int numDocs = 10 + random().nextInt(50); + int nCommits = 0; + for (int i = 0; i < numDocs; i++) { + + // create fields + Document fields = getFields(usedTerms); + + // select operation + int opIndex = random().nextInt(10); + Operation operation; + if (opIndex <= 1) { + if (opIndex == 0) { + operation = Operation.REPLACE_FIELDS; + } else { + operation = Operation.ADD_FIELDS; + } + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.println(operation); + } + + // create term if needed + Term term = getOperationTerm(usedTerms); + + writer.updateFields(operation, term, fields); + } else { + if (opIndex == 2) { + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.println("REPLACE_DOCUMENTS"); + } + Term term = getOperationTerm(usedTerms); + writer.replaceDocument(term, fields); + } else { + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.println("ADD_DOCUMENT"); + } + writer.addDocument(fields); + } + } + + // commit about once every 10 docs + int interCommit = random().nextInt(10); + if (interCommit == 0) { + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.println("commit " + (++nCommits)); + } + writer.commit(); + } + } + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.println("close"); + } + writer.close(); + } + + public Document getFields(HashSet usedTerms) { + Document fields = new Document(); + + int nFields = 1 + random().nextInt(5); + for (int j = 0; j < nFields; j++) { + boolean indexed = random().nextInt(8) > 0; + int index = random().nextInt(fieldNames.length); + String fieldName = fieldNames[index]; + String value = createFieldValue(fieldTokens[index], fieldName, indexed, + usedTerms); + + if (indexed) { + fields.add(new TextField(fieldName, value, 
Store.NO)); + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.print("Indexed\t"); + } + } else { + fields.add(new StoredField(fieldName, value)); + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.print("Stored\t"); + } + } + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.println(fieldName + "\t" + value); + } + } + return fields; + } + + public Term getOperationTerm(HashSet usedTerms) { + Term term = null; + boolean used = random().nextInt(5) < 4; + if (used && !usedTerms.isEmpty()) { + final Iterator iterator = usedTerms.iterator(); + int usedIndex = random().nextInt(usedTerms.size()); + for (int j = 0; j < usedIndex; j++) { + iterator.next(); + } + term = iterator.next(); + } else { + // select term + int fieldIndex = random().nextInt(fieldNames.length); + int textIndex = random().nextInt(fieldTokens[fieldIndex].length / 10); + term = new Term(fieldNames[fieldIndex], + fieldTokens[fieldIndex][textIndex]); + } + if (VERBOSE_FIELD_REPLACEMENTS) { + System.out.println("Term" + "\t" + term.field() + "\t" + term.text()); + } + return term; + } + + private String createFieldValue(String[] tokens, String fieldName, + boolean indexed, HashSet usedTerms) { + StringBuilder builder = new StringBuilder(); + + int index = random().nextInt(Math.min(10, tokens.length)); + + while (index < tokens.length) { + builder.append(tokens[index]); + builder.append(" "); + if (indexed) { + usedTerms.add(new Term(fieldName, tokens[index])); + } + index += 1 + random().nextInt(10); + } + + return builder.toString(); + } + + public void testRandomIndexGeneration() throws IOException { + addDocuments(); + DirectoryReader directoryReader = DirectoryReader.open(dir); + directoryReader.close(); + } + + public void testStatisticsAfterFieldUpdates() throws IOException { + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + + FieldType fieldType = new FieldType(); + fieldType.setIndexed(true); + fieldType.setTokenized(false); + fieldType.setOmitNorms(true); + fieldType.setStored(true); + + Document doc0 = new Document(); + doc0.add(new StoredField("f1", "a", fieldType)); + doc0.add(new StoredField("f1", "b", fieldType)); + writer.addDocument(doc0); + + Document doc1 = new Document(); + doc1.add(new StoredField("f1", "a", fieldType)); + doc1.add(new StoredField("f1", "c", fieldType)); + writer.addDocument(doc1); + + Document doc2 = new Document(); + doc2.add(new StoredField("f1", "b", fieldType)); + writer.addDocument(doc2); + + Document doc3 = new Document(); + doc3.add(new StoredField("f1", "d", fieldType)); + writer.updateFields(Operation.REPLACE_FIELDS, new Term("f1", "b"), doc3); + + writer.close(); + + DirectoryReader directoryReader = DirectoryReader.open(dir); + final AtomicReader atomicReader = directoryReader.leaves().get(0).reader(); + printField(atomicReader, "f1"); + + // check indexed fields + final DocsAndPositionsEnum termPositionsA = atomicReader + .termPositionsEnum(new Term("f1", "a")); + assertEquals("wrong doc id", 1, termPositionsA.nextDoc()); + assertEquals("wrong position", 0, termPositionsA.nextPosition()); + assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, + termPositionsA.nextDoc()); + + final DocsAndPositionsEnum termPositionsB = atomicReader + .termPositionsEnum(new Term("f1", "b")); + assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, + termPositionsB.nextDoc()); + + final DocsAndPositionsEnum termPositionsC = atomicReader + .termPositionsEnum(new Term("f1", "c")); + assertEquals("wrong doc id", 1, 
termPositionsC.nextDoc());
+    assertEquals("wrong position", 1, termPositionsC.nextPosition());
+    assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS,
+        termPositionsC.nextDoc());
+
+    final DocsAndPositionsEnum termPositionsD = atomicReader
+        .termPositionsEnum(new Term("f1", "d"));
+    assertEquals("wrong doc id", 0, termPositionsD.nextDoc());
+    assertEquals("wrong position", 0, termPositionsD.nextPosition());
+    assertEquals("wrong doc id", 2, termPositionsD.nextDoc());
+    assertEquals("wrong position", 0, termPositionsD.nextPosition());
+    assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS,
+        termPositionsD.nextDoc());
+
+    // check stored fields
+    final StoredDocument stored0 = atomicReader.document(0);
+    final StorableField[] f1_0 = stored0.getFields("f1");
+    assertEquals("wrong number of stored fields", 1, f1_0.length);
+    assertEquals("wrong field value", "d", f1_0[0].stringValue());
+
+    final StoredDocument stored1 = atomicReader.document(1);
+    final StorableField[] f1_1 = stored1.getFields("f1");
+    assertEquals("wrong number of stored fields", 2, f1_1.length);
+    assertEquals("wrong field value", "a", f1_1[0].stringValue());
+    assertEquals("wrong field value", "c", f1_1[1].stringValue());
+
+    final StoredDocument stored2 = atomicReader.document(2);
+    final StorableField[] f1_2 = stored2.getFields("f1");
+    assertEquals("wrong number of stored fields", 1, f1_2.length);
+    assertEquals("wrong field value", "d", f1_2[0].stringValue());
+
+    directoryReader.close();
+
+  }
+
+  private void printField(AtomicReader atomicReader, String fieldName)
+      throws IOException {
+    if (!VERBOSE_FIELD_REPLACEMENTS) {
+      return;
+    }
+
+    System.out.println("field: " + fieldName);
+    final Terms terms = atomicReader.fields().terms(fieldName);
+    final TermsEnum iterator = terms.iterator(null);
+    BytesRef term;
+    while ((term = iterator.next()) != null) {
+      System.out.println("term: " + term);
+      final DocsAndPositionsEnum termPositionsEnum = atomicReader
+          .termPositionsEnum(new Term(fieldName, term));
+      while (termPositionsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+        System.out.print("doc: " + termPositionsEnum.docID());
+        for (int i = 0; i < termPositionsEnum.freq(); i++) {
+          System.out.print("\t" + termPositionsEnum.nextPosition());
+        }
+        System.out.println();
+      }
+    }
+  }
+}
Index: lucene/core/src/test/org/apache/lucene/index/TestFieldUpdates.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestFieldUpdates.java (revision 0)
+++ lucene/core/src/test/org/apache/lucene/index/TestFieldUpdates.java (working copy)
@@ -0,0 +1,145 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.FieldsUpdate.Operation; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestFieldUpdates extends LuceneTestCase { + private Directory dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + public void testUpdateDocumentBeforeCommit() throws Exception { + createAndAssertSegment(false); + } + + public void testUpdateDocumentAfterCommit() throws Exception { + createAndAssertSegment(true); + } + + private void createAndAssertSegment(boolean interCommit) throws IOException { + // added doc contains at least the first field, updated field at least the + // last field, other fields split in the middle randomly + int numFields = DocHelper.numFields(); + int cutoff = random().nextInt(numFields - 2); + createSegment(cutoff + 1, interCommit); + assertSegment(); + } + + private void createSegment(int cutoff, boolean interCommit) + throws IOException { + // add base document + Document testDoc = new Document(); + DocHelper.setupDoc(testDoc, 0, cutoff); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + writer.addDocument(testDoc); + if (interCommit) { + writer.commit(); + } + + // add updates to base document + Document updateDoc = new Document(); + DocHelper.setupDoc(updateDoc, cutoff, DocHelper.numFields()); + writer.updateFields(Operation.ADD_FIELDS, new Term( + DocHelper.TEXT_FIELD_1_KEY, DocHelper.FIELD_1_TEXT.split(" ")[0]), + updateDoc); + writer.close(); + } + + private void assertSegment() throws IOException { + // After adding the document, we should be able to read it back in + DirectoryReader directoryReader = DirectoryReader.open(dir); + List leaves = directoryReader.leaves(); + assertEquals("wrong number of atomic readers", 1, leaves.size()); + AtomicReaderContext atomicReaderContext = leaves.get(0); + AtomicReader reader = atomicReaderContext.reader(); + assertTrue(reader != null); + StoredDocument doc = reader.document(0); + assertTrue(doc != null); + + // System.out.println("Document: " + doc); + StorableField[] fields = doc.getFields("textField2"); + assertTrue(fields != null && fields.length == 1); + assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT)); + assertTrue(fields[0].fieldType().storeTermVectors()); + + fields = doc.getFields("textField1"); + assertTrue(fields != null && fields.length == 1); + assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT)); + assertFalse(fields[0].fieldType().storeTermVectors()); + + fields = doc.getFields("keyField"); + assertTrue(fields != null && fields.length == 1); + assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT)); + + fields = doc.getFields(DocHelper.NO_NORMS_KEY); + assertTrue(fields != null && fields.length == 1); + assertTrue(fields[0].stringValue().equals(DocHelper.NO_NORMS_TEXT)); + + fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY); + assertTrue(fields != null && fields.length == 1); + assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT)); + + // test that the norms are not present in the segment if + // omitNorms is true + for (FieldInfo fi : reader.getFieldInfos()) { + if (fi.isIndexed()) { + assertTrue(fi.omitsNorms() 
== (reader.normValues(fi.name) == null)); + } + } + reader.close(); + } + + public void testSegmentWithDeletion() throws IOException { + // add base document + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + Document testDoc = new Document(); + DocHelper.setupDoc(testDoc, 0, 5); + writer.addDocument(testDoc); + testDoc = new Document(); + DocHelper.setupDoc(testDoc, 5, DocHelper.numFields()); + writer.addDocument(testDoc); + writer.commit(); + + writer.deleteDocuments(new Term(DocHelper.TEXT_FIELD_1_KEY, + DocHelper.FIELD_1_TEXT.split(" ")[0])); + writer.close(); + + assertSegment(); + } + +} Index: lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1416361) +++ lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy) @@ -1058,7 +1058,7 @@ final String segmentsFileName = SegmentInfos.getLastCommitSegmentsFileName(dir); IndexInput in = dir.openInput(segmentsFileName, newIOContext(random())); - IndexOutput out = dir.createOutput(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1+gen), newIOContext(random())); + IndexOutput out = dir.createOutput(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", 1+gen, false), newIOContext(random())); out.copyBytes(in, in.length()-1); byte b = in.readByte(); out.writeByte((byte) (1+b)); @@ -1104,7 +1104,7 @@ String fileNameIn = SegmentInfos.getLastCommitSegmentsFileName(dir); String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - 1+gen); + 1+gen, false); IndexInput in = dir.openInput(fileNameIn, newIOContext(random())); IndexOutput out = dir.createOutput(fileNameOut, newIOContext(random())); long length = in.length(); @@ -1210,7 +1210,7 @@ String fileNameIn = SegmentInfos.getLastCommitSegmentsFileName(dir); String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - 1+gen); + 1+gen, false); IndexInput in = dir.openInput(fileNameIn, newIOContext(random())); IndexOutput out = dir.createOutput(fileNameOut, newIOContext(random())); long length = in.length(); Index: lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 1416361) +++ lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) @@ -92,7 +92,7 @@ SegmentReader mergedReader = new SegmentReader(new SegmentInfoPerCommit( new SegmentInfo(mergedDir, Constants.LUCENE_MAIN_VERSION, mergedSegment, docsMerged, false, codec, null, null), - 0, -1L), + 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); assertTrue(mergedReader != null); assertTrue(mergedReader.numDocs() == 2); Index: lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java =================================================================== --- lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java (revision 1416361) +++ lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java (working copy) @@ -141,7 +141,7 @@ SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.getDocCount(), info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.attributes()); - destInfos.add(new 
SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen())); + destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen(), -1L)); // now copy files over Collection files = infoPerCommit.files(); for (final String srcName : files) { Index: lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (revision 1420477) +++ lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (working copy) @@ -481,8 +481,8 @@ long bytes = totalPostings * 8 + totalPayloadBytes; SegmentWriteState writeState = new SegmentWriteState(null, dir, - segmentInfo, newFieldInfos, - 32, null, new IOContext(new FlushInfo(maxDoc, bytes))); + segmentInfo, 0, newFieldInfos, + 32, null, null, new IOContext(new FlushInfo(maxDoc, bytes))); FieldsConsumer fieldsConsumer = codec.postingsFormat().fieldsConsumer(writeState); for(Map.Entry> fieldEnt : fields.entrySet()) { Index: lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java (revision 1416361) +++ lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java (working copy) @@ -254,6 +254,23 @@ } /** + * Adds part of the fields above to a document + * @param doc The document to write + * @param from index of the first field to add + * @param to index of the last field to add + 1 + */ + public static void setupDoc(Document doc, int from, int to) { + for (int i=from; i
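
The following minimal sketch shows how the field-update entry points exercised by TestFieldUpdates and TestFieldReplacements above are meant to be driven from application code. It is not part of the patch: the directory, analyzer, version constant and field names are placeholder assumptions, and only IndexWriter.updateFields and FieldsUpdate.Operation come from the change itself.

// Usage sketch only, not part of the patch. Assumes a Lucene 4.x-era setup;
// RAMDirectory, WhitespaceAnalyzer, Version.LUCENE_40 and the field names
// are illustrative placeholders.
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldsUpdate.Operation;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class FieldUpdateSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        Version.LUCENE_40, new WhitespaceAnalyzer(Version.LUCENE_40)));

    // base document, as in the tests above
    Document doc = new Document();
    doc.add(new TextField("id", "doc1", Store.NO));
    doc.add(new TextField("body", "original text", Store.NO));
    writer.addDocument(doc);

    // stack additional fields onto every document matching the term
    Document extraFields = new Document();
    extraFields.add(new StoredField("note", "added later"));
    writer.updateFields(Operation.ADD_FIELDS, new Term("id", "doc1"),
        extraFields);

    // replace only the fields named in the update document ("body" here)
    Document replacement = new Document();
    replacement.add(new TextField("body", "rewritten text", Store.NO));
    writer.updateFields(Operation.REPLACE_FIELDS, new Term("id", "doc1"),
        replacement);

    writer.close();
    dir.close();
  }
}

As the tests above indicate, ADD_FIELDS stacks the new fields on top of the existing ones, while REPLACE_FIELDS drops earlier generations of the named fields for every document matching the term before adding the new values.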