Index: lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java	(revision 1527318)
+++ lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java	(working copy)
@@ -23,6 +23,7 @@
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.NoSuchElementException;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.codecs.Codec;
@@ -351,6 +352,7 @@
     // We can write directly to the actual name (vs to a
     // .tmp & renaming it) because the file is not live
     // until segments file is written:
+    FieldInfos fieldInfos = null;
     boolean success = false;
     try {
       Codec codec = info.info.getCodec();
@@ -385,7 +387,7 @@
           builder.addOrUpdate(f, NumericDocValuesField.TYPE);
         }
         
-        final FieldInfos fieldInfos = builder.finish();
+        fieldInfos = builder.finish();
         final long nextFieldInfosGen = info.getNextFieldInfosGen();
         final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
         final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, IOContext.DEFAULT, segmentSuffix);
@@ -502,10 +504,25 @@
         copyUpdatesToMerging();
       }
       numericUpdates.clear();
+      
+      // create a new map, keeping only the gens that are in use
+      Map<Long,Set<String>> genUpdatesFiles = info.getUpdatesFiles();
+      Map<Long,Set<String>> newGenUpdatesFiles = new HashMap<Long,Set<String>>();
+      final long fieldInfosGen = info.getFieldInfosGen();
+      for (FieldInfo fi : fieldInfos) {
+        long dvGen = fi.getDocValuesGen();
+        if (dvGen != -1 && !newGenUpdatesFiles.containsKey(dvGen)) {
+          if (dvGen == fieldInfosGen) {
+            newGenUpdatesFiles.put(fieldInfosGen, trackingDir.getCreatedFiles());
+          } else {
+            newGenUpdatesFiles.put(dvGen, genUpdatesFiles.get(dvGen));
+          }
+        }
+      }
+      
+      info.setGenUpdatesFiles(newGenUpdatesFiles);
     }
     
-    info.addUpdatesFiles(trackingDir.getCreatedFiles());
-    
     return true;
   }
 
Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java	(revision 1527318)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java	(working copy)
@@ -19,7 +19,11 @@
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.lucene.store.Directory;
@@ -51,8 +55,8 @@
   // write
   private long nextWriteFieldInfosGen;
   
-  // Tracks the files with field updates
-  private Set<String> updatesFiles = new HashSet<String>();
+  // Track the per-generation updates files
+  private final Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>();
   
   private volatile long sizeInBytes = -1;
 
@@ -86,14 +90,15 @@
     }
   }
   
-  /** Returns the files which contains field updates. */
-  public Set<String> getUpdatesFiles() {
-    return new HashSet<String>(updatesFiles);
+  /** Returns the per generation updates files. */
+  public Map<Long,Set<String>> getUpdatesFiles() {
+    return Collections.unmodifiableMap(genUpdatesFiles);
   }
   
-  /** Called when we succeed in writing field updates. */
-  public void addUpdatesFiles(Set<String> files) {
-    updatesFiles.addAll(files);
+  /** Sets the updates file names per generation. Does not clone the map. */
+  public void setGenUpdatesFiles(Map<Long,Set<String>> genUpdatesFiles) {
+    this.genUpdatesFiles.clear();
+    this.genUpdatesFiles.putAll(genUpdatesFiles);
   }
   
   /** Called when we succeed in writing deletes */
@@ -151,7 +156,9 @@
     info.getCodec().liveDocsFormat().files(this, files);
     
     // Must separately add any field updates files
-    files.addAll(updatesFiles);
+    for (Set<String> updateFiles : genUpdatesFiles.values()) {
+      files.addAll(updateFiles);
+    }
     
     return files;
   }
@@ -248,7 +255,10 @@
     other.nextWriteDelGen = nextWriteDelGen;
     other.nextWriteFieldInfosGen = nextWriteFieldInfosGen;
     
-    other.updatesFiles.addAll(updatesFiles);
+    // deep clone
+    for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+      other.genUpdatesFiles.put(e.getKey(), new HashSet<String>(e.getValue()));
+    }
     
     return other;
   }
 
Index: lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java	(revision 1527318)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java	(working copy)
@@ -28,6 +28,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.lucene.codecs.Codec;
@@ -349,7 +350,17 @@
         }
         SegmentInfoPerCommit siPerCommit = new SegmentInfoPerCommit(info, delCount, delGen, fieldInfosGen);
         if (format >= VERSION_46) {
-          siPerCommit.addUpdatesFiles(input.readStringSet());
+          int numGensUpdatesFiles = input.readInt();
+          final Map<Long,Set<String>> genUpdatesFiles;
+          if (numGensUpdatesFiles == 0) {
+            genUpdatesFiles = Collections.emptyMap();
+          } else {
+            genUpdatesFiles = new HashMap<Long,Set<String>>();
+            for (int i = 0; i < numGensUpdatesFiles; i++) {
+              genUpdatesFiles.put(input.readLong(), input.readStringSet());
+            }
+          }
+          siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
         }
         add(siPerCommit);
       }
@@ -420,7 +431,12 @@
       segnOutput.writeLong(siPerCommit.getDelGen());
       segnOutput.writeInt(siPerCommit.getDelCount());
       segnOutput.writeLong(siPerCommit.getFieldInfosGen());
-      segnOutput.writeStringSet(siPerCommit.getUpdatesFiles());
+      final Map<Long,Set<String>> genUpdatesFiles = siPerCommit.getUpdatesFiles();
+      segnOutput.writeInt(genUpdatesFiles.size());
+      for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+        segnOutput.writeLong(e.getKey());
+        segnOutput.writeStringSet(e.getValue());
+      }
       
       assert si.dir == directory;
       assert siPerCommit.getDelCount() <= si.getDocCount();
Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java	(revision 1527318)
+++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java	(working copy)
@@ -110,7 +110,7 @@
       final DocValuesFormat dvFormat = codec.docValuesFormat();
       // initialize the per generation numericDVProducers and put the correct
       // DVProducer for each field
-      final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+      final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
       
 //      System.out.println("[" + Thread.currentThread().getName() + "] SR.init: new reader: " + si + "; gens=" + genInfos.keySet());
       
@@ -178,7 +178,7 @@
     
     final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
     final DocValuesFormat dvFormat = codec.docValuesFormat();
-    final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+    final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
     
     for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
       Long gen = e.getKey();
@@ -244,7 +244,7 @@
   }
   
   // returns a gen->List<FieldInfo> mapping. Fields without DV updates have gen=-1
-  private Map<Long,List<FieldInfo>> getGenInfos(SegmentInfoPerCommit si) {
+  private Map<Long,List<FieldInfo>> getGenInfos() {
     final Map<Long,List<FieldInfo>> genInfos = new HashMap<Long,List<FieldInfo>>();
     for (FieldInfo fi : fieldInfos) {
       if (fi.getDocValuesType() == null) {
Index: lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(revision 1527318)
+++ lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(working copy)
@@ -5,6 +5,7 @@
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -28,8 +29,8 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 
 import org.junit.Test;
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@@ -917,10 +918,11 @@
     final IndexWriter writer = new IndexWriter(dir, conf);
     
     // create index
-    final int numThreads = atLeast(3);
+    final int numThreads = _TestUtil.nextInt(random(), 3, 6);
     final int numDocs = atLeast(2000);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
+      doc.add(new StringField("id", "doc" + i, Store.NO));
       double group = random().nextDouble();
       String g;
       if (group < 0.1) g = "g0";
@@ -937,6 +939,7 @@
     }
     
     final CountDownLatch done = new CountDownLatch(numThreads);
+    final AtomicInteger numUpdates = new AtomicInteger(atLeast(100));
     
     // same thread updates a field as well as reopens
     Thread[] threads = new Thread[numThreads];
@@ -943,14 +946,14 @@
     for (int i = 0; i < threads.length; i++) {
       final String f = "f" + i;
       final String cf = "cf" + i;
-      final int numThreadUpdates = atLeast(40);
       threads[i] = new Thread("UpdateThread-" + i) {
        @Override
        public void run() {
+          DirectoryReader reader = null;
+          boolean success = false;
          try {
            Random random = random();
-            int numUpdates = numThreadUpdates;
-            while (numUpdates-- > 0) {
+            while (numUpdates.getAndDecrement() > 0) {
              double group = random.nextDouble();
              Term t;
              if (group < 0.1) t = new Term("updKey", "g0");
@@ -965,20 +968,43 @@
              if (random.nextDouble() < 0.2) { // delete a random document
                int doc = random.nextInt(numDocs);
+//                System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
                writer.deleteDocuments(new Term("id", "doc" + doc));
              }
-              
-              if (random.nextDouble() < 0.1) {
-                writer.commit(); // rarely commit
+              
+              if (random.nextDouble() < 0.05) { // commit every 20 updates on average
+//                System.out.println("[" + Thread.currentThread().getName() + "] commit");
+                writer.commit();
              }
              
-              if (random.nextDouble() < 0.3) { // obtain NRT reader (apply updates)
-                DirectoryReader.open(writer, true).close();
+              if (random.nextDouble() < 0.1) { // reopen NRT reader (apply updates), on average once every 10 updates
+                if (reader == null) {
+//                  System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
+                  reader = DirectoryReader.open(writer, true);
+                } else {
+//                  System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
+                  DirectoryReader r2 = DirectoryReader.openIfChanged(reader, writer, true);
+                  if (r2 != null) {
+                    reader.close();
+                    reader = r2;
+                  }
+                }
              }
            }
+//            System.out.println("[" + Thread.currentThread().getName() + "] DONE");
+            success = true;
          } catch (IOException e) {
            throw new RuntimeException(e);
          } finally {
+            if (reader != null) {
+              try {
+                reader.close();
+              } catch (IOException e) {
+                if (success) { // suppress this exception only if there was another exception
+                  throw new RuntimeException(e);
+                }
+              }
+            }
            done.countDown();
          }
        }
      };
@@ -1152,5 +1178,31 @@
     
     IOUtils.close(dir1, dir2);
   }
+  
+  @Test
+  public void testDeleteUnusedUpdatesFiles() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    Document doc = new Document();
+    doc.add(new StringField("id", "d0", Store.NO));
+    doc.add(new NumericDocValuesField("f", 1L));
+    writer.addDocument(doc);
+    
+    // create _0_1.fnm
+    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+    writer.commit();
+    
+    // create _0_2.fnm, and _0_1.fnm should be deleted
+    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+    writer.commit();
+    
+    assertTrue(dir.fileExists("_0_2.fnm"));
+    assertFalse("old generation field infos file should not exist in the directory: _0_1.fnm", dir.fileExists("_0_1.fnm"));
+    
+    writer.close();
+    dir.close();
+  }
 }