Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java	(revision 1528310)
+++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java	(working copy)
@@ -232,6 +232,10 @@
           assert fullDelCount <= rld.info.info.getDocCount();
           segAllDeletes = fullDelCount == rld.info.info.getDocCount();
         } finally {
+          if (anyNewUpdates) { // nocommit HACK: write pending updates and reopen the reader so its buffered RAM is freed
+            rld.writeLiveDocs(info.info.dir);
+            rld.reopenReader(IOContext.READ);
+          }
           rld.release(reader);
           readerPool.release(rld);
         }
@@ -278,7 +282,11 @@
           final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
           assert fullDelCount <= rld.info.info.getDocCount();
           segAllDeletes = fullDelCount == rld.info.info.getDocCount();
-        } finally {
+        } finally {
+          if (anyNewUpdates) { // nocommit HACK
+            rld.writeLiveDocs(info.info.dir);
+            rld.reopenReader(IOContext.READ);
+          }
           rld.release(reader);
           readerPool.release(rld);
         }
Index: lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(revision 1528310)
+++ lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(working copy)
@@ -31,7 +31,8 @@
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util._TestUtil;
 import org.junit.Test;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@@ -1235,5 +1236,94 @@
     writer.close();
     dir.close();
   }
+
+  @Test
+  public void testTonsOfUpdates() throws Exception {
+    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
+    Directory dir = newDirectory(); //newFSDirectory(_TestUtil.getTempDir("tonsOfUpdates"));
+    final Random random = random();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
+    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc count
+    conf.setCodec(new Lucene46Codec()); // nocommit __REMOVE
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    // test data: lots of documents (a few 10Ks) and lots of update terms (a few hundred)
+    final int numDocs = atLeast(20000);
+    final int numNumericFields = atLeast(5);
+    final int numTerms = _TestUtil.nextInt(random, 10, 100); // terms should affect many docs
+    Set<String> updateTerms = new HashSet<String>();
+    while (updateTerms.size() < numTerms) {
+      updateTerms.add(_TestUtil.randomSimpleString(random));
+    }
+
+    System.out.println("numDocs=" + numDocs + " numNumericFields=" + numNumericFields + " numTerms=" + numTerms); // nocommit __COMMENT
+
+    // build a large index with many NDV fields and update terms
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      int numUpdateTerms = _TestUtil.nextInt(random, 1, numTerms / 10);
+      for (int j = 0; j < numUpdateTerms; j++) {
+        doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
+      }
+      // each numeric field "f<j>" is paired with a control field "cf<j>" whose value is always 2*f<j>
+      for (int j = 0; j < numNumericFields; j++) {
+        long val = random.nextInt();
+        doc.add(new NumericDocValuesField("f" + j, val));
+        doc.add(new NumericDocValuesField("cf" + j, val * 2));
+      }
+      writer.addDocument(doc);
+    }
+
+    writer.commit(); // commit so there's something to apply the updates to
+
+    // set the writer to flush every 2048 bytes (approximately every 12 updates), so we
+    // get many flushes during the numeric updates
+    writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
+    final int numUpdates = atLeast(100);
+    System.out.println("numUpdates=" + numUpdates); // nocommit __COMMENT
+    long lastPoolSize = 0;
+    int numRawGrows = 0;
+    for (int i = 0; i < numUpdates; i++) {
+      int field = random.nextInt(numNumericFields);
+      Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
+      long value = random.nextInt();
+      writer.updateNumericDocValue(updateTerm, "f" + field, value);
+      writer.updateNumericDocValue(updateTerm, "cf" + field, value * 2);
+
+      // sample the ReaderPool size every 20 updates; it may grow while updates are
+      // buffered, but it must not keep growing unbounded
+      if (i % 20 == 0) {
+        long poolSize = RamUsageEstimator.sizeOf(writer.readerPool);
+        if (poolSize > lastPoolSize) {
+          ++numRawGrows;
+          assertTrue("too many successive increases in ReaderPool size", numRawGrows <= 10);
+          System.out.println("pool size grew by " + (poolSize - lastPoolSize) + " bytes; numRawGrows=" + numRawGrows); // nocommit __COMMENT
+        } else {
+          System.out.println("pool size shrunk (old=" + lastPoolSize + ", new=" + poolSize + "); reset numRawGrows"); // nocommit __COMMENT
+          numRawGrows = 0;
+        }
+        lastPoolSize = poolSize;
+      }
+    }
+
+    writer.close();
+
+    // verify that the updates were applied: every cf<i> value must equal 2*f<i>
+    DirectoryReader reader = DirectoryReader.open(dir);
+    for (AtomicReaderContext context : reader.leaves()) {
+      AtomicReader r = context.reader();
+      for (int i = 0; i < numNumericFields; i++) {
+        NumericDocValues f = r.getNumericDocValues("f" + i);
+        NumericDocValues cf = r.getNumericDocValues("cf" + i);
+        for (int j = 0; j < r.maxDoc(); j++) {
+          assertEquals("field=f" + i + ", doc=" + j, cf.get(j), f.get(j) * 2);
+        }
+      }
+    }
+    reader.close();
+
+    dir.close();
+  }
 
 }