Index: lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(revision 1527612)
+++ lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(working copy)
@@ -30,6 +30,7 @@
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util._TestUtil;
 
 import org.junit.Test;
@@ -1211,5 +1212,74 @@
 
     writer.close();
     dir.close();
   }
+  
+  @Test
+  public void testTonsOfUpdates() throws Exception {
+    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
+    Directory dir = newFSDirectory(_TestUtil.getTempDir("tonsOfUpdates"));
+    final Random random = random();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
+    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc count
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    // test data: lots of documents (tens of thousands) and a modest number of update terms
+    final int numDocs = atLeast(40000);
+    final int numNumericFields = atLeast(5);
+    final int numTerms = _TestUtil.nextInt(random, 10, 100); // terms should affect many docs
+    Set<String> updateTerms = new HashSet<String>();
+    while (updateTerms.size() < numTerms) {
+      updateTerms.add(_TestUtil.randomSimpleString(random));
+    }
+
+    System.out.println("numDocs=" + numDocs + " numNumericFields=" + numNumericFields + " numTerms=" + numTerms); // nocommit
+
+    // build a large index with many NDV fields and update terms
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      int numUpdateTerms = _TestUtil.nextInt(random, 1, numTerms / 10);
+      for (int j = 0; j < numUpdateTerms; j++) {
+        doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
+      }
+      for (int j = 0; j < numNumericFields; j++) {
+        long val = random.nextInt();
+        doc.add(new NumericDocValuesField("f" + j, val));
+        doc.add(new NumericDocValuesField("cf" + j, val * 2));
+      }
+      writer.addDocument(doc);
+    }
+    
+    // shrink the RAM buffer to 0.05 MB, so we get many flushes during the numeric updates
+    writer.getConfig().setRAMBufferSizeMB(0.05);
+    final int numUpdates = atLeast(3000);
+    System.out.println("numUpdates=" + numUpdates); // nocommit
+    for (int i = 0; i < numUpdates; i++) {
+      int field = random.nextInt(numNumericFields);
+      Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
+      long value = random.nextInt();
+      writer.updateNumericDocValue(updateTerm, "f" + field, value);
+      writer.updateNumericDocValue(updateTerm, "cf" + field, value * 2);
+      
+      if (i % 100 == 0) { // nocommit use it to assert?
+        System.out.println(RamUsageEstimator.sizeOf(writer));
+      }
+    }
+    
+    writer.close();
+    
+    DirectoryReader reader = DirectoryReader.open(dir);
+    for (AtomicReaderContext context : reader.leaves()) {
+      for (int i = 0; i < numNumericFields; i++) {
+        NumericDocValues f = context.reader().getNumericDocValues("f" + i);
+        NumericDocValues cf = context.reader().getNumericDocValues("cf" + i);
+        for (int j = 0; j < context.reader().maxDoc(); j++) {
+          assertEquals(cf.get(j), f.get(j) * 2);
+        }
+      }
+    }
+    reader.close();
+    
+    dir.close();
+  }
 }
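
A note on the "nocommit use it to assert?" marker above: one way to resolve it before commit is to turn the RAM printout into a bound. The following is a minimal sketch, not part of the patch; MAX_WRITER_RAM_BYTES is a hypothetical threshold, chosen only to sit far above the 0.05 MB RAM buffer plus per-update bookkeeping:

    // Sketch only (hypothetical): bound the writer's measured RAM instead of printing it.
    // MAX_WRITER_RAM_BYTES is an assumed threshold, not a value taken from the patch.
    final long MAX_WRITER_RAM_BYTES = 32 * 1024 * 1024;
    if (i % 100 == 0) {
      long ramBytes = RamUsageEstimator.sizeOf(writer);
      assertTrue("buffered DV updates use too much RAM: " + ramBytes + " bytes",
                 ramBytes < MAX_WRITER_RAM_BYTES);
    }

RamUsageEstimator.sizeOf does a deep reflective walk of the object graph, so sampling only every 100 iterations, as the patch's update loop already does, keeps the measurement overhead tolerable.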