Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java	(revision 1528761)
+++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletes.java	(working copy)
@@ -66,30 +66,33 @@
 
   /* Rough logic: NumericUpdate calculates its actual size,
    * including the update Term and DV field (String). The
-   * per-term map holds a reference to the update Term, and
+   * per-field map holds a reference to the updated field, and
    * therefore we only account for the object reference and
    * map space itself. This is incremented when we first see
-   * an update Term.
+   * an updated field.
+   *
+   * HashMap has an array[Entry] w/ varying load
+   * factor (say 2*POINTER). Entry is an object w/ String key,
+   * LinkedHashMap val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT).
+   *
+   * LinkedHashMap (val) is counted as OBJ_HEADER, array[Entry] ref + header, 4*INT, 1*FLOAT,
+   * Set (entrySet) (2*OBJ_HEADER + ARRAY_HEADER + 2*POINTER + 4*INT + FLOAT)
+   */
+  final static int BYTES_PER_NUMERIC_FIELD_ENTRY =
+      7*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER +
+      RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + 5*RamUsageEstimator.NUM_BYTES_INT + RamUsageEstimator.NUM_BYTES_FLOAT;
+
+  /* Rough logic: Incremented when we see another Term for an already updated
+   * field.
    * LinkedHashMap has an array[Entry] w/ varying load factor
-   * (say 2*POINTER). Entry is an object w/ Term key, Map val,
+   * (say 2*POINTER). Entry is an object w/ Term key, NumericUpdate val,
    * int hash, Entry next, Entry before, Entry after (OBJ_HEADER + 5*POINTER + INT).
+   *
    * Term (key) is counted only as POINTER.
-   * Map (val) is counted as OBJ_HEADER, array[Entry] ref + header, 4*INT, 1*FLOAT,
-   * Set (entrySet) (2*OBJ_HEADER + ARRAY_HEADER + 2*POINTER + 4*INT + FLOAT)
+   * NumericUpdate (val) counts its own size and isn't accounted for here.
    */
-  final static int BYTES_PER_NUMERIC_UPDATE_TERM_ENTRY =
-      9*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 3*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER +
-      RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + 5*RamUsageEstimator.NUM_BYTES_INT + RamUsageEstimator.NUM_BYTES_FLOAT;
+  final static int BYTES_PER_NUMERIC_UPDATE_ENTRY = 7*RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT;
 
-  /* Rough logic: Incremented when we see another field for an already updated
-   * Term.
-   * HashMap has an array[Entry] w/ varying load
-   * factor (say 2*POINTER). Entry is an object w/ String key,
-   * NumericUpdate val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT).
-   * NumericUpdate returns its own size, and therefore isn't accounted for here.
-   */
-  final static int BYTES_PER_NUMERIC_UPDATE_ENTRY = 5*RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT;
-
   final AtomicInteger numTermDeletes = new AtomicInteger();
   final AtomicInteger numNumericUpdates = new AtomicInteger();
   final Map<Term,Integer> terms = new HashMap<Term,Integer>();
@@ -96,12 +99,14 @@
   final Map<Query,Integer> queries = new HashMap<Query,Integer>();
   final List<Integer> docIDs = new ArrayList<Integer>();
 
-  // Map<Term,Map<String,NumericUpdate>>
-  // LinkedHashMap because we need to preserve the order of the updates. That
-  // is, if two terms update the same document and same DV field, whoever came
-  // in last should win. LHM guarantees we iterate on the map in insertion
-  // order.
-  final Map<Term,Map<String,NumericUpdate>> numericUpdates = new LinkedHashMap<Term,Map<String,NumericUpdate>>();
+  // Map<dvField,Map<updateTerm,NumericUpdate>>
+  // For each field we keep an ordered list of NumericUpdates, key'd by the
+  // update Term. LinkedHashMap guarantees we will later traverse the map in
+  // insertion order (so that if two terms affect the same document, the last
+  // one that came in wins), and helps us detect faster if the same Term is
+  // used to update the same field multiple times (so we later traverse it
+  // only once).
+  final Map<String,LinkedHashMap<Term,NumericUpdate>> numericUpdates = new HashMap<String,LinkedHashMap<Term,NumericUpdate>>();
 
   public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE);
 
@@ -180,13 +185,13 @@
   }
 
   public void addNumericUpdate(NumericUpdate update, int docIDUpto) {
-    Map<String,NumericUpdate> termUpdates = numericUpdates.get(update.term);
-    if (termUpdates == null) {
-      termUpdates = new HashMap<String,NumericUpdate>();
-      numericUpdates.put(update.term, termUpdates);
-      bytesUsed.addAndGet(BYTES_PER_NUMERIC_UPDATE_TERM_ENTRY);
+    LinkedHashMap<Term,NumericUpdate> fieldUpdates = numericUpdates.get(update.field);
+    if (fieldUpdates == null) {
+      fieldUpdates = new LinkedHashMap<Term,NumericUpdate>();
+      numericUpdates.put(update.field, fieldUpdates);
+      bytesUsed.addAndGet(BYTES_PER_NUMERIC_FIELD_ENTRY);
     }
-    final NumericUpdate current = termUpdates.get(update.field);
+    final NumericUpdate current = fieldUpdates.get(update.term);
     if (current != null && docIDUpto < current.docIDUpto) {
       // Only record the new number if it's greater than or equal to the current
       // one. This is important because if multiple threads are replacing the
@@ -196,7 +201,12 @@
     }
 
     update.docIDUpto = docIDUpto;
-    termUpdates.put(update.field, update);
+    // since it's a LinkedHashMap, we must first remove the Term entry so that
+    // it's added last (we're interested in insertion-order).
+    if (current != null) {
+      fieldUpdates.remove(update.term);
+    }
+    fieldUpdates.put(update.term, update);
     numNumericUpdates.incrementAndGet();
    if (current == null) {
       bytesUsed.addAndGet(BYTES_PER_NUMERIC_UPDATE_ENTRY + update.sizeInBytes());
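
Note on the remove-then-put sequence in addNumericUpdate(): a LinkedHashMap in its
default insertion-order mode does not move a key to the end when an existing key is
re-put, so simply overwriting the Term entry would leave the fresh update in its old
position, and a stale update could then win at apply time. A minimal, self-contained
sketch of the difference (keys and values here are made up):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class InsertionOrderDemo {
      public static void main(String[] args) {
        Map<String,Integer> updates = new LinkedHashMap<String,Integer>();
        updates.put("t1", 1);
        updates.put("t2", 2);
        updates.put("t1", 3);         // re-put: iteration order is still [t1, t2]
        System.out.println(updates);  // {t1=3, t2=2}
        updates.remove("t1");
        updates.put("t1", 4);         // remove + put: order becomes [t2, t1]
        System.out.println(updates);  // {t2=2, t1=4}
      }
    }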
Index: lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java	(revision 1528761)
+++ lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java	(working copy)
@@ -19,6 +19,7 @@
 
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -78,9 +79,13 @@
       upto++;
     }
 
+    // TODO if a Term affects multiple fields, we could keep the updates key'd by Term
+    // so that it maps to all fields it affects, sorted by their docUpto, and traverse
+    // that Term only once, applying the update to all fields that still need to be
+    // updated.
    List<NumericUpdate> allUpdates = new ArrayList<NumericUpdate>();
     int numericUpdatesSize = 0;
-    for (Map<String,NumericUpdate> fieldUpdates : deletes.numericUpdates.values()) {
+    for (LinkedHashMap<Term,NumericUpdate> fieldUpdates : deletes.numericUpdates.values()) {
       for (NumericUpdate update : fieldUpdates.values()) {
         allUpdates.add(update);
         numericUpdatesSize += update.sizeInBytes();
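
To make the TODO above concrete: a follow-up could regroup the flattened updates by
Term before applying them, so each Term's postings are traversed once for all the
fields it updates. A rough sketch against this patch's data model (the byTerm map and
its placement are hypothetical, not part of the patch):

    // hypothetical regrouping for the TODO; each list could then be kept
    // sorted by docIDUpto and applied in a single traversal of the Term
    Map<Term,List<NumericUpdate>> byTerm = new HashMap<Term,List<NumericUpdate>>();
    for (LinkedHashMap<Term,NumericUpdate> fieldUpdates : deletes.numericUpdates.values()) {
      for (NumericUpdate update : fieldUpdates.values()) {
        List<NumericUpdate> updatesForTerm = byTerm.get(update.term);
        if (updatesForTerm == null) {
          updatesForTerm = new ArrayList<NumericUpdate>();
          byTerm.put(update.term, updatesForTerm);
        }
        updatesForTerm.add(update);
      }
    }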
Index: lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java	(revision 1528761)
+++ lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java	(working copy)
@@ -409,6 +409,7 @@
           fieldsConsumer.addNumericField(fieldInfo, new Iterable<Number>() {
             @SuppressWarnings("synthetic-access")
             final NumericDocValues currentValues = reader.getNumericDocValues(field);
+            final Bits docsWithField = reader.getDocsWithField(field);
             @Override
             public Iterator<Number> iterator() {
               return new Iterator<Number>() {
@@ -429,7 +430,10 @@
                 }
                 Long updatedValue = updates.get(curDoc);
                 if (updatedValue == null) {
-                  updatedValue = Long.valueOf(currentValues.get(curDoc));
+                  // only read the current value if the document had a value before
+                  if (currentValues != null && docsWithField.get(curDoc)) {
+                    updatedValue = currentValues.get(curDoc);
+                  }
                 } else if (updatedValue == NumericUpdate.MISSING) {
                   updatedValue = null;
                 }
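
The ReadersAndLiveDocs change boils down to a three-way resolution per document: an
explicit update wins; otherwise we fall back to the segment's current value, but only
if the document actually had one, so documents outside docsWithField neither trigger
an NPE (when the segment has no values for the field at all) nor silently acquire a
value of 0. The same rule as a standalone helper (the method itself is ours, for
illustration only):

    // mirrors the iterator logic above; a null result means the field
    // stays missing for this document
    static Long resolve(Long updated, NumericDocValues current, Bits docsWithField, int doc) {
      if (updated != null) {
        // NumericUpdate.MISSING is a sentinel Long, hence the reference comparison
        return updated == NumericUpdate.MISSING ? null : updated;
      }
      if (current != null && docsWithField.get(doc)) {
        return Long.valueOf(current.get(doc)); // document had a value before; keep it
      }
      return null;
    }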
Index: lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(revision 1528761)
+++ lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java	(working copy)
@@ -512,7 +512,7 @@
   }
 
   @Test
-  public void testUpdateNonDocValueField() throws Exception {
+  public void testUpdateNonNumericDocValuesField() throws Exception {
     // we don't support adding new fields or updating existing non-numeric-dv
     // fields through numeric updates
     Directory dir = newDirectory();
@@ -811,8 +811,11 @@
     // first segment with NDV
     Document doc = new Document();
     doc.add(new StringField("id", "doc0", Store.NO));
-    doc.add(new NumericDocValuesField("ndv", 5));
+    doc.add(new NumericDocValuesField("ndv", 3));
     writer.addDocument(doc);
+    doc = new Document();
+    doc.add(new StringField("id", "doc4", Store.NO)); // document without 'ndv' field
+    writer.addDocument(doc);
     writer.commit();
 
     // second segment with no NDV
@@ -819,9 +822,17 @@
     doc = new Document();
     doc.add(new StringField("id", "doc1", Store.NO));
     writer.addDocument(doc);
+    doc = new Document();
+    doc.add(new StringField("id", "doc2", Store.NO)); // document that isn't updated
+    writer.addDocument(doc);
     writer.commit();
 
-    // update document in the second segment
+    // update document in the first segment - should not affect docsWithField of
+    // the document without NDV field
+    writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L);
+
+    // update document in the second segment - field should be added and we should
+    // be able to handle the other document correctly (e.g. no NPE)
     writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L);
     writer.close();
 
@@ -829,9 +840,12 @@
     for (AtomicReaderContext context : reader.leaves()) {
       AtomicReader r = context.reader();
       NumericDocValues ndv = r.getNumericDocValues("ndv");
-      for (int i = 0; i < r.maxDoc(); i++) {
-        assertEquals(5L, ndv.get(i));
-      }
+      Bits docsWithField = r.getDocsWithField("ndv");
+      assertNotNull(docsWithField);
+      assertTrue(docsWithField.get(0));
+      assertEquals(5L, ndv.get(0));
+      assertFalse(docsWithField.get(1));
+      assertEquals(0L, ndv.get(1));
     }
     reader.close();
 
@@ -1236,4 +1250,31 @@
     dir.close();
   }
 
+  @Test
+  public void testUpdatesOrder() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    Document doc = new Document();
+    doc.add(new StringField("upd", "t1", Store.NO));
+    doc.add(new StringField("upd", "t2", Store.NO));
+    doc.add(new NumericDocValuesField("f1", 1L));
+    doc.add(new NumericDocValuesField("f2", 1L));
+    writer.addDocument(doc);
+    writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 2L); // update f1 to 2
+    writer.updateNumericDocValue(new Term("upd", "t1"), "f2", 2L); // update f2 to 2
+    writer.updateNumericDocValue(new Term("upd", "t2"), "f1", 3L); // update f1 to 3
+    writer.updateNumericDocValue(new Term("upd", "t2"), "f2", 3L); // update f2 to 3
+    writer.updateNumericDocValue(new Term("upd", "t1"), "f1", 4L); // update f1 to 4 (but not f2)
+    writer.close();
+
+    DirectoryReader reader = DirectoryReader.open(dir);
+    assertEquals(4, reader.leaves().get(0).reader().getNumericDocValues("f1").get(0));
+    assertEquals(3, reader.leaves().get(0).reader().getNumericDocValues("f2").get(0));
+    reader.close();
+
+    dir.close();
+  }
+
 }
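
The new testUpdatesOrder pins down the contract from the API side: for a given
document and field, the update issued last wins, no matter which term routed it
there. Abbreviated usage (field and term names are illustrative):

    writer.updateNumericDocValue(new Term("id", "doc0"), "price", 10L);
    writer.updateNumericDocValue(new Term("id", "doc0"), "price", 20L);
    // after writer.close() and reopening the index, the "price" value for doc0 is 20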