From a1b49e2b82b21ae04575a76e631b4ecdb8884dcd Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Mon, 2 Sep 2013 02:19:16 -0700 Subject: [PATCH 1/6] Introduce ramBytesUsed to monitor index heap memory usage --- .../lucene/codecs/blockterms/BlockTermsReader.java | 7 +++ .../blockterms/FixedGapTermsIndexReader.java | 16 ++++++ .../codecs/blockterms/TermsIndexReaderBase.java | 3 ++ .../blockterms/VariableGapTermsIndexReader.java | 14 +++++ .../codecs/bloom/BloomFilteringPostingsFormat.java | 11 ++++ .../org/apache/lucene/codecs/bloom/FuzzySet.java | 6 ++- .../lucene/codecs/memory/DirectPostingsFormat.java | 63 ++++++++++++++++++++++ .../codecs/memory/MemoryDocValuesProducer.java | 9 ++++ .../lucene/codecs/memory/MemoryPostingsFormat.java | 16 ++++++ .../codecs/pulsing/PulsingPostingsReader.java | 5 ++ .../lucene/codecs/sep/SepPostingsReader.java | 5 ++ .../simpletext/SimpleTextDocValuesReader.java | 6 +++ .../codecs/simpletext/SimpleTextFieldsReader.java | 5 ++ .../simpletext/SimpleTextStoredFieldsReader.java | 5 ++ .../simpletext/SimpleTextTermVectorsReader.java | 5 ++ .../apache/lucene/codecs/BlockTreeTermsReader.java | 14 +++++ .../apache/lucene/codecs/DocValuesProducer.java | 3 ++ .../org/apache/lucene/codecs/FieldsProducer.java | 3 ++ .../apache/lucene/codecs/PostingsReaderBase.java | 4 ++ .../apache/lucene/codecs/StoredFieldsReader.java | 3 ++ .../apache/lucene/codecs/TermVectorsReader.java | 3 ++ .../CompressingStoredFieldsIndexReader.java | 19 +++++++ .../compressing/CompressingStoredFieldsReader.java | 5 ++ .../compressing/CompressingTermVectorsReader.java | 6 +++ .../codecs/lucene40/Lucene40DocValuesReader.java | 5 ++ .../codecs/lucene40/Lucene40PostingsReader.java | 6 +++ .../lucene40/Lucene40StoredFieldsReader.java | 5 ++ .../codecs/lucene40/Lucene40TermVectorsReader.java | 5 ++ .../codecs/lucene41/Lucene41PostingsReader.java | 6 +++ .../codecs/lucene42/Lucene42DocValuesProducer.java | 9 ++++ .../codecs/lucene45/Lucene45DocValuesProducer.java | 13 ++++- .../codecs/perfield/PerFieldDocValuesFormat.java | 15 ++++++ .../codecs/perfield/PerFieldPostingsFormat.java | 15 ++++++ .../java/org/apache/lucene/util/FixedBitSet.java | 5 ++ .../java/org/apache/lucene/util/PagedBytes.java | 5 ++ .../lucene/util/packed/BlockPackedReader.java | 8 +++ .../util/packed/MonotonicBlockPackedReader.java | 12 +++++ .../codecs/facet42/Facet42BinaryDocValues.java | 5 ++ .../codecs/facet42/Facet42DocValuesProducer.java | 9 ++++ .../codecs/asserting/AssertingDocValuesFormat.java | 6 +++ .../codecs/asserting/AssertingPostingsFormat.java | 5 ++ .../asserting/AssertingStoredFieldsFormat.java | 5 ++ .../asserting/AssertingTermVectorsFormat.java | 5 ++ .../codecs/ramonly/RAMOnlyPostingsFormat.java | 41 ++++++++++++++ 44 files changed, 419 insertions(+), 2 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index 7fa0e14..de16b57 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -842,4 +842,11 @@ public class BlockTermsReader extends FieldsProducer { } } } + + @Override + public long ramBytesUsed() { + long sizeInBytes = postingsReader.ramBytesUsed();; + sizeInBytes += indexReader.ramBytesUsed(); + return sizeInBytes; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java index 0d69e94..a6db6cf 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java @@ -256,6 +256,11 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { clone.close(); } } + + public long getSizeInBytes() { + return ((termOffsets!=null)? termOffsets.ramBytesUsed() : 0) + + ((termsDictOffsets!=null)? termsDictOffsets.ramBytesUsed() : 0); + } } @Override @@ -271,4 +276,15 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { dirOffset = input.readLong(); input.seek(dirOffset); } + + @Override + public long ramBytesUsed() { + long sizeInBytes = ((termBytes!=null) ? termBytes.ramBytesUsed() : 0) + + ((termBytesReader!=null)? termBytesReader.ramBytesUsed() : 0); + + for(FieldIndexData entry : fields.values()) { + sizeInBytes += entry.getSizeInBytes(); + } + return sizeInBytes; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java index 4a8d96f..330ef1a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/TermsIndexReaderBase.java @@ -70,4 +70,7 @@ public abstract class TermsIndexReaderBase implements Closeable { /** Only implemented if {@link TermsIndexReaderBase#supportsOrd()} returns true. */ public abstract long ord(); } + + /** Returns approximate RAM bytes used */ + public abstract long ramBytesUsed(); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java index 10d2abb..e877fc5 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java @@ -169,6 +169,11 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { w.close(); */ } + + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + return fst.sizeInBytes(); + } } @Override @@ -191,4 +196,13 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { } input.seek(dirOffset); } + + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(FieldIndexData entry : fields.values()) { + sizeInBytes += entry.ramBytesUsed(); + } + return sizeInBytes; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java index 982b72a..38b8602 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java @@ -48,6 +48,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.automaton.CompiledAutomaton; /** @@ -390,6 +391,16 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat { } + + @Override + public long ramBytesUsed() { + long sizeInBytes = ((delegateFieldsProducer!=null) ? delegateFieldsProducer.ramBytesUsed() : 0); + for(Map.Entry entry: bloomsByFieldName.entrySet()) { + sizeInBytes += entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR; + sizeInBytes += entry.getValue().ramBytesUsed(); + } + return sizeInBytes; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java index 835365b..665c894 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java @@ -302,4 +302,8 @@ public class FuzzySet { int numBitsSet = filter.cardinality(); return (float) numBitsSet / (float) bloomSize; } -} \ No newline at end of file + + public long ramBytesUsed() { + return filter.ramBytesUsed(); + } +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index b19b151..584cd9f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -44,6 +44,7 @@ import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.RunAutomaton; import org.apache.lucene.util.automaton.Transition; @@ -147,12 +148,25 @@ public final class DirectPostingsFormat extends PostingsFormat { @Override public void close() { } + + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(Map.Entry entry: fields.entrySet()) { + sizeInBytes += entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR; + sizeInBytes += entry.getValue().ramBytesUsed(); + } + return sizeInBytes; + } } private final static class DirectField extends Terms { private static abstract class TermAndSkip { public int[] skips; + + /** Returns the approximate number of RAM bytes used */ + public abstract long ramBytesUsed(); } private static final class LowFreqTerm extends TermAndSkip { @@ -167,6 +181,12 @@ public final class DirectPostingsFormat extends PostingsFormat { this.docFreq = docFreq; this.totalTermFreq = totalTermFreq; } + + @Override + public long ramBytesUsed() { + return ((postings!=null) ? RamUsageEstimator.sizeOf(postings) : 0) + + ((payloads!=null) ? RamUsageEstimator.sizeOf(payloads) : 0); + } } // TODO: maybe specialize into prx/no-prx/no-frq cases? @@ -184,6 +204,31 @@ public final class DirectPostingsFormat extends PostingsFormat { this.payloads = payloads; this.totalTermFreq = totalTermFreq; } + + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + sizeInBytes += (docIDs!=null)? RamUsageEstimator.sizeOf(docIDs) : 0; + sizeInBytes += (freqs!=null)? RamUsageEstimator.sizeOf(freqs) : 0; + + if(positions != null) { + for(int[] position : positions) { + sizeInBytes += (position!=null) ? RamUsageEstimator.sizeOf(position) : 0; + } + } + + if (payloads != null) { + for(byte[][] payload : payloads) { + if(payload != null) { + for(byte[] pload : payload) { + sizeInBytes += (pload!=null) ? RamUsageEstimator.sizeOf(pload) : 0; + } + } + } + } + + return sizeInBytes; + } } private final byte[] termBytes; @@ -444,6 +489,24 @@ public final class DirectPostingsFormat extends PostingsFormat { assert skipOffset == skipCount; } + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + long sizeInBytes = 0; + sizeInBytes += ((termBytes!=null) ? RamUsageEstimator.sizeOf(termBytes) : 0); + sizeInBytes += ((termOffsets!=null) ? RamUsageEstimator.sizeOf(termOffsets) : 0); + sizeInBytes += ((skips!=null) ? RamUsageEstimator.sizeOf(skips) : 0); + sizeInBytes += ((skipOffsets!=null) ? RamUsageEstimator.sizeOf(skipOffsets) : 0); + sizeInBytes += ((sameCounts!=null) ? RamUsageEstimator.sizeOf(sameCounts) : 0); + + if(terms!=null) { + for(TermAndSkip termAndSkip : terms) { + sizeInBytes += (termAndSkip!=null) ? termAndSkip.ramBytesUsed() : 0; + } + } + + return sizeInBytes; + } + // Compares in unicode (UTF8) order: int compare(int ord, BytesRef other) { final byte[] otherBytes = other.bytes; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java index 46ed8b8..d4b837e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesProducer.java @@ -202,6 +202,15 @@ class MemoryDocValuesProducer extends DocValuesProducer { return instance; } + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(FST fst : fstInstances.values()) { + sizeInBytes += fst.sizeInBytes() + Integer.SIZE; + } + return sizeInBytes; + } + private NumericDocValues loadNumeric(FieldInfo field) throws IOException { NumericEntry entry = numerics.get(field.number); data.seek(entry.offset + entry.missingBytes); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java index 37c4bd7..8f2611e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; +import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; @@ -49,6 +50,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.fst.Builder; import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.BytesRefFSTEnum; @@ -843,6 +845,10 @@ public final class MemoryPostingsFormat extends PostingsFormat { public boolean hasPayloads() { return field.hasPayloads(); } + + public long ramBytesUsed() { + return ((fst!=null) ? fst.sizeInBytes() : 0); + } } @Override @@ -889,6 +895,16 @@ public final class MemoryPostingsFormat extends PostingsFormat { termsReader.fst = null; } } + + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(Map.Entry entry: fields.entrySet()) { + sizeInBytes += (entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR); + sizeInBytes += entry.getValue().ramBytesUsed(); + } + return sizeInBytes; + } }; } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java index 6623b4e..9aacfae 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java @@ -630,4 +630,9 @@ public class PulsingPostingsReader extends PostingsReaderBase { // we don't want to copy any stuff over to another docsenum ever! } } + + @Override + public long ramBytesUsed() { + return ((wrappedPostingsReader!=null) ? wrappedPostingsReader.ramBytesUsed(): 0); + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java index 9334dca..8293e8a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java @@ -728,4 +728,9 @@ public class SepPostingsReader extends PostingsReaderBase { return docFreq; } } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java index 3753a62..1a89b04 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java @@ -62,6 +62,7 @@ class SimpleTextDocValuesReader extends DocValuesProducer { boolean fixedLength; long minValue; long numValues; + }; final int maxDoc; @@ -464,4 +465,9 @@ class SimpleTextDocValuesReader extends DocValuesProducer { private String stripPrefix(BytesRef prefix) throws IOException { return new String(scratch.bytes, scratch.offset + prefix.length, scratch.length - prefix.length, "UTF-8"); } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index 74b1d79..d63b62a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -656,4 +656,9 @@ class SimpleTextFieldsReader extends FieldsProducer { public void close() throws IOException { in.close(); } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java index 5e36fae..ea31336 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java @@ -192,4 +192,9 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader { return a.length == b.length - bOffset && ArrayUtil.equals(a.bytes, a.offset, b.bytes, b.offset + bOffset, b.length - bOffset); } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 2c33a0f..b650f1c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -538,4 +538,9 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { return 1; } } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java index fbe3400..e3e2cef 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java @@ -547,6 +547,11 @@ public class BlockTreeTermsReader extends FieldsProducer { return new IntersectEnum(compiled, startTerm); } + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + return ((index!=null)? index.sizeInBytes() : 0); + } + // NOTE: cannot seek! private final class IntersectEnum extends TermsEnum { private final IndexInput in; @@ -2929,4 +2934,13 @@ public class BlockTreeTermsReader extends FieldsProducer { } } } + + @Override + public long ramBytesUsed() { + long sizeInByes = ((postingsReader!=null) ? postingsReader.ramBytesUsed() : 0); + for(FieldReader reader : fields.values()) { + sizeInByes += reader.ramBytesUsed(); + } + return sizeInByes; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java index 05dfcf1..b492a9e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java @@ -64,6 +64,9 @@ public abstract class DocValuesProducer implements Closeable { * used by a single thread. */ public abstract Bits getDocsWithField(FieldInfo field) throws IOException; + /** Returns approximate RAM bytes used */ + public abstract long ramBytesUsed(); + /** * A simple implementation of {@link DocValuesProducer#getDocsWithField} that * returns {@code true} if a document has an ordinal >= 0 diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java index 6c55904..d81af81 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java @@ -36,4 +36,7 @@ public abstract class FieldsProducer extends Fields implements Closeable { @Override public abstract void close() throws IOException; + + /** Returns approximate RAM bytes used */ + public abstract long ramBytesUsed(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java index b8ea7f2..2340967 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java @@ -73,4 +73,8 @@ public abstract class PostingsReaderBase implements Closeable { * method should merely load the byte[] blob but not * decode, which is done in {@link #nextTerm}. */ public abstract void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState termState) throws IOException; + + /** Returns approximate RAM bytes used */ + public abstract long ramBytesUsed(); + } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java index 7bc8df3..315f574 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java @@ -40,4 +40,7 @@ public abstract class StoredFieldsReader implements Cloneable, Closeable { @Override public abstract StoredFieldsReader clone(); + + /** Returns approximate RAM bytes used */ + public abstract long ramBytesUsed(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java index 46a7a04..95472cb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsReader.java @@ -42,6 +42,9 @@ public abstract class TermVectorsReader implements Cloneable, Closeable { * available from the {@link DocsAndPositionsEnum}. */ public abstract Fields get(int doc) throws IOException; + /** Returns approximate RAM bytes used */ + public abstract long ramBytesUsed(); + /** Create a clone that one caller at a time may use to * read term vectors. */ @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java index c73942f..ed21766 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; /** @@ -160,5 +161,23 @@ public final class CompressingStoredFieldsIndexReader implements Cloneable { public CompressingStoredFieldsIndexReader clone() { return this; } + + public long ramBytesUsed() { + long res = 0; + + for(PackedInts.Reader r : docBasesDeltas) { + res += r.ramBytesUsed(); + } + for(PackedInts.Reader r : startPointersDeltas) { + res += r.ramBytesUsed(); + } + + res += RamUsageEstimator.sizeOf(docBases); + res += RamUsageEstimator.sizeOf(startPointers); + res += RamUsageEstimator.sizeOf(avgChunkDocs); + res += RamUsageEstimator.sizeOf(avgChunkSizes); + + return res; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java index 29fed88..2e394eb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java @@ -410,4 +410,9 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { } + @Override + public long ramBytesUsed() { + return indexReader.ramBytesUsed(); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java index 073e60c..e3bcd83 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java @@ -57,6 +57,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.BlockPackedReaderIterator; import org.apache.lucene.util.packed.PackedInts; @@ -1041,4 +1042,9 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem return sum; } + @Override + public long ramBytesUsed() { + return indexReader.ramBytesUsed(); + } + } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java index 5461770..1ba8bce 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java @@ -630,4 +630,9 @@ final class Lucene40DocValuesReader extends DocValuesProducer { public void close() throws IOException { dir.close(); } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index 9c2c86f..2cc777b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -1195,4 +1195,10 @@ public class Lucene40PostingsReader extends PostingsReaderBase { return limit; } } + + @Override + public long ramBytesUsed() { + return 0; + } + } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java index a5d7b5b..651bbde 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java @@ -244,4 +244,9 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme return fieldsStream; } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java index 11f6b3d..bb0b2c5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java @@ -763,5 +763,10 @@ public class Lucene40TermVectorsReader extends TermVectorsReader implements Clos return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs); } + + @Override + public long ramBytesUsed() { + return 0; + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java index 500ab20..f7dc0cb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java @@ -1604,4 +1604,10 @@ public final class Lucene41PostingsReader extends PostingsReaderBase { return docFreq; } } + + @Override + public long ramBytesUsed() { + return 0; + } + } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java index 7b111c5..9bb57ca 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java @@ -188,6 +188,15 @@ class Lucene42DocValuesProducer extends DocValuesProducer { return instance; } + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(FST fst: fstInstances.values()) { + sizeInBytes += Integer.SIZE + fst.sizeInBytes(); + } + return sizeInBytes; + } + private NumericDocValues loadNumeric(FieldInfo field) throws IOException { NumericEntry entry = numerics.get(field.number); data.seek(entry.offset); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java index b12fa6d..620997a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java @@ -20,7 +20,6 @@ package org.apache.lucene.codecs.lucene45; import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.DELTA_COMPRESSED; import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.GCD_COMPRESSED; import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.TABLE_COMPRESSED; - import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED; @@ -244,6 +243,18 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos return getNumeric(entry); } + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(MonotonicBlockPackedReader monotonicBlockPackedReader: addressInstances.values()) { + sizeInBytes += Integer.SIZE + monotonicBlockPackedReader.ramBytesUsed(); + } + for(MonotonicBlockPackedReader monotonicBlockPackedReader: ordIndexInstances.values()) { + sizeInBytes += Integer.SIZE + monotonicBlockPackedReader.ramBytesUsed(); + } + return sizeInBytes; + } + LongNumericDocValues getNumeric(NumericEntry entry) throws IOException { final IndexInput data = this.data.clone(); data.seek(entry.offset); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index 3ed6797..955a4a9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -39,6 +39,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; /** * Enables per field docvalues support. @@ -282,6 +283,20 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { public DocValuesProducer clone() { return new FieldsReader(this); } + + @Override + public long ramBytesUsed() { + long size = 0; + for (Map.Entry entry : fields.entrySet()) { + size += (entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR) + + entry.getValue().ramBytesUsed(); + } + for (Map.Entry entry : formats.entrySet()) { + size += (entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR) + + entry.getValue().ramBytesUsed(); + } + return size; + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java index 1963424..583fcdb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java @@ -35,6 +35,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Terms; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; /** * Enables per field postings support. @@ -225,6 +226,20 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { public void close() throws IOException { IOUtils.close(formats.values()); } + + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(Map.Entry entry: fields.entrySet()) { + sizeInBytes += entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR; + sizeInBytes += entry.getValue().ramBytesUsed(); + } + for(Map.Entry entry: formats.entrySet()) { + sizeInBytes += entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR; + sizeInBytes += entry.getValue().ramBytesUsed(); + } + return sizeInBytes; + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java index 42ad096..55413af 100644 --- a/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java +++ b/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java @@ -433,4 +433,9 @@ public final class FixedBitSet extends DocIdSet implements Bits { // empty sets from returning 0, which is too common. return (int) ((h>>32) ^ h) + 0x98761234; } + + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + return ((bits!=null) ? RamUsageEstimator.sizeOf(bits) : 0); + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java index 429d274..f885cb7 100644 --- a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java +++ b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java @@ -126,6 +126,11 @@ public final class PagedBytes { assert b.length > 0; } } + + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + return ((blocks!=null) ? (blockSize * blocks.length) : 0); + } } /** 1<<blockBits must be bigger than biggest single diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java index ff35ec1..a40e20e 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java @@ -85,4 +85,12 @@ public final class BlockPackedReader { return (minValues == null ? 0 : minValues[block]) + subReaders[block].get(idx); } + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + long size = 0; + for (PackedInts.Reader reader : subReaders) { + size += reader.ramBytesUsed(); + } + return size; + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java index 7eec87b..8bc33e4 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/MonotonicBlockPackedReader.java @@ -26,6 +26,7 @@ import static org.apache.lucene.util.packed.PackedInts.numBlocks; import java.io.IOException; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.RamUsageEstimator; /** * Provides random access to a stream written with @@ -83,5 +84,16 @@ public final class MonotonicBlockPackedReader { public long size() { return valueCount; } + + /** Returns the approximate RAM bytes used */ + public long ramBytesUsed() { + long sizeInBytes = 0; + sizeInBytes += RamUsageEstimator.sizeOf(minValues); + sizeInBytes += RamUsageEstimator.sizeOf(averages); + for(PackedInts.Reader reader: subReaders) { + sizeInBytes += reader.ramBytesUsed(); + } + return sizeInBytes; + } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java index 192f910..147def9 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42BinaryDocValues.java @@ -44,4 +44,9 @@ class Facet42BinaryDocValues extends BinaryDocValues { ret.length = (int) (addresses.get(docID+1)-start); } + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + return addresses.ramBytesUsed(); + } + } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java index 80daa1c..ac8a768 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/Facet42DocValuesProducer.java @@ -91,4 +91,13 @@ class Facet42DocValuesProducer extends DocValuesProducer { @Override public void close() throws IOException { } + + @Override + public long ramBytesUsed() { + long size = 0; + for (Facet42BinaryDocValues entry: fields.values()) { + size += entry.ramBytesUsed() + Integer.SIZE; + } + return size; + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java index f3525a4..c33aee4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java @@ -37,6 +37,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.RamUsageEstimator; /** * Just like {@link Lucene45DocValuesFormat} but with additional asserts. @@ -301,5 +302,10 @@ public class AssertingDocValuesFormat extends DocValuesFormat { public void close() throws IOException { in.close(); } + + @Override + public long ramBytesUsed() { + return ((in!=null) ? in.ramBytesUsed() : 0); + } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java index 94b8811..6983fda 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java @@ -86,6 +86,11 @@ public final class AssertingPostingsFormat extends PostingsFormat { public int size() { return in.size(); } + + @Override + public long ramBytesUsed() { + return (in!=null) ? in.ramBytesUsed() : 0; + } } static class AssertingFieldsConsumer extends FieldsConsumer { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java index 6fa8248..868e573 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java @@ -71,6 +71,11 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat { public StoredFieldsReader clone() { return new AssertingStoredFieldsReader(in.clone(), maxDoc); } + + @Override + public long ramBytesUsed() { + return ((in!=null) ? in.ramBytesUsed() : 0); + } } enum Status { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java index 7bdf85b..26bc40b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java @@ -71,6 +71,11 @@ public class AssertingTermVectorsFormat extends TermVectorsFormat { public TermVectorsReader clone() { return new AssertingTermVectorsReader(in.clone()); } + + @Override + public long ramBytesUsed() { + return (in!=null)?in.ramBytesUsed():0; + } } enum Status { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java index 6d35683..0dcc75e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -50,6 +50,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; /** Stores all postings data in RAM, but writes a small * token (header + single int) to identify which "slot" the @@ -120,6 +121,15 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { @Override public void close() { } + + @Override + public long ramBytesUsed() { + long sizeInBytes = 0; + for(RAMField field : fieldToTerms.values()) { + sizeInBytes += field.ramBytesUsed(); + } + return sizeInBytes; + } } static class RAMField extends Terms { @@ -135,6 +145,15 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { this.info = info; } + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + long sizeInBytes = 0; + for(RAMTerm term : termToDocs.values()) { + sizeInBytes += term.ramBytesUsed(); + } + return sizeInBytes; + } + @Override public long size() { return termToDocs.size(); @@ -188,6 +207,15 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { public RAMTerm(String term) { this.term = term; } + + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + long sizeInBytes = 0; + for(RAMDoc rDoc : docs) { + sizeInBytes += rDoc.ramBytesUsed(); + } + return sizeInBytes; + } } static class RAMDoc { @@ -199,6 +227,19 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat { this.docID = docID; positions = new int[freq]; } + + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + long sizeInBytes = 0; + sizeInBytes += (positions!=null) ? RamUsageEstimator.sizeOf(positions) : 0; + + if (payloads != null) { + for(byte[] payload: payloads) { + sizeInBytes += (payload!=null) ? RamUsageEstimator.sizeOf(payload) : 0; + } + } + return sizeInBytes; + } } // Classes for writing to the postings state -- 1.8.3.2 From d6078d5dfd40c5a09819755efdebb4df08c1b519 Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Mon, 2 Sep 2013 02:22:45 -0700 Subject: [PATCH 2/6] minor fix --- .../java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index de16b57..cee2230 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -845,8 +845,8 @@ public class BlockTermsReader extends FieldsProducer { @Override public long ramBytesUsed() { - long sizeInBytes = postingsReader.ramBytesUsed();; - sizeInBytes += indexReader.ramBytesUsed(); + long sizeInBytes = (postingsReader!=null) ? postingsReader.ramBytesUsed() : 0; + sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0; return sizeInBytes; } } -- 1.8.3.2 From 54a51fe0ce5b7a40326f950697d61b1f85a32b4a Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Mon, 2 Sep 2013 16:27:14 -0700 Subject: [PATCH 3/6] Added ramBytesUsed method to SegmentReader and SegmentCoreReaders to get approximate RAM bytes used --- .../core/src/java/org/apache/lucene/index/SegmentCoreReaders.java | 8 ++++++++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 3a526aa..4fa1273 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -354,6 +354,14 @@ final class SegmentCoreReaders { coreClosedListeners.remove(listener); } + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + return ((dvProducer!=null) ? dvProducer.ramBytesUsed() : 0) + + ((normsProducer!=null) ? normsProducer.ramBytesUsed() : 0) + + ((fields!=null) ? fields.ramBytesUsed() : 0) + + ((fieldsReaderOrig!=null)? fieldsReaderOrig.ramBytesUsed() : 0) + + ((termVectorsReaderOrig!=null) ? termVectorsReaderOrig.ramBytesUsed() : 0); + } @Override public String toString() { return "SegmentCoreReader(owner=" + owner + ")"; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index c6cf702..694a584 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -283,4 +283,9 @@ public final class SegmentReader extends AtomicReader { ensureOpen(); core.removeCoreClosedListener(listener); } + + /** Returns approximate RAM Bytes used */ + public long ramBytesUsed() { + return (core!=null) ? core.ramBytesUsed() : 0; + } } -- 1.8.3.2 From ae036487b1862235c2b2b9c5b22bca977d05d167 Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Tue, 3 Sep 2013 11:07:24 -0700 Subject: [PATCH 4/6] Removed redundant null checks --- .../org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java | 2 +- .../org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java | 2 +- .../org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java | 2 +- .../org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java index c33aee4..7095d17 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java @@ -305,7 +305,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat { @Override public long ramBytesUsed() { - return ((in!=null) ? in.ramBytesUsed() : 0); + return in.ramBytesUsed(); } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java index 6983fda..ea8240f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java @@ -89,7 +89,7 @@ public final class AssertingPostingsFormat extends PostingsFormat { @Override public long ramBytesUsed() { - return (in!=null) ? in.ramBytesUsed() : 0; + return in.ramBytesUsed(); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java index 868e573..b637559 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java @@ -74,7 +74,7 @@ public class AssertingStoredFieldsFormat extends StoredFieldsFormat { @Override public long ramBytesUsed() { - return ((in!=null) ? in.ramBytesUsed() : 0); + return in.ramBytesUsed(); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java index 26bc40b..b7bf216 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java @@ -74,7 +74,7 @@ public class AssertingTermVectorsFormat extends TermVectorsFormat { @Override public long ramBytesUsed() { - return (in!=null)?in.ramBytesUsed():0; + return in.ramBytesUsed(); } } -- 1.8.3.2 From c79e007a8620a3e2ace7abefdfd7391cb86df27a Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Wed, 4 Sep 2013 10:46:13 -0700 Subject: [PATCH 5/6] Renamed getSizeInBytes to ramBytesUsed --- .../apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java index a6db6cf..621d9ce 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java @@ -257,7 +257,8 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { } } - public long getSizeInBytes() { + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { return ((termOffsets!=null)? termOffsets.ramBytesUsed() : 0) + ((termsDictOffsets!=null)? termsDictOffsets.ramBytesUsed() : 0); } @@ -283,7 +284,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { ((termBytesReader!=null)? termBytesReader.ramBytesUsed() : 0); for(FieldIndexData entry : fields.values()) { - sizeInBytes += entry.getSizeInBytes(); + sizeInBytes += entry.ramBytesUsed(); } return sizeInBytes; } -- 1.8.3.2 From e840a0a4f3f959f32d65b55572dd4044c4c7cc6e Mon Sep 17 00:00:00 2001 From: Areek Zillur Date: Wed, 4 Sep 2013 12:50:43 -0700 Subject: [PATCH 6/6] Take into account termsCache in SimpleTextFieldReader for index heap size --- .../lucene/codecs/simpletext/SimpleTextFieldsReader.java | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index d63b62a..fa9a78a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -574,6 +574,11 @@ class SimpleTextFieldsReader extends FieldsProducer { */ //System.out.println("FST " + fst.sizeInBytes()); } + + /** Returns approximate RAM bytes used */ + public long ramBytesUsed() { + return (fst!=null) ? fst.sizeInBytes() : 0; + } @Override public TermsEnum iterator(TermsEnum reuse) throws IOException { @@ -630,7 +635,7 @@ class SimpleTextFieldsReader extends FieldsProducer { return Collections.unmodifiableSet(fields.keySet()).iterator(); } - private final Map termsCache = new HashMap(); + private final Map termsCache = new HashMap(); @Override synchronized public Terms terms(String field) throws IOException { @@ -641,7 +646,7 @@ class SimpleTextFieldsReader extends FieldsProducer { return null; } else { terms = new SimpleTextTerms(field, fp); - termsCache.put(field, terms); + termsCache.put(field, (SimpleTextTerms) terms); } } return terms; @@ -659,6 +664,10 @@ class SimpleTextFieldsReader extends FieldsProducer { @Override public long ramBytesUsed() { - return 0; + long sizeInBytes = 0; + for(SimpleTextTerms simpleTextTerms : termsCache.values()) { + sizeInBytes += (simpleTextTerms!=null) ? simpleTextTerms.ramBytesUsed() : 0; + } + return sizeInBytes; } } -- 1.8.3.2