Index: lucene/core/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java (revision 1301361) +++ lucene/core/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java (working copy) @@ -20,59 +20,41 @@ import java.util.IdentityHashMap; import java.util.Map; -/** - * An average, best guess, MemoryModel that should work okay on most systems. - * - */ +/** An average, best guess, MemoryModel that should work okay on most systems. */ public class AverageGuessMemoryModel extends MemoryModel { + // best guess primitive sizes private final Map<Class<?>,Integer> sizes = new IdentityHashMap<Class<?>,Integer>() { { put(boolean.class, Integer.valueOf(1)); put(byte.class, Integer.valueOf(1)); - put(char.class, Integer.valueOf(2)); - put(short.class, Integer.valueOf(2)); - put(int.class, Integer.valueOf(4)); - put(float.class, Integer.valueOf(4)); - put(double.class, Integer.valueOf(8)); - put(long.class, Integer.valueOf(8)); + put(char.class, Integer.valueOf(RamUsageEstimator.NUM_BYTES_CHAR)); + put(short.class, Integer.valueOf(RamUsageEstimator.NUM_BYTES_SHORT)); + put(int.class, Integer.valueOf(RamUsageEstimator.NUM_BYTES_INT)); + put(float.class, Integer.valueOf(RamUsageEstimator.NUM_BYTES_FLOAT)); + put(double.class, Integer.valueOf(RamUsageEstimator.NUM_BYTES_DOUBLE)); + put(long.class, Integer.valueOf(RamUsageEstimator.NUM_BYTES_LONG)); } }; - /* - * (non-Javadoc) - * - * @see org.apache.lucene.util.MemoryModel#getArraySize() - */ @Override public int getArraySize() { - return 16; + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; } - /* - * (non-Javadoc) - * - * @see org.apache.lucene.util.MemoryModel#getClassSize() - */ @Override public int getClassSize() { - return 8; + return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER; } - /* (non-Javadoc) - * @see org.apache.lucene.util.MemoryModel#getPrimitiveSize(java.lang.Class) - */ @Override 
public int getPrimitiveSize(Class clazz) { return sizes.get(clazz).intValue(); } - /* (non-Javadoc) - * @see org.apache.lucene.util.MemoryModel#getReferenceSize() - */ @Override public int getReferenceSize() { - return 4; + return RamUsageEstimator.NUM_BYTES_OBJECT_REF; } } Index: lucene/core/src/java/org/apache/lucene/util/Constants.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/Constants.java (revision 1301361) +++ lucene/core/src/java/org/apache/lucene/util/Constants.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import java.lang.reflect.Field; import org.apache.lucene.LucenePackage; /** @@ -49,21 +50,35 @@ public static final boolean JRE_IS_MINIMUM_JAVA6 = new Boolean(true).booleanValue(); // prevent inlining in foreign class files - public static final boolean JRE_IS_64BIT; + /** True iff running on a 64bit JVM */ + public static final boolean JRE_IS_64BIT; + public static final boolean JRE_IS_MINIMUM_JAVA7; + static { - // NOTE: this logic may not be correct; if you know of a - // more reliable approach please raise it on java-dev! 
- final String x = System.getProperty("sun.arch.data.model"); - if (x != null) { - JRE_IS_64BIT = x.indexOf("64") != -1; - } else { - if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) { - JRE_IS_64BIT = true; + boolean is64Bit = false; + try { + final Class unsafeClass = Class.forName("sun.misc.Unsafe"); + final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe"); + unsafeField.setAccessible(true); + final Object unsafe = unsafeField.get(null); + final int addressSize = ((Number) unsafeClass.getMethod("addressSize") + .invoke(unsafe)).intValue(); + //System.out.println("Address size: " + addressSize); + is64Bit = addressSize >= 8; + } catch (Exception e) { + final String x = System.getProperty("sun.arch.data.model"); + if (x != null) { + is64Bit = x.indexOf("64") != -1; } else { - JRE_IS_64BIT = false; + if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) { + is64Bit = true; + } else { + is64Bit = false; + } } } + JRE_IS_64BIT = is64Bit; // this method only exists in Java 7: boolean v7 = true; Index: lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (revision 1301361) +++ lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (working copy) @@ -27,8 +27,9 @@ * * Resource Usage: * - * Internally uses a Map to temporally hold a reference to every - * object seen. + * Internally uses a Set to temporally hold a reference to every + * object seen. + * This class is not thread safe, use one instance for each thread! 
* * If checkInterned, all Strings checked will be interned, but those * that were not already interned will be released for GC when the @@ -44,13 +45,97 @@ public final static int NUM_BYTES_FLOAT = 4; public final static int NUM_BYTES_DOUBLE = 8; public final static int NUM_BYTES_CHAR = 2; - public final static int NUM_BYTES_OBJECT_HEADER = 8; - public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4; - public final static int NUM_BYTES_ARRAY_HEADER = NUM_BYTES_OBJECT_HEADER + NUM_BYTES_INT + NUM_BYTES_OBJECT_REF; + + public final static int NUM_BYTES_OBJECT_HEADER; + public final static int NUM_BYTES_OBJECT_REF; + public final static int NUM_BYTES_ARRAY_HEADER; + static { + int referenceSize = Constants.JRE_IS_64BIT ? 8 : 4; + int objectHeader = Constants.JRE_IS_64BIT ? 16 : 8; + // the following is objectHeader + NUM_BYTES_INT, but aligned to platform address size: + int arrayHeader = Constants.JRE_IS_64BIT ? 24 : 12; + try { + final Class unsafeClass = Class.forName("sun.misc.Unsafe"); + final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe"); + unsafeField.setAccessible(true); + final Object unsafe = unsafeField.get(null); + try { + final Method arrayIndexScaleMethod = unsafeClass.getMethod("arrayIndexScale", Class.class); + referenceSize = ((Number) arrayIndexScaleMethod.invoke(unsafe, Object[].class)).intValue(); + } catch (Exception e) { + // ignore + } + try { + final Method objectFieldOffsetMethod = unsafeClass.getMethod("objectFieldOffset", Field.class); + final Field dummyField = DummyObject.class.getDeclaredField("dummy"); + objectHeader = ((Number) objectFieldOffsetMethod.invoke(unsafe, dummyField)).intValue(); + } catch (Exception e) { + // ignore + } + try { + final Method arrayBaseOffsetMethod = unsafeClass.getMethod("arrayBaseOffset", Class.class); + // we calculate that only for byte[] arrays, it's actually the same for all types: + arrayHeader = ((Number) arrayBaseOffsetMethod.invoke(unsafe, 
byte[].class)).intValue(); + } catch (Exception e) { + // ignore + } + } catch (Exception e) { + // ignore + } + NUM_BYTES_OBJECT_REF = referenceSize; + NUM_BYTES_OBJECT_HEADER = objectHeader; + NUM_BYTES_ARRAY_HEADER = arrayHeader; + } + + // Object with just one field to determine the object header size by getting the offset of the dummy field: + @SuppressWarnings("unused") + private static final class DummyObject { + public byte dummy; + } + /** Aligns an object size to be the next multiple of 8; the mask is a long literal so sizes of 2 GB and above are not truncated. */ + public static long alignObjectSize(long size) { + return (size + 7) & 0x7FFFFFFFFFFFFFF8L; + } + + /** Returns the size in bytes of the byte[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(byte[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + arr.length); + } + + /** Returns the size in bytes of the char[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(char[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_CHAR * arr.length); + } + + /** Returns the size in bytes of the short[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(short[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_SHORT * arr.length); + } + + /** Returns the size in bytes of the int[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(int[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_INT * arr.length); + } + + /** Returns the size in bytes of the float[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(float[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_FLOAT * arr.length); + } + + /** Returns the size in bytes of the long[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(long[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_LONG * arr.length); + } + + /** Returns the size in bytes of the double[] object. 
*/ + public static long sizeOf(double[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length); + } + private MemoryModel memoryModel; - private final Map seen; + private final Set seen; private int refSize; private int arraySize; @@ -93,9 +178,7 @@ public RamUsageEstimator(MemoryModel memoryModel, boolean checkInterned) { this.memoryModel = memoryModel; this.checkInterned = checkInterned; - // Use Map rather than Set so that we can use an IdentityHashMap - not - // seeing an IdentityHashSet - seen = new IdentityHashMap(64); + this.seen = Collections.newSetFromMap(new IdentityHashMap(64)); this.refSize = memoryModel.getReferenceSize(); this.arraySize = memoryModel.getArraySize(); this.classSize = memoryModel.getClassSize(); @@ -120,12 +203,12 @@ } // skip if we have seen before - if (seen.containsKey(obj)) { + if (seen.contains(obj)) { return 0; } // add to seen - seen.put(obj, null); + seen.add(obj); Class clazz = obj.getClass(); if (clazz.isArray()) { @@ -161,25 +244,24 @@ clazz = clazz.getSuperclass(); } size += classSize; - return size; + return alignObjectSize(size); } private long sizeOfArray(Object obj) { + long size = arraySize; int len = Array.getLength(obj); - if (len == 0) { - return 0; - } - long size = arraySize; - Class arrayElementClazz = obj.getClass().getComponentType(); - if (arrayElementClazz.isPrimitive()) { - size += len * memoryModel.getPrimitiveSize(arrayElementClazz); - } else { - for (int i = 0; i < len; i++) { - size += refSize + size(Array.get(obj, i)); + if (len > 0) { + Class arrayElementClazz = obj.getClass().getComponentType(); + if (arrayElementClazz.isPrimitive()) { + size += (long) len * memoryModel.getPrimitiveSize(arrayElementClazz); + } else { + for (int i = 0; i < len; i++) { + size += refSize + size(Array.get(obj, i)); + } } } - return size; + return alignObjectSize(size); } private static final long ONE_KB = 1024; Index: lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java 
=================================================================== --- lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java (revision 1301361) +++ lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java (working copy) @@ -17,8 +17,6 @@ * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase; - public class TestRamUsageEstimator extends LuceneTestCase { public void testBasic() { @@ -35,6 +33,20 @@ rue.estimateRamUsage(strings); } + public void testReferenceSize() { + if (VERBOSE) { + System.out.println("This JVM is 64bit: " + Constants.JRE_IS_64BIT); + System.out.println("Reference size in this JVM: " + RamUsageEstimator.NUM_BYTES_OBJECT_REF); + System.out.println("Object header size in this JVM: " + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER); + System.out.println("Array header size in this JVM: " + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER); + } + assertTrue(RamUsageEstimator.NUM_BYTES_OBJECT_REF == 4 || RamUsageEstimator.NUM_BYTES_OBJECT_REF == 8); + if (!Constants.JRE_IS_64BIT) { + assertEquals("For 32bit JVMs, reference size must always be 4", 4, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + } + } + + @SuppressWarnings("unused") private static final class Holder { long field1 = 5000L; String name = "name";