Index: lucene/core/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java (revision 1301361) +++ lucene/core/src/java/org/apache/lucene/util/AverageGuessMemoryModel.java (working copy) @@ -1,78 +0,0 @@ -package org.apache.lucene.util; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.IdentityHashMap; -import java.util.Map; - -/** - * An average, best guess, MemoryModel that should work okay on most systems. 
- * - */ -public class AverageGuessMemoryModel extends MemoryModel { - // best guess primitive sizes - private final Map,Integer> sizes = new IdentityHashMap,Integer>() { - { - put(boolean.class, Integer.valueOf(1)); - put(byte.class, Integer.valueOf(1)); - put(char.class, Integer.valueOf(2)); - put(short.class, Integer.valueOf(2)); - put(int.class, Integer.valueOf(4)); - put(float.class, Integer.valueOf(4)); - put(double.class, Integer.valueOf(8)); - put(long.class, Integer.valueOf(8)); - } - }; - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.util.MemoryModel#getArraySize() - */ - @Override - public int getArraySize() { - return 16; - } - - /* - * (non-Javadoc) - * - * @see org.apache.lucene.util.MemoryModel#getClassSize() - */ - @Override - public int getClassSize() { - return 8; - } - - /* (non-Javadoc) - * @see org.apache.lucene.util.MemoryModel#getPrimitiveSize(java.lang.Class) - */ - @Override - public int getPrimitiveSize(Class clazz) { - return sizes.get(clazz).intValue(); - } - - /* (non-Javadoc) - * @see org.apache.lucene.util.MemoryModel#getReferenceSize() - */ - @Override - public int getReferenceSize() { - return 4; - } - -} Index: lucene/core/src/java/org/apache/lucene/util/Constants.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/Constants.java (revision 1301361) +++ lucene/core/src/java/org/apache/lucene/util/Constants.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. 
*/ +import java.lang.reflect.Field; import org.apache.lucene.LucenePackage; /** @@ -49,21 +50,35 @@ public static final boolean JRE_IS_MINIMUM_JAVA6 = new Boolean(true).booleanValue(); // prevent inlining in foreign class files - public static final boolean JRE_IS_64BIT; + /** True iff running on a 64bit JVM */ + public static final boolean JRE_IS_64BIT; + public static final boolean JRE_IS_MINIMUM_JAVA7; + static { - // NOTE: this logic may not be correct; if you know of a - // more reliable approach please raise it on java-dev! - final String x = System.getProperty("sun.arch.data.model"); - if (x != null) { - JRE_IS_64BIT = x.indexOf("64") != -1; - } else { - if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) { - JRE_IS_64BIT = true; + boolean is64Bit = false; + try { + final Class unsafeClass = Class.forName("sun.misc.Unsafe"); + final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe"); + unsafeField.setAccessible(true); + final Object unsafe = unsafeField.get(null); + final int addressSize = ((Number) unsafeClass.getMethod("addressSize") + .invoke(unsafe)).intValue(); + //System.out.println("Address size: " + addressSize); + is64Bit = addressSize >= 8; + } catch (Exception e) { + final String x = System.getProperty("sun.arch.data.model"); + if (x != null) { + is64Bit = x.indexOf("64") != -1; } else { - JRE_IS_64BIT = false; + if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) { + is64Bit = true; + } else { + is64Bit = false; + } } } + JRE_IS_64BIT = is64Bit; // this method only exists in Java 7: boolean v7 = true; Index: lucene/core/src/java/org/apache/lucene/util/MemoryModel.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/MemoryModel.java (revision 1301361) +++ lucene/core/src/java/org/apache/lucene/util/MemoryModel.java (working copy) @@ -1,48 +0,0 @@ -package org.apache.lucene.util; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/** - * Returns primitive memory sizes for estimating RAM usage. - * - */ -public abstract class MemoryModel { - - /** - * @return size of array beyond contents - */ - public abstract int getArraySize(); - - /** - * @return Class size overhead - */ - public abstract int getClassSize(); - - /** - * @param clazz a primitive Class - bool, byte, char, short, long, float, - * short, double, int - * @return the size in bytes of given primitive Class - */ - public abstract int getPrimitiveSize(Class clazz); - - /** - * @return size of reference - */ - public abstract int getReferenceSize(); - -} Index: lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (revision 1301361) +++ lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (working copy) @@ -27,8 +27,9 @@ * * Resource Usage: * - * Internally uses a Map to temporally hold a reference to every - * object seen. + * Internally uses a Set to temporarily hold a reference to every + * object seen. + * This class is not thread safe, use one instance for each thread!
* * If checkInterned, all Strings checked will be interned, but those * that were not already interned will be released for GC when the @@ -38,32 +39,123 @@ */ public final class RamUsageEstimator { + private static final Map<Class<?>,Integer> primitiveSizes = + new IdentityHashMap<Class<?>,Integer>(); + + public final static int NUM_BYTES_BOOLEAN = 1; + public final static int NUM_BYTES_BYTE = 1; public final static int NUM_BYTES_SHORT = 2; public final static int NUM_BYTES_INT = 4; public final static int NUM_BYTES_LONG = 8; public final static int NUM_BYTES_FLOAT = 4; public final static int NUM_BYTES_DOUBLE = 8; public final static int NUM_BYTES_CHAR = 2; - public final static int NUM_BYTES_OBJECT_HEADER = 8; - public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4; - public final static int NUM_BYTES_ARRAY_HEADER = NUM_BYTES_OBJECT_HEADER + NUM_BYTES_INT + NUM_BYTES_OBJECT_REF; + + public final static int NUM_BYTES_OBJECT_HEADER; + public final static int NUM_BYTES_OBJECT_REF; + public final static int NUM_BYTES_ARRAY_HEADER; + static { + int referenceSize = Constants.JRE_IS_64BIT ? 8 : 4; + int objectHeader = Constants.JRE_IS_64BIT ? 16 : 8; + // the following is objectHeader + NUM_BYTES_INT, but aligned to platform address size: + int arrayHeader = Constants.JRE_IS_64BIT ?
24 : 12; + try { + final Class unsafeClass = Class.forName("sun.misc.Unsafe"); + final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe"); + unsafeField.setAccessible(true); + final Object unsafe = unsafeField.get(null); + try { + final Method arrayIndexScaleMethod = unsafeClass.getMethod("arrayIndexScale", Class.class); + referenceSize = ((Number) arrayIndexScaleMethod.invoke(unsafe, Object[].class)).intValue(); + } catch (Exception e) { + // ignore + } + try { + final Method objectFieldOffsetMethod = unsafeClass.getMethod("objectFieldOffset", Field.class); + final Field dummyField = DummyObject.class.getDeclaredField("dummy"); + objectHeader = ((Number) objectFieldOffsetMethod.invoke(unsafe, dummyField)).intValue(); + } catch (Exception e) { + // ignore + } + try { + final Method arrayBaseOffsetMethod = unsafeClass.getMethod("arrayBaseOffset", Class.class); + // we calculate that only for byte[] arrays, it's actually the same for all types: + arrayHeader = ((Number) arrayBaseOffsetMethod.invoke(unsafe, byte[].class)).intValue(); + } catch (Exception e) { + // ignore + } + } catch (Exception e) { + // ignore + } + NUM_BYTES_OBJECT_REF = referenceSize; + NUM_BYTES_OBJECT_HEADER = objectHeader; + NUM_BYTES_ARRAY_HEADER = arrayHeader; + + primitiveSizes.put(boolean.class, Integer.valueOf(NUM_BYTES_BOOLEAN)); + primitiveSizes.put(byte.class, Integer.valueOf(NUM_BYTES_BYTE)); + primitiveSizes.put(char.class, Integer.valueOf(NUM_BYTES_CHAR)); + primitiveSizes.put(short.class, Integer.valueOf(NUM_BYTES_SHORT)); + primitiveSizes.put(int.class, Integer.valueOf(NUM_BYTES_INT)); + primitiveSizes.put(float.class, Integer.valueOf(NUM_BYTES_FLOAT)); + primitiveSizes.put(double.class, Integer.valueOf(NUM_BYTES_DOUBLE)); + primitiveSizes.put(long.class, Integer.valueOf(NUM_BYTES_LONG)); + }; - private MemoryModel memoryModel; + // Object with just one field to determine the object header size by getting the offset of the dummy field: + @SuppressWarnings("unused") + 
private static final class DummyObject { + public byte dummy; + } - private final Map seen; + /** Aligns an object size to be the next multiple of 8; the mask is a long so sizes above 2 GB are not truncated. */ + public static long alignObjectSize(long size) { + return (size + 7L) & 0x7FFFFFFFFFFFFFF8L; + } + + /** Returns the size in bytes of the byte[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(byte[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + arr.length); + } + + /** Returns the size in bytes of the char[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(char[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_CHAR * arr.length); + } - private int refSize; - private int arraySize; - private int classSize; + /** Returns the size in bytes of the short[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(short[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_SHORT * arr.length); + } + + /** Returns the size in bytes of the int[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(int[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_INT * arr.length); + } + + /** Returns the size in bytes of the float[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(float[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_FLOAT * arr.length); + } + + /** Returns the size in bytes of the long[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(long[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_LONG * arr.length); + } + + /** Returns the size in bytes of the double[] object (computed in long arithmetic to avoid int overflow). */ + public static long sizeOf(double[] arr) { + return alignObjectSize((long) NUM_BYTES_ARRAY_HEADER + (long) NUM_BYTES_DOUBLE * arr.length); + } - private boolean checkInterned; + private final Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>(64)); + private final boolean checkInterned; /** - * Constructs this object with an AverageGuessMemoryModel and + * Constructs this estimator with * checkInterned = true. */ public RamUsageEstimator() { - this(new AverageGuessMemoryModel()); + this(true); } /** @@ -73,32 +165,7 @@ * intern checking.
*/ public RamUsageEstimator(boolean checkInterned) { - this(new AverageGuessMemoryModel(), checkInterned); - } - - /** - * @param memoryModel MemoryModel to use for primitive object sizes. - */ - public RamUsageEstimator(MemoryModel memoryModel) { - this(memoryModel, true); - } - - /** - * @param memoryModel MemoryModel to use for primitive object sizes. - * @param checkInterned check if Strings are interned and don't add to size - * if they are. Defaults to true but if you know the objects you are checking - * won't likely contain many interned Strings, it will be faster to turn off - * intern checking. - */ - public RamUsageEstimator(MemoryModel memoryModel, boolean checkInterned) { - this.memoryModel = memoryModel; this.checkInterned = checkInterned; - // Use Map rather than Set so that we can use an IdentityHashMap - not - // seeing an IdentityHashSet - seen = new IdentityHashMap(64); - this.refSize = memoryModel.getReferenceSize(); - this.arraySize = memoryModel.getArraySize(); - this.classSize = memoryModel.getClassSize(); } public long estimateRamUsage(Object obj) { @@ -120,12 +187,12 @@ } // skip if we have seen before - if (seen.containsKey(obj)) { + if (seen.contains(obj)) { return 0; } // add to seen - seen.put(obj, null); + seen.add(obj); Class clazz = obj.getClass(); if (clazz.isArray()) { @@ -143,9 +210,9 @@ } if (fields[i].getType().isPrimitive()) { - size += memoryModel.getPrimitiveSize(fields[i].getType()); + size += primitiveSizes.get(fields[i].getType()); } else { - size += refSize; + size += NUM_BYTES_OBJECT_REF; fields[i].setAccessible(true); try { Object value = fields[i].get(obj); @@ -160,26 +227,25 @@ } clazz = clazz.getSuperclass(); } - size += classSize; - return size; + size += NUM_BYTES_OBJECT_HEADER; + return alignObjectSize(size); } private long sizeOfArray(Object obj) { + long size = NUM_BYTES_ARRAY_HEADER; int len = Array.getLength(obj); - if (len == 0) { - return 0; - } - long size = arraySize; - Class arrayElementClazz = 
obj.getClass().getComponentType(); - if (arrayElementClazz.isPrimitive()) { - size += len * memoryModel.getPrimitiveSize(arrayElementClazz); - } else { - for (int i = 0; i < len; i++) { - size += refSize + size(Array.get(obj, i)); + if (len > 0) { + Class arrayElementClazz = obj.getClass().getComponentType(); + if (arrayElementClazz.isPrimitive()) { + size += (long) len * primitiveSizes.get(arrayElementClazz); + } else { + for (int i = 0; i < len; i++) { + size += NUM_BYTES_OBJECT_REF + size(Array.get(obj, i)); + } } } - return size; + return alignObjectSize(size); } private static final long ONE_KB = 1024; Index: lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java =================================================================== --- lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java (revision 1301361) +++ lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java (working copy) @@ -17,8 +17,6 @@ * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase; - public class TestRamUsageEstimator extends LuceneTestCase { public void testBasic() { @@ -35,6 +33,20 @@ rue.estimateRamUsage(strings); } + public void testReferenceSize() { + if (VERBOSE) { + System.out.println("This JVM is 64bit: " + Constants.JRE_IS_64BIT); + System.out.println("Reference size in this JVM: " + RamUsageEstimator.NUM_BYTES_OBJECT_REF); + System.out.println("Object header size in this JVM: " + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER); + System.out.println("Array header size in this JVM: " + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER); + } + assertTrue(RamUsageEstimator.NUM_BYTES_OBJECT_REF == 4 || RamUsageEstimator.NUM_BYTES_OBJECT_REF == 8); + if (!Constants.JRE_IS_64BIT) { + assertEquals("For 32bit JVMs, reference size must always be 4", 4, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + } + } + + @SuppressWarnings("unused") private static final class Holder { long field1 = 5000L; String name = "name";