Index: /src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java =================================================================== --- /src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (revision 785736) +++ /src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (working copy) @@ -29,9 +29,12 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.hfile.HFile.BlockIndex; import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.io.RawComparator; /** @@ -244,4 +247,31 @@ writer.append("1".getBytes(), "0".getBytes()); writer.close(); } + + /** + * Checks that BlockIndex.heapSize() agrees with the ClassSize base estimate + */ + public void testHeapSizeForBlockIndex() { + ClassSize cs = null; + Class cl = null; + long expected = 0L; + long actual = 0L; + try { + cs = new ClassSize(); + } catch(Exception e) {} + + //BlockIndex + cl = BlockIndex.class; + expected = cs.estimateBase(cl, false); + BlockIndex bi = new BlockIndex(Bytes.BYTES_RAWCOMPARATOR); + actual = bi.heapSize(); + //Since we have a [[]] in BlockIndex and the checker only sees the [] we + // miss a MULTI_ARRAY which is 4*Reference = 32 B + actual -= 32; + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + } + } \ No newline at end of file Index: /src/test/org/apache/hadoop/hbase/io/TestHeapSize.java =================================================================== --- /src/test/org/apache/hadoop/hbase/io/TestHeapSize.java (revision 0) +++ /src/test/org/apache/hadoop/hbase/io/TestHeapSize.java (revision 0) @@ -0,0 +1,64 @@ +package org.apache.hadoop.hbase.io; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import 
org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.io.hfile.LruBlockCache; +import org.apache.hadoop.hbase.util.ClassSize; + +import junit.framework.TestCase; + +public class TestHeapSize extends TestCase { + static final Log LOG = LogFactory.getLog(TestHeapSize.class); + // List of classes implementing HeapSize, some missing + // BatchOperation, BatchUpdate, BlockIndex, Entry, Entry, HStoreKey + // KeyValue, LruBlockCache, LruHashMap, Put + + /** + * Tests the classes that implement HeapSize. Some are not tested here, + * for example BlockIndex, which is tested in TestHFile. + */ + public void testSizes() { + ClassSize cs = null; + Class cl = null; + long expected = 0L; + long actual = 0L; + try { + cs = new ClassSize(); + } catch(Exception e) {} + + //KeyValue + cl = KeyValue.class; + expected = cs.estimateBase(cl, false); + KeyValue kv = new KeyValue(); + actual = kv.heapSize(); + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + + //LruBlockCache + cl = LruBlockCache.class; + expected = cs.estimateBase(cl, false); + LruBlockCache c = new LruBlockCache(1,1,200); + //Since the minimum size for a LruBlockCache is 1 + //we need to remove one reference from the heapsize + actual = c.heapSize() - HeapSize.REFERENCE; + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + + //Put + cl = Put.class; + expected = cs.estimateBase(cl, false); + Put put = new Put(); + actual = put.heapSize(); + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + } + +} Index: /src/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java =================================================================== --- /src/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (working copy) @@ -27,14 +27,12 @@ 
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.filter.Filter.ReturnCode; -import java.io.IOException; import java.util.NavigableSet; /** * A query matcher that is specifically designed for the scan case. */ public class ScanQueryMatcher extends QueryMatcher { - private Filter filter; // have to support old style filter for now. private RowFilterInterface oldFilter; Index: /src/java/org/apache/hadoop/hbase/KeyValue.java =================================================================== --- /src/java/org/apache/hadoop/hbase/KeyValue.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/KeyValue.java (working copy) @@ -1783,7 +1783,7 @@ // HeapSize public long heapSize() { - int dataLen = bytes.length + (bytes.length % 8); + int dataLen = ((length + 7) / 8) * 8; /* round the payload up to the next multiple of 8 */ return HeapSize.OBJECT + HeapSize.BYTE_ARRAY + dataLen + (2 * HeapSize.INT); } Index: /src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java =================================================================== --- /src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java (working copy) @@ -266,8 +266,7 @@ * @return hit ratio (double between 0 and 1) */ public double getHitRatio() { - return (double)((double)hitCount/ - ((double)(hitCount+missCount))); + return ((double)hitCount) / ((double)(hitCount+missCount)); } /** Index: /src/java/org/apache/hadoop/hbase/io/hfile/HFile.java =================================================================== --- /src/java/org/apache/hadoop/hbase/io/hfile/HFile.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/io/hfile/HFile.java (working copy) @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.IOUtils; import 
org.apache.hadoop.io.RawComparator; @@ -1333,11 +1334,14 @@ long [] blockOffsets; int [] blockDataSizes; int size = 0; - + /* Needed doing lookup on blocks. */ final RawComparator comparator; + static final int OVERHEAD = 2 * HeapSize.INT + 1 * HeapSize.MULTI_ARRAY + + 2 * HeapSize.ARRAY + (2+1) * HeapSize.REFERENCE; + /* * Shutdown default constructor */ @@ -1493,8 +1497,25 @@ } public long heapSize() { - return this.size; + long size = OVERHEAD; + + //Calculating the size of blockKeys + if(blockKeys != null) { + for(byte [] bs : blockKeys) { + size += HeapSize.MULTI_ARRAY; + size += ClassSize.alignSize(bs.length); + } + } + if(blockOffsets != null) { + size += blockOffsets.length * HeapSize.LONG; + } + if(blockDataSizes != null) { + size += blockDataSizes.length * HeapSize.INT; + } + + return size; } + } /* Index: /src/java/org/apache/hadoop/hbase/io/HeapSize.java =================================================================== --- /src/java/org/apache/hadoop/hbase/io/HeapSize.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/io/HeapSize.java (working copy) @@ -77,4 +77,5 @@ * count of payload and hosting object sizings. */ public long heapSize(); + } Index: /src/java/org/apache/hadoop/hbase/util/ClassSize.java =================================================================== --- /src/java/org/apache/hadoop/hbase/util/ClassSize.java (revision 0) +++ /src/java/org/apache/hadoop/hbase/util/ClassSize.java (revision 0) @@ -0,0 +1,204 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Class for determining the "size" of a class, an attempt to calculate the + * actual bytes that an object of this class will occupy in memory + * + * The core of this class is taken from the Derby project + */ +public class ClassSize { + static final Log LOG = LogFactory.getLog(ClassSize.class); + + private int refSize; + private static final int objectOverhead = 2; // references, not bytes! + private static final int byteSize = 1; + private static final int booleanSize = 1; + private static final int charSize = 4; // Unicode + private static final int shortSize = 4; + private static final int intSize = 4; + private static final int longSize = 8; + private static final int floatSize = 4; + private static final int doubleSize = 8; + private int minObjectSize; + + + /** + * Constructor + * @throws Exception + */ + public ClassSize() throws Exception{ + // Figure out whether this is a 32 or 64 bit machine. + Runtime runtime = Runtime.getRuntime(); + int loops = 10; + int sz = 0; + for(int i=0; i 0 ) { + break; + } + } + + refSize = ( 4 > sz) ? 4 : sz; + minObjectSize = 4*refSize; + } + + /** + * The estimate of the size of a class instance depends on whether the JVM + * uses 32 or 64 bit addresses, that is it depends on the size of an object + * reference. It is a linear function of the size of a reference, e.g. 
+ * 24 + 5*r where r is the size of a reference (usually 4 or 8 bytes). + * + * This method returns the counts that function is built from: primitive bytes, + * array fields and references (the latter including the object overhead). + * + * @param cl A class whose instance size is to be estimated + * @return an array of 3 integers. The first integer is the size of the + * primitives, the second the number of arrays and the third the number of + * references. + */ + private int [] getSizeCoefficients(Class cl, boolean debug) { + int primitives = 0; + int arrays = 0; + int references = objectOverhead; + + for( ; null != cl; cl = cl.getSuperclass()) { + Field[] field = cl.getDeclaredFields(); + if( null != field) { + for( int i = 0; i < field.length; i++) { + if( ! Modifier.isStatic( field[i].getModifiers())) { + Class fieldClass = field[i].getType(); + if( fieldClass.isArray()){ + arrays++; + } + else if(! fieldClass.isPrimitive()){ + references++; + } + else {// Is simple primitive + String name = fieldClass.getName(); + + if(name.equals("int") || name.equals( "I")) + primitives += intSize; + else if(name.equals("long") || name.equals( "J")) + primitives += longSize; + else if(name.equals("boolean") || name.equals( "Z")) + primitives += booleanSize; + else if(name.equals("short") || name.equals( "S")) + primitives += shortSize; + else if(name.equals("byte") || name.equals( "B")) + primitives += byteSize; + else if(name.equals("char") || name.equals( "C")) + primitives += charSize; + else if(name.equals("float") || name.equals( "F")) + primitives += floatSize; + else if(name.equals("double") || name.equals( "D")) + primitives += doubleSize; + } + if(debug) { + if (LOG.isDebugEnabled()) { + // Write out the name of the field and its declared type. + LOG.debug(field[i].getName()+ "\n\t" +field[i].getType()); + } + } + } + } + } + } + return new int [] {primitives, arrays, references}; + } + + /** + * Estimate the static space taken up by a class instance given the + * coefficients returned by getSizeCoefficients. 
+ * + * @param coeff the {primitives, arrays, references} coefficients + * + * @return the size estimate, in bytes + */ + private long estimateBaseFromCoefficients(int [] coeff, boolean debug) { + int size = coeff[0] + (coeff[1]*3 + coeff[2])*refSize; + + // Round up to a multiple of 8 + size = (size + 7)/8; + size *= 8; + if(debug) { + if (LOG.isDebugEnabled()) { + // Write out each coefficient, the reference size and the rounded size. + LOG.debug("Primitives " + coeff[0] + ", arrays " + coeff[1] + + ", references(inlcuding " + objectOverhead + + ", for object overhead) " + coeff[2] + ", refSize " + refSize + + ", size " + size); + } + } + return (size < minObjectSize) ? minObjectSize : size; + } + + /** + * Estimate the static space taken up by the fields of a class. This includes + * the space taken up by references (the pointer) but not by the referenced + * object. So the estimated size of an array field does not depend on the size + * of the array. Similarly the size of an object (reference) field does not + * depend on the object. + * + * @return the size estimate in bytes. + */ + public long estimateBase(Class cl, boolean debug) { + return estimateBaseFromCoefficients( getSizeCoefficients(cl, debug), debug); + } + + /** + * Tries to clear all the memory used to estimate the reference size for the + * current JVM + * @param runtime the runtime asked to garbage collect and run finalization + * @param i number of seconds to sleep before collecting + * @throws Exception if the sleep is interrupted + */ + private void cleaner(Runtime runtime, int i) throws Exception{ + Thread.sleep(i*1000); + runtime.gc();runtime.gc(); runtime.gc();runtime.gc();runtime.gc(); + runtime.runFinalization(); + } + + + /** + * Aligns a number to 8. 
+ * @param num number to align to 8 + * @return smallest number >= input that is a multiple of 8 + */ + public static long alignSize(int num) { + if(num % 8 == 0) return num; + return (num + (8 - (num % 8))); + } + + +} Index: /src/java/org/apache/hadoop/hbase/util/Bytes.java =================================================================== --- /src/java/org/apache/hadoop/hbase/util/Bytes.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/util/Bytes.java (working copy) @@ -265,11 +265,11 @@ return result; } - public static String toStringBinary(final byte []b) { + public static String toStringBinary(final byte [] b) { return toStringBinary(b, 0, b.length); } - public static String toStringBinary(final byte []b, int off, int len) { + public static String toStringBinary(final byte [] b, int off, int len) { String result = null; try { String first = new String(b, off, len, "ISO-8859-1"); @@ -1126,4 +1126,5 @@ } return value; } + } Index: /src/java/org/apache/hadoop/hbase/client/Put.java =================================================================== --- /src/java/org/apache/hadoop/hbase/client/Put.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/client/Put.java (working copy) @@ -49,6 +49,9 @@ private Map> familyMap = new TreeMap>(Bytes.BYTES_COMPARATOR); + private static final int OVERHEAD = 1 * HeapSize.ARRAY + 2 * HeapSize.LONG + + (2+1) * HeapSize.REFERENCE; + /** Constructor for Writable. 
DO NOT USE */ public Put() {} @@ -246,13 +249,13 @@ //HeapSize public long heapSize() { - long totalSize = 0; - for(Map.Entry> entry : this.familyMap.entrySet()) { - for(KeyValue kv : entry.getValue()) { - totalSize += kv.heapSize(); - } - } - return totalSize; + long mapSize = this.OVERHEAD; + for(Map.Entry> entry : this.familyMap.entrySet()) { + for(KeyValue kv : entry.getValue()) { + mapSize += kv.heapSize(); + } + } + return mapSize; } //Writable Index: /src/java/org/apache/hadoop/hbase/client/Scan.java =================================================================== --- /src/java/org/apache/hadoop/hbase/client/Scan.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/client/Scan.java (working copy) @@ -85,7 +85,6 @@ public Scan(byte [] startRow, Filter filter) { this(startRow); this.filter = filter; - } /** @@ -118,7 +117,6 @@ public Scan addFamily(byte [] family) { familyMap.remove(family); familyMap.put(family, null); - return this; } @@ -204,7 +202,7 @@ * Get all available versions. */ public Scan setMaxVersions() { - this.maxVersions = Integer.MAX_VALUE; + this.maxVersions = Integer.MAX_VALUE; return this; } @@ -236,7 +234,6 @@ */ public Scan setOldFilter(RowFilterInterface filter) { oldFilter = filter; - return this; } @@ -246,7 +243,6 @@ */ public Scan setFamilyMap(Map> familyMap) { this.familyMap = familyMap; - return this; }