Index: /src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java =================================================================== --- /src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (revision 785736) +++ /src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (working copy) @@ -29,9 +29,12 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.hfile.HFile.BlockIndex; import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.io.RawComparator; /** @@ -244,4 +247,31 @@ writer.append("1".getBytes(), "0".getBytes()); writer.close(); } + + /** + * Checks if the HeapSize calculator is within reason + */ + public void testHeapSizeForBlockIndex() { + ClassSize cs = null; + Class cl = null; + long expected = 0L; + long actual = 0L; + try { + cs = new ClassSize(); + } catch(Exception e) {} + + //BlockIndex + cl = BlockIndex.class; + expected = cs.estimateBase(cl, false); + BlockIndex bi = new BlockIndex(Bytes.BYTES_RAWCOMPARATOR); + actual = bi.heapSize(); + //Since we have a [[]] in BlockIndex and the checker only sees the [] we + // miss a MULTI_ARRAY which is 4*Reference = 32 B + actual -= 32; + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + } + } \ No newline at end of file Index: /src/test/org/apache/hadoop/hbase/io/TestHeapSize.java =================================================================== --- /src/test/org/apache/hadoop/hbase/io/TestHeapSize.java (revision 0) +++ /src/test/org/apache/hadoop/hbase/io/TestHeapSize.java (revision 0) @@ -0,0 +1,64 @@ +package org.apache.hadoop.hbase.io; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.io.hfile.LruBlockCache; +import org.apache.hadoop.hbase.util.ClassSize; + +import junit.framework.TestCase; + +public class TestHeapSize extends TestCase { + static final Log LOG = LogFactory.getLog(TestHeapSize.class); + // List of classes implementing HeapSize, some missing + // BatchOperation, BatchUpdate, BlockIndex, Entry, Entry, HStoreKey + // KeyValue, LruBlockCache, LruHashMap, Put + + /** + * Testing the classes that implement HeapSize.
Some are not tested here, * for example BlockIndex, which is tested in TestHFile + */ + public void testSizes() { + ClassSize cs = null; + Class cl = null; + long expected = 0L; + long actual = 0L; + try { + cs = new ClassSize(); + } catch(Exception e) {} + + //KeyValue + cl = KeyValue.class; + expected = cs.estimateBase(cl, false); + KeyValue kv = new KeyValue(); + actual = kv.heapSize(); + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + + //LruBlockCache + cl = LruBlockCache.class; + expected = cs.estimateBase(cl, false); + LruBlockCache c = new LruBlockCache(1,1,200); + //Since the minimum size for a LruBlockCache is 1 + //we need to remove one reference from the heapSize + actual = c.heapSize() - HeapSize.REFERENCE; + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + + //Put + cl = Put.class; + expected = cs.estimateBase(cl, false); + Put put = new Put(); + actual = put.heapSize(); + if(expected != actual) { + cs.estimateBase(cl, true); + assertEquals(expected, actual); + } + } + +} Index: /src/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java =================================================================== --- /src/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (working copy) @@ -27,14 +27,12 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.filter.Filter.ReturnCode; -import java.io.IOException; import java.util.NavigableSet; /** * A query matcher that is specifically designed for the scan case. */ public class ScanQueryMatcher extends QueryMatcher { - private Filter filter; // have to support old style filter for now.
private RowFilterInterface oldFilter; Index: /src/java/org/apache/hadoop/hbase/KeyValue.java =================================================================== --- /src/java/org/apache/hadoop/hbase/KeyValue.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/KeyValue.java (working copy) @@ -1783,7 +1783,7 @@ // HeapSize public long heapSize() { - int dataLen = bytes.length + (bytes.length % 8); + int dataLen = length + (length % 8); return HeapSize.OBJECT + HeapSize.BYTE_ARRAY + dataLen + (2 * HeapSize.INT); } Index: /src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java =================================================================== --- /src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java (working copy) @@ -266,8 +266,7 @@ * @return hit ratio (double between 0 and 1) */ public double getHitRatio() { - return (double)((double)hitCount/ - ((double)(hitCount+missCount))); + return ((double)hitCount) / ((double)(hitCount+missCount)); } /** Index: /src/java/org/apache/hadoop/hbase/io/hfile/HFile.java =================================================================== --- /src/java/org/apache/hadoop/hbase/io/hfile/HFile.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/io/hfile/HFile.java (working copy) @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.RawComparator; @@ -1333,11 +1334,14 @@ long [] blockOffsets; int [] blockDataSizes; int size = 0; - + /* Needed doing lookup on blocks. */ final RawComparator comparator; + static final int OVERHEAD = 2 * HeapSize.INT + 1 * HeapSize.MULTI_ARRAY + + 2 * HeapSize.ARRAY + (2+1) * HeapSize.REFERENCE; + /* * Shutdown default constructor */ @@ -1493,8 +1497,25 @@ } public long heapSize() { - return this.size; + long size = OVERHEAD; + + //Calculating the size of blockKeys + if(blockKeys != null) { + for(byte [] bs : blockKeys) { + size += HeapSize.MULTI_ARRAY; + size += ClassSize.alignSize(bs.length); + } + } + if(blockOffsets != null) { + size += blockOffsets.length * HeapSize.LONG; + } + if(blockDataSizes != null) { + size += blockDataSizes.length * HeapSize.INT; + } + + return size; } + } /* Index: /src/java/org/apache/hadoop/hbase/io/HeapSize.java =================================================================== --- /src/java/org/apache/hadoop/hbase/io/HeapSize.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/io/HeapSize.java (working copy) @@ -77,4 +77,5 @@ * count of payload and hosting object sizings. */ public long heapSize(); + } Index: /src/java/org/apache/hadoop/hbase/util/ClassSize.java =================================================================== --- /src/java/org/apache/hadoop/hbase/util/ClassSize.java (revision 0) +++ /src/java/org/apache/hadoop/hbase/util/ClassSize.java (revision 0) @@ -0,0 +1,204 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Class for determining the "size" of a class, an attempt to calculate the + * actual bytes that an object of this class will occupy in memory + * + * The core of this class is taken from the Derby project + */ +public class ClassSize { + static final Log LOG = LogFactory.getLog(ClassSize.class); + + private int refSize; + private static final int objectOverhead = 2; // references, not bytes! + private static final int byteSize = 1; + private static final int booleanSize = 1; + private static final int charSize = 4; // Unicode + private static final int shortSize = 4; + private static final int intSize = 4; + private static final int longSize = 8; + private static final int floatSize = 4; + private static final int doubleSize = 8; + private int minObjectSize; + + + /** + * Constructor + * @throws Exception + */ + public ClassSize() throws Exception{ + // Figure out whether this is a 32 or 64 bit machine. + Runtime runtime = Runtime.getRuntime(); + int loops = 10; + int sz = 0; + for(int i=0; i 0 ) { + break; + } + } + + refSize = ( 4 > sz) ? 4 : sz; + minObjectSize = 4*refSize; + } + + /** + * The estimate of the size of a class instance depends on whether the JVM + * uses 32 or 64 bit addresses, that is it depends on the size of an object + * reference. It is a linear function of the size of a reference, e.g. + * 24 + 5*r where r is the size of a reference (usually 4 or 8 bytes). + * + * This method returns the coefficients of the linear function, e.g. {24, 5} + * in the above example. + * + * @param cl A class whose instance size is to be estimated + * @return an array of 3 integers. The first integer is the size of the + * primitives, the second the number of arrays and the third the number of + * references. + */ + private int [] getSizeCoefficients(Class cl, boolean debug) { + int primitives = 0; + int arrays = 0; + int references = objectOverhead; + + for( ; null != cl; cl = cl.getSuperclass()) { + Field[] field = cl.getDeclaredFields(); + if( null != field) { + for( int i = 0; i < field.length; i++) { + if( ! Modifier.isStatic( field[i].getModifiers())) { + Class fieldClass = field[i].getType(); + if( fieldClass.isArray()){ + arrays++; + } + else if(! 
fieldClass.isPrimitive()){ + references++; + } + else {// Is simple primitive + String name = fieldClass.getName(); + + if(name.equals("int") || name.equals( "I")) + primitives += intSize; + else if(name.equals("long") || name.equals( "J")) + primitives += longSize; + else if(name.equals("boolean") || name.equals( "Z")) + primitives += booleanSize; + else if(name.equals("short") || name.equals( "S")) + primitives += shortSize; + else if(name.equals("byte") || name.equals( "B")) + primitives += byteSize; + else if(name.equals("char") || name.equals( "C")) + primitives += charSize; + else if(name.equals("float") || name.equals( "F")) + primitives += floatSize; + else if(name.equals("double") || name.equals( "D")) + primitives += doubleSize; + } + if(debug) { + if (LOG.isDebugEnabled()) { + // Log the name and type of the field being examined. + LOG.debug(field[i].getName()+ "\n\t" +field[i].getType()); + } + } + } + } + } + } + return new int [] {primitives, arrays, references}; + } + + /** + * Estimate the static space taken up by a class instance given the + * coefficients returned by getSizeCoefficients. + * + * @param coeff the coefficients + * + * @return the size estimate, in bytes + */ + private long estimateBaseFromCoefficients(int [] coeff, boolean debug) { + int size = coeff[0] + (coeff[1]*3 + coeff[2])*refSize; + + // Round up to a multiple of 8 + size = (size + 7)/8; + size *= 8; + if(debug) { + if (LOG.isDebugEnabled()) { + // Log the coefficients and the resulting size estimate. + LOG.debug("Primitives " + coeff[0] + ", arrays " + coeff[1] + + ", references (including " + objectOverhead + + " for object overhead) " + coeff[2] + ", refSize " + refSize + + ", size " + size); + } + } + return (size < minObjectSize) ? minObjectSize : size; + } + + /** + * Estimate the static space taken up by the fields of a class. This includes + * the space taken up by references (the pointer) but not by the referenced + * object. So the estimated size of an array field does not depend on the size + * of the array. Similarly the size of an object (reference) field does not + * depend on the object. + * + * @return the size estimate in bytes. + */ + public long estimateBase(Class cl, boolean debug) { + return estimateBaseFromCoefficients( getSizeCoefficients(cl, debug), debug); + } + + /** + * Tries to clear out any garbage so that the reference-size estimate for the + * current JVM is not skewed by memory awaiting collection + * @param runtime + * @param i + * @throws Exception + */ + private void cleaner(Runtime runtime, int i) throws Exception{ + Thread.sleep(i*1000); + runtime.gc();runtime.gc(); runtime.gc();runtime.gc();runtime.gc(); + runtime.runFinalization(); + } + + + /** + * Aligns a number up to the next multiple of 8.
+ * @param num number to align to a multiple of 8 + * @return smallest number >= input that is a multiple of 8 + */ + public static long alignSize(int num) { + if(num % 8 == 0) return num; + return (num + (8 - (num % 8))); + } + + +} Index: /src/java/org/apache/hadoop/hbase/util/Bytes.java =================================================================== --- /src/java/org/apache/hadoop/hbase/util/Bytes.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/util/Bytes.java (working copy) @@ -265,11 +265,11 @@ return result; } - public static String toStringBinary(final byte []b) { + public static String toStringBinary(final byte [] b) { return toStringBinary(b, 0, b.length); } - public static String toStringBinary(final byte []b, int off, int len) { + public static String toStringBinary(final byte [] b, int off, int len) { String result = null; try { String first = new String(b, off, len, "ISO-8859-1"); @@ -1126,4 +1126,5 @@ } return value; } + } Index: /src/java/org/apache/hadoop/hbase/client/Put.java =================================================================== --- /src/java/org/apache/hadoop/hbase/client/Put.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/client/Put.java (working copy) @@ -49,6 +49,9 @@ private Map<byte [], List<KeyValue>> familyMap = new TreeMap<byte [], List<KeyValue>>(Bytes.BYTES_COMPARATOR); + private static final int OVERHEAD = 1 * HeapSize.ARRAY + 2 * HeapSize.LONG + + (2+1) * HeapSize.REFERENCE; + /** Constructor for Writable. DO NOT USE */ public Put() {} @@ -246,13 +249,13 @@ //HeapSize public long heapSize() { - long totalSize = 0; - for(Map.Entry<byte [], List<KeyValue>> entry : this.familyMap.entrySet()) { - for(KeyValue kv : entry.getValue()) { - totalSize += kv.heapSize(); - } - } - return totalSize; + long mapSize = this.OVERHEAD; + for(Map.Entry<byte [], List<KeyValue>> entry : this.familyMap.entrySet()) { + for(KeyValue kv : entry.getValue()) { + mapSize += kv.heapSize(); + } + } + return mapSize; } //Writable Index: /src/java/org/apache/hadoop/hbase/client/Scan.java =================================================================== --- /src/java/org/apache/hadoop/hbase/client/Scan.java (revision 785736) +++ /src/java/org/apache/hadoop/hbase/client/Scan.java (working copy) @@ -85,7 +85,6 @@ public Scan(byte [] startRow, Filter filter) { this(startRow); this.filter = filter; - } /** @@ -118,7 +117,6 @@ public Scan addFamily(byte [] family) { familyMap.remove(family); familyMap.put(family, null); - return this; } @@ -204,7 +202,7 @@ * Get all available versions.
*/ public Scan setMaxVersions() { - this.maxVersions = Integer.MAX_VALUE; + this.maxVersions = Integer.MAX_VALUE; return this; } @@ -236,7 +234,6 @@ */ public Scan setOldFilter(RowFilterInterface filter) { oldFilter = filter; - return this; } @@ -246,7 +243,6 @@ */ public Scan setFamilyMap(Map> familyMap) { this.familyMap = familyMap; - return this; } Index: /bin/rename_table.rb =================================================================== --- /bin/rename_table.rb (revision 785736) +++ /bin/rename_table.rb (working copy) @@ -10,17 +10,21 @@ # ${HBASE_HOME}/bin/hbase org.jruby.Main rename_table.rb # include Java +import java.util.ArrayList + import org.apache.hadoop.hbase.util.MetaUtils import org.apache.hadoop.hbase.util.FSUtils import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.util.Writables import org.apache.hadoop.hbase.HConstants import org.apache.hadoop.hbase.HBaseConfiguration -import org.apache.hadoop.hbase.HStoreKey import org.apache.hadoop.hbase.HRegionInfo import org.apache.hadoop.hbase.HTableDescriptor -import org.apache.hadoop.hbase.io.ImmutableBytesWritable -import org.apache.hadoop.hbase.regionserver.HLogEdit +import org.apache.hadoop.hbase.KeyValue +import org.apache.hadoop.hbase.client.Delete +import org.apache.hadoop.hbase.client.Get +import org.apache.hadoop.hbase.client.Result +import org.apache.hadoop.hbase.client.Scan import org.apache.hadoop.hbase.regionserver.HRegion import org.apache.hadoop.fs.Path import org.apache.hadoop.fs.FileSystem @@ -103,47 +107,80 @@ # After move, delete old entry and create a new. LOG.info("Scanning " + meta.getRegionNameAsString()) metaRegion = utils.getMetaRegion(meta) - scanner = metaRegion.getScanner(HConstants::COL_REGIONINFO_ARRAY, oldTableName, - HConstants::LATEST_TIMESTAMP, nil) + LOG.info("old table name " +ARGV[0]) + scan = Scan.new(oldTableName) + +# scan.addColumn(HConstants::CATALOG_FAMILY, HConstants::REGIONINFO_QUALIFIER) + scan.addFamily(HConstants::CATALOG_FAMILY) + scanner = metaRegion.getScanner(scan) begin - key = HStoreKey.new() - value = TreeMap.new(Bytes.BYTES_COMPARATOR) - while scanner.next(key, value) + kvs = ArrayList.new() + LOG.info("Before next") + begin + loop = scanner.next(kvs) + index = index + 1 - keyStr = key.toString() - oldHRI = Writables.getHRegionInfo(value.get(HConstants::COL_REGIONINFO)) - if !oldHRI - raise IOError.new(index.to_s + " HRegionInfo is null for " + keyStr) + LOG.info("kvs len = " + kvs.length.to_s()) + + if(kvs.length == 0) + LOG.info("Empty result") end - unless isTableRegion(oldTableName, oldHRI) - # If here, we passed out the table. Break. - break - end - oldRDir = Path.new(oldTableDir, Path.new(oldHRI.getEncodedName().to_s)) - if !fs.exists(oldRDir) - LOG.warn(oldRDir.toString() + " does not exist -- region " + - oldHRI.getRegionNameAsString()) - else - # Now make a new HRegionInfo to add to .META. for the new region. - newHRI = createHRI(newTableName, oldHRI) - newRDir = Path.new(newTableDir, Path.new(newHRI.getEncodedName().to_s)) - # Move the region in filesystem - LOG.info("Renaming " + oldRDir.toString() + " as " + newRDir.toString()) - fs.rename(oldRDir, newRDir) - # Removing old region from meta - LOG.info("Removing " + Bytes.toString(key.getRow()) + " from .META.") - metaRegion.deleteAll(key.getRow(), HConstants::LATEST_TIMESTAMP) - # Create 'new' region - newR = HRegion.new(rootdir, utils.getLog(), fs, c, newHRI, nil) - # Add new row. NOTE: Presumption is that only one .META. region. 
If not, - # need to do the work to figure proper region to add this new region to. - LOG.info("Adding to meta: " + newR.toString()) - HRegion.addRegionToMETA(metaRegion, newR) - LOG.info("Done moving: " + Bytes.toString(key.getRow())) - end - # Need to clear value else we keep appending values. - value.clear() - end + for i in (0...kvs.length) + kv = kvs[i] +# for kvt in kvs +# LOG.info("kv in kvs = " + kvt.to_s()) +# end + + if(kv != nil) + if(kv.getLength() != 0) + LOG.info("KeyValue " + kv.toString()) + keyStr = kv.getQualifier().to_s() + value = kv.getValue() + # oldHRI = Writables.getHRegionInfo(value.get(HConstants::CATALOG_FAMILY)) + oldHRI = Writables.getHRegionInfo(value) + if !oldHRI + raise IOError.new(index.to_s + " HRegionInfo is null for " + keyStr) + end + unless isTableRegion(oldTableName, oldHRI) + LOG.info("Breaking") + # If here, we passed out the table. Break. + break + end + LOG.info("oldTableDir " + oldTableDir.to_s()) + oldRDir = Path.new(oldTableDir, Path.new(oldHRI.getEncodedName().to_s)) + if !fs.exists(oldRDir) + LOG.warn(oldRDir.toString() + " does not exist -- region " + + oldHRI.getRegionNameAsString()) + else + # Now make a new HRegionInfo to add to .META. for the new region. + newHRI = createHRI(newTableName, oldHRI) + newRDir = Path.new(newTableDir, Path.new(newHRI.getEncodedName().to_s)) + # Move the region in filesystem + LOG.info("Renaming " + oldRDir.toString() + " as " + newRDir.toString()) + fs.rename(oldRDir, newRDir) + # Removing old region from meta + + LOG.info("Removing " + Bytes.toString(kv.getRow()) + " from .META.") + delete = Delete.new(kv.getRow()) + metaRegion.delete(delete, nil, true); +# metaRegion.deleteAll(key.getRow(), HConstants::LATEST_TIMESTAMP) + # Create 'new' region + newR = HRegion.new(rootdir, utils.getLog(), fs, c, newHRI, nil) + # Add new row. NOTE: Presumption is that only one .META. region. If not, + # need to do the work to figure proper region to add this new region to. + LOG.info("Adding to meta: " + newR.toString()) + HRegion.addRegionToMETA(metaRegion, newR) + LOG.info("Done moving: " + Bytes.toString(key.getRow())) + end + # Need to clear value else we keep appending values. + # value.clear() + end + else + LOG.info("kv == nil") + end + end + end while(loop) + ensure scanner.close() end @@ -152,3 +189,177 @@ ensure utils.shutdown() end + + + + + +# +# Script that renames table in hbase. As written, will not work for rare +# case where there is more than one region in .META. table. Does the +# update of the hbase .META. and moves the directories in filesystem. +# HBase MUST be shutdown when you run this script. On successful rename, +# DOES NOT remove old directory from filesystem because was afraid this +# script could remove the original table on error. 
+# +# To see usage for this script, run: +# +# ${HBASE_HOME}/bin/hbase org.jruby.Main rename_table.rb +# +# include Java +# import java.util.TreeMap +# +# import org.apache.hadoop.hbase.HConstants +# import org.apache.hadoop.hbase.HBaseConfiguration +# import org.apache.hadoop.hbase.HStoreKey +# import org.apache.hadoop.hbase.HRegionInfo +# import org.apache.hadoop.hbase.HTableDescriptor +# import org.apache.hadoop.hbase.KeyValue +# import org.apache.hadoop.hbase.client.Get +# import org.apache.hadoop.hbase.client.Result +# import org.apache.hadoop.hbase.client.Scan +# import org.apache.hadoop.hbase.io.ImmutableBytesWritable +# import org.apache.hadoop.hbase.regionserver.HLogEdit +# import org.apache.hadoop.hbase.regionserver.HRegion +# import org.apache.hadoop.hbase.util.MetaUtils +# import org.apache.hadoop.hbase.util.FSUtils +# import org.apache.hadoop.hbase.util.Bytes +# import org.apache.hadoop.hbase.util.Writables +# +# import org.apache.hadoop.fs.Path +# import org.apache.hadoop.fs.FileSystem +# import org.apache.commons.logging.Log +# import org.apache.commons.logging.LogFactory +# +# Name of this script +# NAME = "rename_table" +# +# Print usage for this script +# def usage +# puts 'Usage: %s.rb ' % NAME +# exit! +# end +# +# Passed 'dir' exists and is a directory else exception +# def isDirExists(fs, dir) +# raise IOError.new("Does not exit: " + dir.toString()) unless fs.exists(dir) +# raise IOError.new("Not a directory: " + dir.toString()) unless fs.isDirectory(dir) +# end +# +# Returns true if the region belongs to passed table +# def isTableRegion(tableName, hri) +# return Bytes.equals(hri.getTableDesc().getName(), tableName) +# end +# +# Create new HRI based off passed 'oldHRI' +# def createHRI(tableName, oldHRI) +# htd = oldHRI.getTableDesc() +# newHtd = HTableDescriptor.new(tableName) +# for family in htd.getFamilies() +# newHtd.addFamily(family) +# end +# return HRegionInfo.new(newHtd, oldHRI.getStartKey(), oldHRI.getEndKey(), +# oldHRI.isSplit()) +# end +# +# Check arguments +# if ARGV.size != 2 +# usage +# end +# +# Check good table names were passed. +# oldTableName = HTableDescriptor.isLegalTableName(ARGV[0].to_java_bytes) +# newTableName = HTableDescriptor.isLegalTableName(ARGV[1].to_java_bytes) +# +# Get configuration to use. +# c = HBaseConfiguration.new() +# +# Set hadoop filesystem configuration using the hbase.rootdir. +# Otherwise, we'll always use localhost though the hbase.rootdir +# might be pointing at hdfs location. +# c.set("fs.default.name", c.get(HConstants::HBASE_DIR)) +# fs = FileSystem.get(c) +# +# If new table directory does not exit, create it. Keep going if already +# exists because maybe we are rerunning script because it failed first +# time. +# rootdir = FSUtils.getRootDir(c) +# oldTableDir = Path.new(rootdir, Path.new(Bytes.toString(oldTableName))) +# isDirExists(fs, oldTableDir) +# newTableDir = Path.new(rootdir, Bytes.toString(newTableName)) +# if !fs.exists(newTableDir) +# fs.mkdirs(newTableDir) +# end +# +# Get a logger and a metautils instance. +# LOG = LogFactory.getLog(NAME) +# utils = MetaUtils.new(c) +# +# Start. Get all meta rows. +# begin +# Get list of all .META. regions that contain old table name +# metas = utils.getMETARows(oldTableName) +# index = 0 +# for meta in metas +# For each row we find, move its region from old to new table. +# Need to update the encoded name in the hri as we move. +# After move, delete old entry and create a new. 
+# LOG.info("Scanning " + meta.getRegionNameAsString()) +# metaRegion = utils.getMetaRegion(meta) +# if(oldTableName == nil) +# LOG.info("old table == nil") +# end +# scan = Scan.new(oldTableName) +# scanner = metaRegion.getScanner(scan) +# LOG.info("old table ") +# scanner = metaRegion.getScanner(HConstants::COL_REGIONINFO_ARRAY, oldTableName, +# HConstants::LATEST_TIMESTAMP, nil) +# begin +# key = HStoreKey.new() +# value = TreeMap.new(Bytes.BYTES_COMPARATOR) +# while scanner.next() +# while scanner.next(key, value) +# index = index + 1 +# keyStr = key.toString() +# oldHRI = Writables.getHRegionInfo(value.get(HConstants::COL_REGIONINFO)) +# if !oldHRI +# raise IOError.new(index.to_s + " HRegionInfo is null for " + keyStr) +# end +# unless isTableRegion(oldTableName, oldHRI) +# If here, we passed out the table. Break. +# break +# end +# oldRDir = Path.new(oldTableDir, Path.new(oldHRI.getEncodedName().to_s)) +# if !fs.exists(oldRDir) +# LOG.warn(oldRDir.toString() + " does not exist -- region " + +# oldHRI.getRegionNameAsString()) +# else +# Now make a new HRegionInfo to add to .META. for the new region. +# newHRI = createHRI(newTableName, oldHRI) +# newRDir = Path.new(newTableDir, Path.new(newHRI.getEncodedName().to_s)) +# Move the region in filesystem +# LOG.info("Renaming " + oldRDir.toString() + " as " + newRDir.toString()) +# fs.rename(oldRDir, newRDir) +# Removing old region from meta +# LOG.info("Removing " + Bytes.toString(key.getRow()) + " from .META.") +# metaRegion.deleteAll(key.getRow(), HConstants::LATEST_TIMESTAMP) +# Create 'new' region +# newR = HRegion.new(rootdir, utils.getLog(), fs, c, newHRI, nil) +# Add new row. NOTE: Presumption is that only one .META. region. If not, +# need to do the work to figure proper region to add this new region to. +# LOG.info("Adding to meta: " + newR.toString()) +# HRegion.addRegionToMETA(metaRegion, newR) +# LOG.info("Done moving: " + Bytes.toString(key.getRow())) +# end +# Need to clear value else we keep appending values. +# value.clear() +# end +# ensure +# scanner.close() +# end +# end +# LOG.info("Renamed table -- manually delete " + oldTableDir.toString()); +# ensure +# utils.shutdown() +# end + Index: /bin/HBase.rb =================================================================== --- /bin/HBase.rb (revision 785736) +++ /bin/HBase.rb (working copy) @@ -303,7 +303,6 @@ # public HColumnDescriptor(final byte [] familyName, final int maxVersions, # final String compression, final boolean inMemory, # final boolean blockCacheEnabled, final int blocksize, - # final int maxValueLength, # final int timeToLive, final boolean bloomFilter) { name = arg[NAME] raise ArgumentError.new("Column family " + arg + " must have a name") \ @@ -373,7 +372,7 @@ maxlength = -1 if args != nil and args.length > 0 limit = args["LIMIT"] || -1 - maxlength = args["MAXLENGTH"] || -1 + #maxlength = args["MAXLENGTH"] || -1 filter = args["FILTER"] || nil startrow = args["STARTROW"] || "" stoprow = args["STOPROW"] || nil