Index: D:/workspace/hbase-0.90.2-trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
===================================================================
--- D:/workspace/hbase-0.90.2-trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java (revision 0)
+++ D:/workspace/hbase-0.90.2-trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java (revision 69299)
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.util.RegionSplitter.ASCIISplit;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test pre-sharding
+ */
+public class TestRegionSplitter {
+  private static final Log LOG = LogFactory.getLog(TestRegionSplitter.class);
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+  }
+
+  @Test
+  public void TestSplit() {
+    ASCIISplit testsplit = new ASCIISplit();
+    byte[][] splits = testsplit.split(5);
+    String[] expects = {"00000000", "1R333333", "2s666666", "4L999999", "5mCCCCCC"};
+    for (int i = 0; i < expects.length; i++) {
+      assertEquals(expects[i], new String(splits[i]));
+    }
+  }
+
+  public static void main(String[] args) {
+    TestRegionSplitter a = new TestRegionSplitter();
+    a.TestSplit();
+  }
+}
Index: D:/workspace/hbase-0.90.2-trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java
===================================================================
--- D:/workspace/hbase-0.90.2-trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 66982)
+++ D:/workspace/hbase-0.90.2-trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 69299)
@@ -363,6 +363,9 @@
   public static final String HBASE_MASTER_LOGCLEANER_PLUGINS =
       "hbase.master.logcleaner.plugins";
+
+  public static final String HBASE_SPLIT_ALGORITHM =
+      "hbase.util.presharding.algorithm";
 
   /*
    * Minimum percentage of free heap necessary for a successful cluster startup.
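Not part of the patch: the snippet below is a minimal sketch of how the new HConstants.HBASE_SPLIT_ALGORITHM key ("hbase.util.presharding.algorithm") is consumed by the RegionSplitter changes further down in this diff. The configured class name is resolved with Class.forName() and used as the split algorithm, falling back to ASCIISplit when the key is unset or the class cannot be loaded. The throwaway class name and the use of the "$" binary name for the nested class are illustrative assumptions, not something the patch itself contains.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;
    import org.apache.hadoop.hbase.util.RegionSplitter.ASCIISplit;
    import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;

    public class SplitAlgorithmLookupSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Nested classes are addressed by their binary name ("$") for Class.forName().
        conf.set(HConstants.HBASE_SPLIT_ALGORITHM,
            "org.apache.hadoop.hbase.util.RegionSplitter$ASCIISplit");

        String className = conf.get(HConstants.HBASE_SPLIT_ALGORITHM);
        Class<?> splitClass = ASCIISplit.class;      // default when nothing is configured
        if (className != null) {
          try {
            splitClass = Class.forName(className);   // user-supplied algorithm
          } catch (ClassNotFoundException e) {
            System.err.println("Can't find class " + className + ", keeping ASCIISplit");
          }
        }
        SplitAlgorithm algo = (SplitAlgorithm) splitClass.newInstance();
        System.out.println("Using split algorithm: " + algo.getClass().getName());
      }
    }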
Index: D:/workspace/hbase-0.90.2-trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
===================================================================
--- D:/workspace/hbase-0.90.2-trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java (revision 66982)
+++ D:/workspace/hbase-0.90.2-trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java (revision 69299)
@@ -30,7 +30,6 @@
 import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -52,12 +51,14 @@
 import org.apache.hadoop.hbase.client.NoServerForRegionException;
 import org.apache.hadoop.hbase.regionserver.Store;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.record.compiler.generated.ParseException;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+
 /**
  * The {@link RegionSplitter} class provides several utilities to help in the
  * administration lifecycle for developers who choose to manually split regions
@@ -130,7 +131,6 @@
  */
 public class RegionSplitter {
   static final Log LOG = LogFactory.getLog(RegionSplitter.class);
-
   /**
   * A generic interface for the RegionSplitter code to use for all it's
   * functionality. Note that the original authors of this code use
@@ -225,10 +225,11 @@
   *           user requested exit
   * @throws ParseException
   *           problem parsing user input
+  * @throws org.apache.commons.cli.ParseException
   */
  @SuppressWarnings("static-access")
  public static void main(String[] args) throws IOException,
-      InterruptedException, ParseException {
+      InterruptedException, ParseException, org.apache.commons.cli.ParseException {
    Configuration conf = HBaseConfiguration.create();

    // parse user input
@@ -251,6 +252,9 @@
    opt.addOption(null, "risky", false,
        "Skip verification steps to complete quickly."
            + "STRONGLY DISCOURAGED for production systems.  ");
"); + opt.addOption(OptionBuilder.withArgName("startKey,endKey").hasArg() + .withDescription("Describe the startKey and endKey, split by a comma," + + " default is [00000000,7FFFFFFF]").create("k")); CommandLine cmd = new GnuParser().parse(opt, args); if (cmd.hasOption("D")) { @@ -281,6 +285,13 @@ if (createTable) { conf.set("split.count", cmd.getOptionValue("c")); + if(cmd.getOptionValue("k") != null){ + String[] keyRange = cmd.getOptionValue("k").split(","); + if(keyRange.length == 2){ + start = keyRange[0]; + end = keyRange[1]; + } + } createPresplitTable(tableName, cmd.getOptionValue("f").split(":"), conf); } @@ -292,10 +303,19 @@ } } + @SuppressWarnings("unchecked") static void createPresplitTable(String tableName, String[] columnFamilies, Configuration conf) throws IOException, InterruptedException { + String classSplitAlgorithm = conf.get(HConstants.HBASE_SPLIT_ALGORITHM); + Class splitclass = ASCIISplit.class; + try{ + if(classSplitAlgorithm != null) + splitclass = (Class)Class.forName(classSplitAlgorithm); + } catch (ClassNotFoundException e) { + LOG.warn("Can't find class " + classSplitAlgorithm); + } Class splitClass = conf.getClass( - "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class); + "split.algorithm", splitclass, SplitAlgorithm.class); SplitAlgorithm splitAlgo; try { splitAlgo = splitClass.newInstance(); @@ -317,6 +337,7 @@ HBaseAdmin admin = new HBaseAdmin(conf); Preconditions.checkArgument(!admin.tableExists(tableName), "Table already exists: " + tableName); + splitAlgo.split(splitCount); admin.createTable(desc, splitAlgo.split(splitCount)); LOG.debug("Table created! Waiting for regions to show online in META..."); @@ -696,9 +717,9 @@ * they were binary. */ public static class MD5StringSplit implements SplitAlgorithm { - final static String MAXMD5 = "7FFFFFFF"; - final static BigInteger MAXMD5_INT = new BigInteger(MAXMD5, 16); - final static int rowComparisonLength = MAXMD5.length(); + static String MAXMD5 = "7FFFFFFF"; + static BigInteger MAXMD5_INT = new BigInteger(MAXMD5, 16); + static int rowComparisonLength = MAXMD5.length(); public byte[] split(byte[] start, byte[] end) { BigInteger s = convertToBigInteger(start); @@ -781,4 +802,168 @@ } } + static class ASCIIConvert{ + final String sascii = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + final char[] aa = new char[sascii.length()]; + final int[] bb = new int[sascii.length()]; + public ASCIIConvert(){ + sascii.getChars(0, aa.length, aa, 0); + for(int i=0;i"00000000" => "7FFFFFFF" in + * default, and you can change them by input "-k startkey,endkey". 
+   * ASCIISplit limits the scope of the keys to the characters [0-9, A-Z, a-z].
+   */
+  public static class ASCIISplit implements SplitAlgorithm {
+    static String MAX_ASCII = end;
+    static String MIN_ASCII = start;
+    static char[] MAX_ASCII_CHAR = MAX_ASCII.toCharArray();
+    static char[] MIN_ASCII_CHAR = MIN_ASCII.toCharArray();
+    static int rowComparisonLength = MAX_ASCII.length();
+    ASCIIConvert asc = new ASCIIConvert();
+
+    public byte[] split(byte[] start, byte[] end) {
+      int[] s = convertToInts(start);
+      int[] e = convertToInts(end);
+      Preconditions.checkArgument(!e.equals(BigInteger.ZERO));
+      return convertToByte(split2(s, e));
+    }
+
+    public byte[][] split(int n) {
+      int[][] total = new int[n][rowComparisonLength];
+      int b = 0;
+
+      for (int j = 0; j < n; j++) {
+        for (int i = 0; i < rowComparisonLength; i++) {
+          int sub = asc.getint(MAX_ASCII_CHAR[i]) - asc.getint(MIN_ASCII_CHAR[i]);
+          if (i == 0)
+            total[j][i] = (sub * j) / n + asc.getint(MIN_ASCII_CHAR[i]);
+          else {
+            int tmp = (sub * j + b * asc.getMax()) / n + asc.getint(MIN_ASCII_CHAR[i]);
+            total[j][i] = tmp;
+            for (int k = i; k > 0; k--) {
+              while (total[j][k] > asc.getMax()) {
+                total[j][k - 1]++;
+                total[j][k] -= asc.getMax();
+              }
+            }
+          }
+          b = (sub * j) % n;
+        }
+      }
+      return convertToBytes(total, n);
+    }
+
+    public byte[] firstRow() {
+      int[] a = new int[rowComparisonLength];
+      for (int i = 0; i < rowComparisonLength; i++) {
+        a[i] = asc.getint(MIN_ASCII_CHAR[i]);
+      }
+      return convertToByte(a);
+    }
+
+    public byte[] lastRow() {
+      int[] a = new int[rowComparisonLength];
+      for (int i = 0; i < rowComparisonLength; i++) {
+        a[i] = asc.getint(MAX_ASCII_CHAR[i]);
+      }
+      return convertToByte(a);
+    }
+
+    public byte[] strToRow(String in) {
+      char[] chars = in.toCharArray();
+      int[] returnBytes = new int[chars.length];

+      for (int i = 0; i < chars.length; i++) {
+        returnBytes[i] = asc.getint(chars[i]);
+      }
+      return convertToByte(returnBytes);
+    }
Index: D:/workspace/hbase-0.90.2-trunk/src/main/resources/hbase-default.xml
===================================================================
--- D:/workspace/hbase-0.90.2-trunk/src/main/resources/hbase-default.xml (revision 66982)
+++ D:/workspace/hbase-0.90.2-trunk/src/main/resources/hbase-default.xml (revision 69299)
+  <property>
+    <name>hbase.util.presharding.algorithm</name>
+    <value>org.apache.hadoop.hbase.util.RegionSplitter.ASCIISplit</value>
+    <description>Which split algorithm to use for pre-sharding. Either
+    ASCIISplit or MD5StringSplit can be chosen.
+    </description>
+  </property>
   <property>
     <name>hbase.server.thread.wakefrequency</name>
     <value>10000</value>
     <description>Time to sleep in between searches for work (in milliseconds).
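Reading the new split(int) loop together with the unit-test expectations, the second boundary "1R333333" for n = 5 over the default ["00000000", "7FFFFFFF"] range can be traced by hand, assuming ASCIIConvert.getint() returns a character's index in the 62-character alphabet and getMax() returns the largest index, 61: position 0 is (getint('7') - getint('0')) * 1 / 5 = 7 / 5 = 1, i.e. '1', leaving remainder b = 2; position 1 is (15 * 1 + 2 * 61) / 5 = 137 / 5 = 27, i.e. 'R'; the remainder then becomes 0, so every later position is 15 / 5 = 3, i.e. '3'. The same arithmetic yields the other boundaries asserted in TestRegionSplitter.

As a usage illustration (not part of the patch; the table name "TestTable" and column family "cf" are placeholders), the sketch below computes the ASCIISplit boundaries and passes them to HBaseAdmin.createTable(), mirroring what the patched createPresplitTable() does.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.client.HBaseAdmin;
    import org.apache.hadoop.hbase.util.RegionSplitter.ASCIISplit;

    public class PresplitTableSketch {
      public static void main(String[] args) throws Exception {
        // Compute five evenly spaced ASCII boundaries over the default
        // ["00000000", "7FFFFFFF"] key range; these are the keys the unit
        // test asserts: 00000000, 1R333333, 2s666666, 4L999999, 5mCCCCCC.
        ASCIISplit splitAlgo = new ASCIISplit();
        byte[][] splitKeys = splitAlgo.split(5);

        // Create a table pre-split into five regions, mirroring
        // admin.createTable(desc, splitAlgo.split(splitCount)) in the patch.
        Configuration conf = HBaseConfiguration.create();
        HTableDescriptor desc = new HTableDescriptor("TestTable");   // placeholder table name
        desc.addFamily(new HColumnDescriptor("cf"));                 // placeholder column family
        HBaseAdmin admin = new HBaseAdmin(conf);
        admin.createTable(desc, splitKeys);
      }
    }

From the command line, the same result is available through the RegionSplitter tool using the existing -c and -f options together with the new -k option, for example "-c 5 -f cf -k 00000000,7FFFFFFF".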