Index: main/java/org/apache/hadoop/hbase/util/RegionSplitter.java =================================================================== --- main/java/org/apache/hadoop/hbase/util/RegionSplitter.java (revision 67480) +++ main/java/org/apache/hadoop/hbase/util/RegionSplitter.java (working copy) @@ -1,3 +1,5 @@ +package org.apache.hadoop.hbase.util; + /** * Copyright 2010 The Apache Software Foundation * @@ -17,8 +19,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hbase.util; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.math.BigInteger; import java.util.LinkedList; @@ -30,7 +33,6 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -52,12 +54,17 @@ import org.apache.hadoop.hbase.client.NoServerForRegionException; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.record.compiler.generated.ParseException; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; + /** * The {@link RegionSplitter} class provides several utilities to help in the * administration lifecycle for developers who choose to manually split regions @@ -140,6 +147,7 @@ * -D split.algorithm= */ public static interface SplitAlgorithm { + void setRangeString(String minKey, String maxKey); /** * Split a pre-existing region into 2 regions. 
* @@ -225,10 +233,11 @@ * user requested exit * @throws ParseException * problem parsing user input + * @throws org.apache.commons.cli.ParseException */ @SuppressWarnings("static-access") public static void main(String[] args) throws IOException, - InterruptedException, ParseException { + InterruptedException, ParseException, org.apache.commons.cli.ParseException { Configuration conf = HBaseConfiguration.create(); // parse user input @@ -251,6 +260,8 @@ opt.addOption(null, "risky", false, "Skip verification steps to complete quickly." + "STRONGLY DISCOURAGED for production systems. "); + opt.addOption(OptionBuilder.withArgName("startKey,endKey").hasArg() + .withDescription("default is 000000-ffffff").create("k")); CommandLine cmd = new GnuParser().parse(opt, args); if (cmd.hasOption("D")) { @@ -281,6 +292,11 @@ if (createTable) { conf.set("split.count", cmd.getOptionValue("c")); + if(cmd.getOptionValue("k") != null){ + String[] keyRange = cmd.getOptionValue("k").split(","); + start = keyRange[0]; + end = keyRange[1]; + } createPresplitTable(tableName, cmd.getOptionValue("f").split(":"), conf); } @@ -295,10 +311,11 @@ static void createPresplitTable(String tableName, String[] columnFamilies, Configuration conf) throws IOException, InterruptedException { Class splitClass = conf.getClass( - "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class); + "split.algorithm", ASCIISplit.class, SplitAlgorithm.class); SplitAlgorithm splitAlgo; try { splitAlgo = splitClass.newInstance(); + splitAlgo.setRangeString(start,end); } catch (Exception e) { throw new IOException("Problem loading split algorithm: ", e); } @@ -317,6 +334,7 @@ HBaseAdmin admin = new HBaseAdmin(conf); Preconditions.checkArgument(!admin.tableExists(tableName), "Table already exists: " + tableName); + splitAlgo.split(splitCount); admin.createTable(desc, splitAlgo.split(splitCount)); LOG.debug("Table created! Waiting for regions to show online in META..."); @@ -696,10 +714,17 @@ * they were binary. 
*/ public static class MD5StringSplit implements SplitAlgorithm { - final static String MAXMD5 = "7FFFFFFF"; - final static BigInteger MAXMD5_INT = new BigInteger(MAXMD5, 16); - final static int rowComparisonLength = MAXMD5.length(); + static String MAXMD5 = "7FFFFFFF"; + static String MINMD5 = "00000000"; + static BigInteger MAXMD5_INT = new BigInteger(MAXMD5, 16); + static int rowComparisonLength = MAXMD5.length(); + public void setRangeString(String minKey, String maxKey){ /* replaces the default key range; maxKey must be a base-16 string because it is parsed by BigInteger(…, 16) below */ + MAXMD5 = maxKey; + MINMD5 = minKey; /* fix: lower bound must come from minKey (was mistakenly assigned maxKey) */ + MAXMD5_INT = new BigInteger(MAXMD5, 16); + rowComparisonLength = MAXMD5.length(); + } public byte[] split(byte[] start, byte[] end) { BigInteger s = convertToBigInteger(start); BigInteger e = convertToBigInteger(end); @@ -781,4 +806,167 @@ } } + static class ASCIIConvert{ + String sascii = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + char[] aa = new char[sascii.length()]; + int[] bb = new int[sascii.length()]; + public ASCIIConvert(){ + sascii.getChars(0, aa.length, aa, 0); + for(int i=0;i"00000000" => "7FFFFFFF" + * and are left-padded with zeros to keep the same order lexicographically as if + * they were binary. 
+ */ + public static class ASCIISplit implements SplitAlgorithm { + static String MAXMD5 = ""; + static String MINMD5 = ""; + static char[] MAXMD5_CHAR; + static int rowComparisonLength; + static char[] MINMD5_CHAR; + ASCIIConvert asc = new ASCIIConvert(); + + public void setRangeString(String minKey, String maxKey){ /* stores both bounds as strings and char arrays; row width is taken from maxKey's length */ + MAXMD5 = maxKey; + MINMD5 = minKey; + MAXMD5_CHAR = MAXMD5.toCharArray(); + MINMD5_CHAR = MINMD5.toCharArray(); + rowComparisonLength = MAXMD5.length(); + } + + public byte[] split(byte[] start, byte[] end) { + int[] s = convertToInts(start); + int[] e = convertToInts(end); + Preconditions.checkArgument(!e.equals(BigInteger.ZERO)); /* NOTE(review): e is an int[], so equals(BigInteger.ZERO) is always false and this check can never fail — confirm the intended precondition */ + return convertToByte(split2(s, e)); + } + + public byte[][] split(int n) { + int[][] total = new int[n][rowComparisonLength]; + int b = 0; + + for(int j = 0; j < n; j ++){ + for(int i = 0; i < rowComparisonLength; i ++){ + int sub = asc.getint(MAXMD5_CHAR[i]) - asc.getint(MINMD5_CHAR[i]); + if(i == 0) + total[j][i] = (sub*j)/n + asc.getint(MINMD5_CHAR[i]); + else + total[j][i] = (sub*j+b*asc.getMax())/n + asc.getint(MINMD5_CHAR[i]); + b = (sub*j+b*asc.getMax())%n > 0?1:0; + } + } + return convertToBytes(total, n); + } + + public byte[] firstRow() { /* converts the configured minimum key, position by position */ + int[] a= new int[rowComparisonLength]; + for(int i = 0; i < a.length && i < MINMD5_CHAR.length; i++){ /* fix: index by position; the old loop used the char's code point as the array index */ + a[i] = asc.getint(MINMD5_CHAR[i]); + } + return convertToByte(a); + } + + public byte[] lastRow() { /* converts the configured maximum key, position by position */ + int[] a= new int[rowComparisonLength]; + for(int i = 0; i < a.length && i < MAXMD5_CHAR.length; i++){ /* fix: index by position; the old loop used the char's code point as the array index */ + a[i] = asc.getint(MAXMD5_CHAR[i]); + } + return convertToByte(a); + } + + public byte[] strToRow(String in) { + char[] chars = in.toCharArray(); + int[] returnBytes = new int[chars.length]; + + for(int i=0; i