commit f98f733837c98baa0551f36cf737a4fa01ca7c05
Author: nspiegelberg
Date:   4 hours ago

    HBASE-3448 : RegionSplitter, utility class to manually split tables

diff --git a/src/main/java/org/apache/hadoop/hbase/client/HTable.java b/src/main/java/org/apache/hadoop/hbase/client/HTable.java
index b9be112..a5a90db 100644
--- a/src/main/java/org/apache/hadoop/hbase/client/HTable.java
+++ b/src/main/java/org/apache/hadoop/hbase/client/HTable.java
@@ -19,9 +19,9 @@
  */
 package org.apache.hadoop.hbase.client;
 
-import java.io.IOException;
 import java.io.DataInput;
 import java.io.DataOutput;
+import java.io.IOException;
 import java.lang.reflect.Proxy;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -1330,6 +1330,14 @@ public class HTable implements HTableInterface {
     getRegionCachePrefetch(tableName);
   }
 
+  /**
+   * Explicitly clears the region cache to fetch the latest value from META.
+   * This is a power user function: avoid unless you know the ramifications.
+   */
+  public void clearRegionCache() {
+    this.connection.clearRegionCache();
+  }
+
   @Override
   public <T extends CoprocessorProtocol> T coprocessorProxy(
       Class<T> protocol, byte[] row) {
diff --git a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
new file mode 100644
index 0000000..dd154d3
--- /dev/null
+++ b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
@@ -0,0 +1,725 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.LinkedList;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.regionserver.Store;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+/**
+ * The {@link RegionSplitter} class provides several utilities to help in the
+ * administration lifecycle for developers who choose to manually split regions
+ * instead of having HBase handle that automatically. The most useful utilities
+ * are:
+ *
+ * 1) Create a table with a specified number of pre-split regions
+ * 2) Execute a rolling split of all regions on an existing table
+ *
+ * Both operations can be safely done on a live server.
+ *
+ * Question: How do I turn off automatic splitting?
+ * Answer: Automatic splitting is determined by the configuration value
+ * "hbase.hregion.max.filesize". It is not recommended that you set this to
+ * Long.MAX_VALUE in case you forget about manual splits. A suggested setting
+ * is 100GB, which would result in > 1hr major compactions if reached.
+ *
+ * Question: Why did the original authors decide to manually split?
+ * Answer: Specific workload characteristics of our use case allowed us to
+ * benefit from a manual split system.
+ *
+ * 1) Data (~1k) that would grow instead of being replaced
+ * 2) Data growth was roughly uniform across all regions
+ * 3) OLTP workload. Data loss is a big deal.
+ *
+ * Question: Why is manual splitting good for this workload?
+ * Answer: Although automated splitting is not a bad option, there are benefits
+ * to manual splitting.
+ *
+ * 1) With growing amounts of data, splits will continually be needed. Since
+ *    you always know exactly what regions you have, long-term debugging and
+ *    profiling is much easier with manual splits. It is hard to trace the
+ *    logs to understand region level problems if it keeps splitting and
+ *    getting renamed.
+ * 2) Data offlining bugs + unknown number of split regions == oh crap!
+ *    If an HLog or StoreFile was mistakenly unprocessed by HBase due to a
+ *    weird bug and you notice it a day or so later, you can be assured that
+ *    the regions specified in these files are the same as the current regions
+ *    and you have less headaches trying to restore/replay your data.
+ * 3) You can finely tune your compaction algorithm. With roughly uniform data
+ *    growth, it's easy to cause split / compaction storms as the regions all
+ *    roughly hit the same data size at the same time. With manual splits, you
+ *    can let staggered, time-based major compactions spread out your network
+ *    IO load.
+ *
+ * Question: What's the optimal number of pre-split regions to create?
+ * Answer: Mileage will vary depending upon your application.
+ *
+ * The short answer for our application is that we started with 10 pre-split
+ * regions / server and watched our data growth over time. It's better to err
+ * on the side of too few regions and rolling split later.
+ *
+ * The more complicated answer is that this depends upon the largest storefile
+ * in your region. With a growing data size, this will get larger over time.
+ * You want the largest region to be just big enough that the {@link Store}
+ * compact selection algorithm only compacts it due to a timed major. If you
+ * don't, your cluster can be prone to compaction storms as the algorithm
+ * decides to run major compactions on a large series of regions all at once.
+ *
+ * If you pre-split your regions too thin, you can increase the major
+ * compaction interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If
+ * your data size grows too large, use this script to perform a network IO
+ * safe rolling split of all regions.
+ */
+public class RegionSplitter {
+  static final Log LOG = LogFactory.getLog(RegionSplitter.class);
+
+  /**
+   * A generic interface for the RegionSplitter code to use for all its
+   * functionality. Note that the original authors of this code use
+   * MD5StringSplit to partition their table, but this interface is provided
+   * so that you can plug in a custom algorithm. To use, create a new derived
+   * class from this interface and call the RegionSplitter class with the
+   * argument: -D split.algorithm=
+   */
+  public static interface SplitAlgorithm {
+    /**
+     * Split a pre-existing region into 2 regions.
+     *
+     * @param start start row of the region
+     * @param end end row of the region
+     * @return the split row to use
+     */
+    byte[] split(byte[] start, byte[] end);
+
+    /**
+     * Split an entire table.
+     *
+     * @param numberOfSplits number of regions to split the table into
+     * @return array of split keys for the initial regions of the table
+     */
+    byte[][] split(int numberOfSplits);
+
+    /**
+     * In HBase, the first row is represented by an empty byte array. This
+     * might cause problems with your split algorithm or row printing. All
+     * your APIs will be passed firstRow() instead of empty array.
+     *
+     * @return your representation of your first row
+     */
+    byte[] firstRow();
+
+    /**
+     * In HBase, the last row is represented by an empty byte array. This
+     * might cause problems with your split algorithm or row printing. All
+     * your APIs will be passed lastRow() instead of empty array.
+     *
+     * @return your representation of your last row
+     */
+    byte[] lastRow();
+
+    /**
+     * @param input
+     *          user or file input for row
+     * @return byte array representation of this row for HBase
+     */
+    byte[] strToRow(String input);
+
+    /**
+     * @param row
+     *          byte array representing a row in HBase
+     * @return String to use for debug & file printing
+     */
+    String rowToStr(byte[] row);
+
+    /**
+     * @return the separator character to use when storing / printing the row
+     */
+    String separator();
+  }
+
+  /**
+   * The main function for the RegionSplitter application. Common uses:
+   *
+   * # create a table named 'myTable' with 60 pre-split regions
+   * # containing 2 column families 'test' & 'rs'
+   * bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs myTable
+   *
+   * # perform a rolling split of 'myTable' (i.e. 60 => 120 regions),
+   * # 2 outstanding splits at a time
+   * bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
+   *
+   * @param args
+   *          Usage: RegionSplitter <TABLE>
+   *          <-c <# regions> | -r [-o <# outstanding splits>]>
+   *          [-D <property=value>]
+   * @throws IOException
+   *           HBase IO problem
+   * @throws InterruptedException
+   *           user requested exit
+   * @throws ParseException
+   *           problem parsing user input
+   */
+  @SuppressWarnings("static-access")
+  public static void main(String[] args) throws IOException,
+      InterruptedException, ParseException {
+    Configuration conf = HBaseConfiguration.create();
+
+    // parse user input
+    Options opt = new Options();
+    opt.addOption(OptionBuilder.withArgName("property=value").hasArg()
+        .withDescription("Override HBase Configuration Settings")
+        .create("D"));
+    opt.addOption(OptionBuilder.withArgName("region count").hasArg()
+        .withDescription("Create a new table with a pre-split number of regions")
+        .create("c"));
+    opt.addOption(OptionBuilder.withArgName("family:family:...").hasArg()
+        .withDescription("Column Families to create with new table. Required with -c")
+        .create("f"));
+    opt.addOption("h", false, "Print this usage help");
+    opt.addOption("r", false, "Perform a rolling split of an existing region");
+    opt.addOption(OptionBuilder.withArgName("count").hasArg()
+        .withDescription("Max outstanding splits that have unfinished major compactions")
+        .create("o"));
+    opt.addOption(null, "risky", false, "Skip verification steps to complete quickly. "
+        + "STRONGLY DISCOURAGED for production systems.");
+    CommandLine cmd = new GnuParser().parse(opt, args);
+
+    if (cmd.hasOption("D")) {
+      for (String confOpt : cmd.getOptionValues("D")) {
+        String[] kv = confOpt.split("=", 2);
+        if (kv.length == 2) {
+          conf.set(kv[0], kv[1]);
+          LOG.debug("-D configuration override: " + kv[0] + "=" + kv[1]);
+        } else {
+          throw new ParseException("-D option format invalid: " + confOpt);
+        }
+      }
+    }
+
+    if (cmd.hasOption("risky")) {
+      conf.setBoolean("split.verify", false);
+    }
+
+    boolean createTable = cmd.hasOption("c") && cmd.hasOption("f");
+    boolean rollingSplit = cmd.hasOption("r");
+    boolean oneOperOnly = createTable ^ rollingSplit;
+
+    if (1 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
+      new HelpFormatter().printHelp("RegionSplitter <TABLE>", opt);
", opt); + return; + } + String tableName = cmd.getArgs()[0]; + + if (createTable) { + conf.set("split.count", cmd.getOptionValue("c")); + createPresplitTable(tableName, cmd.getOptionValue("f").split(":"), conf); + } + + if (rollingSplit) { + if (cmd.hasOption("o")) { + conf.set("split.outstanding", cmd.getOptionValue("o")); + } + rollingSplit(tableName, conf); + } + } + + static void createPresplitTable(String tableName, + String[] columnFamilies, Configuration conf) + throws IOException, InterruptedException { + Class splitClass = conf.getClass( + "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class); + SplitAlgorithm splitAlgo; + try { + splitAlgo = splitClass.newInstance(); + } catch (Exception e) { + throw new IOException("Problem loading split algorithm: ", e); + } + final int splitCount = conf.getInt("split.count", 0); + Preconditions.checkArgument(splitCount > 1, "Split count must be > 1"); + + Preconditions.checkArgument(columnFamilies.length > 0, + "Must specify at least one column family. "); + LOG.debug("Creating table " + tableName + " with " + columnFamilies.length + + " column families. Presplitting to " + splitCount + " regions"); + + HTableDescriptor desc = new HTableDescriptor(tableName); + for (String cf : columnFamilies) { + desc.addFamily(new HColumnDescriptor(Bytes.toBytes(cf))); + } + HBaseAdmin admin = new HBaseAdmin(conf); + Preconditions.checkArgument(!admin.tableExists(tableName), + "Table already exists: " + tableName); + admin.createTable(desc, splitAlgo.split(splitCount)); + LOG.debug("Table created! Waiting for regions to show online in META..."); + + if (!conf.getBoolean("split.verify", true)) { + // NOTE: createTable is synchronous on the table, but not on the regions + HTable table = new HTable(tableName); + int onlineRegions = 0; + while (onlineRegions < splitCount) { + onlineRegions = table.getRegionsInfo().size(); + LOG.debug(onlineRegions + " of " + splitCount + + " regions online..."); + if (onlineRegions < splitCount) { + Thread.sleep(10 * 1000); // sleep + } + } + } + + LOG.debug("Finished creating table with " + splitCount + " regions"); + } + + static void rollingSplit(String tableName, Configuration conf) + throws IOException, InterruptedException { + Class splitClass = conf.getClass( + "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class); + SplitAlgorithm splitAlgo; + try { + splitAlgo = splitClass.newInstance(); + } catch (Exception e) { + throw new IOException("Problem loading split algorithm: ", e); + } + final int minOS = conf.getInt("split.outstanding", 2); + + HTable table = new HTable(conf, tableName); + + // max outstanding splits. default == 50% of servers + final int MAX_OUTSTANDING = Math.max(table.getCurrentNrHRS() / 2, minOS); + + Path hbDir = new Path(conf.get(HConstants.HBASE_DIR)); + Path tableDir = HTableDescriptor.getTableDir(hbDir, table.getTableName()); + Path splitFile = new Path(tableDir, "_balancedSplit"); + FileSystem fs = FileSystem.get(conf); + + // get a list of daughter regions to create + LinkedList> tmpRegionSet = getSplits(table, splitAlgo); + LinkedList> outstanding = Lists.newLinkedList(); + int splitCount = 0; + final int origCount = tmpRegionSet.size(); + + // all splits must compact & we have 1 compact thread, so 2 split + // requests to the same RS can stall the outstanding split queue. 
+    // To fix, group the regions into an RS pool and round-robin through it
+    LOG.debug("Bucketing regions by regionserver...");
+    TreeMap<HServerAddress, LinkedList<Pair<byte[], byte[]>>>
+        daughterRegions = Maps.newTreeMap();
+    for (Pair<byte[], byte[]> dr : tmpRegionSet) {
+      HServerAddress rsLocation = table.getRegionLocation(dr.getSecond())
+          .getServerAddress();
+      if (!daughterRegions.containsKey(rsLocation)) {
+        LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
+        daughterRegions.put(rsLocation, entry);
+      }
+      daughterRegions.get(rsLocation).add(dr);
+    }
+    LOG.debug("Done with bucketing. Split time!");
+    long startTime = System.currentTimeMillis();
+
+    // open the split file and modify it as splits finish
+    FSDataInputStream tmpIn = fs.open(splitFile);
+    byte[] rawData = new byte[tmpIn.available()];
+    tmpIn.readFully(rawData);
+    tmpIn.close();
+    FSDataOutputStream splitOut = fs.create(splitFile);
+    splitOut.write(rawData);
+
+    try {
+      // *** split code ***
+      while (!daughterRegions.isEmpty()) {
+        LOG.debug(daughterRegions.size() + " RS have regions to split.");
+
+        // round-robin through the RS list
+        for (HServerAddress rsLoc = daughterRegions.firstKey();
+             rsLoc != null;
+             rsLoc = daughterRegions.higherKey(rsLoc)) {
+          Pair<byte[], byte[]> dr = null;
+
+          // find a region in the RS list that hasn't been moved
+          LOG.debug("Finding a region on " + rsLoc);
+          LinkedList<Pair<byte[], byte[]>> regionList
+              = daughterRegions.get(rsLoc);
+          while (!regionList.isEmpty()) {
+            dr = regionList.pop();
+
+            // get current region info
+            byte[] split = dr.getSecond();
+            HRegionLocation regionLoc = table.getRegionLocation(split);
+
+            // if this region moved locations
+            HServerAddress newRs = regionLoc.getServerAddress();
+            if (newRs.compareTo(rsLoc) != 0) {
+              LOG.debug("Region with " + splitAlgo.rowToStr(split)
+                  + " moved to " + newRs + ". Relocating...");
+              // relocate it, don't use it right now
+              if (!daughterRegions.containsKey(newRs)) {
+                LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
+                daughterRegions.put(newRs, entry);
+              }
+              daughterRegions.get(newRs).add(dr);
+              dr = null;
+              continue;
+            }
+
+            // make sure this region wasn't already split
+            byte[] sk = regionLoc.getRegionInfo().getStartKey();
+            if (sk.length != 0) {
+              if (Bytes.equals(split, sk)) {
+                LOG.debug("Region already split on "
+                    + splitAlgo.rowToStr(split) + ". Skipping this region...");
+                dr = null;
+                continue;
+              }
+              byte[] start = dr.getFirst();
+              Preconditions.checkArgument(Bytes.equals(start, sk), splitAlgo
+                  .rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
+            }
+
+            // passed all checks! found a good region
+            break;
+          }
+          if (regionList.isEmpty()) {
+            daughterRegions.remove(rsLoc);
+          }
+          if (dr == null) continue;
+
+          // we have a good region, time to split!
+
+          byte[] start = dr.getFirst();
+          byte[] split = dr.getSecond();
+          // request split
+          LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
+          HBaseAdmin admin = new HBaseAdmin(table.getConfiguration());
+          admin.split(table.getTableName(), split);
+
+          if (conf.getBoolean("split.verify", true)) {
+            // wait for one of the daughter regions to come online
+            boolean daughterOnline = false;
+            int daughterSleep = 5; // seconds
+            while (!daughterOnline) {
+              LOG.debug("Waiting for daughter region to come online...");
+              table.clearRegionCache();
+              HRegionInfo hri = table.getRegionLocation(split).getRegionInfo();
+              daughterOnline = Bytes.equals(hri.getStartKey(), split)
+                  && !hri.isOffline();
+              daughterSleep = Math.min(daughterSleep * 2, 60);
+              Thread.sleep(daughterSleep * 1000); // sleep
+            }
+            LOG.debug("Daughter region is online.");
+          }
+
+          // mark the region as successfully split.
+          // NOTE: split done, but daughter regions still need to major compact
+          splitOut.writeChars("- " + splitAlgo.rowToStr(dr.getFirst()) + " "
+              + splitAlgo.rowToStr(dr.getSecond()) + "\n");
+          splitCount++;
+          if (splitCount % 10 == 0) {
+            long tDiff = (System.currentTimeMillis() - startTime) / splitCount;
+            LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount
+                + ". Avg Time / Split = "
+                + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
+          }
+
+          if (conf.getBoolean("split.verify", true)) {
+            // if we have too many outstanding splits, wait for oldest ones to finish
+            outstanding.addLast(Pair.newPair(start, split));
+            if (outstanding.size() > MAX_OUTSTANDING) {
+              Pair<byte[], byte[]> reg = outstanding.removeFirst();
+              String outStart = splitAlgo.rowToStr(reg.getFirst());
+              String outSplit = splitAlgo.rowToStr(reg.getSecond());
+              LOG.debug("Waiting for " + outStart + " , " + outSplit
+                  + " to finish compaction");
+              // when a daughter region is opened, a compaction is triggered
+              // wait until compaction completes for both daughter regions
+              LinkedList<HRegionInfo> check = Lists.newLinkedList();
+              // figure out where this region should be in HDFS
+              check.add(table.getRegionLocation(reg.getFirst()).getRegionInfo());
+              check.add(table.getRegionLocation(reg.getSecond()).getRegionInfo());
+              while (!check.isEmpty()) {
+                // compaction is completed when all reference files are gone
+                for (HRegionInfo hri: check.toArray(new HRegionInfo[]{})) {
+                  boolean refFound = false;
+                  byte[] sk = hri.getStartKey();
+                  if (sk.length == 0) sk = splitAlgo.firstRow();
+                  String startKey = splitAlgo.rowToStr(sk);
+                  // check every Column Family for that region
+                  for (HColumnDescriptor c : hri.getTableDesc().getFamilies()) {
+                    Path cfDir = Store.getStoreHomedir(
+                        tableDir, hri.getEncodedName(), c.getName());
+                    if (fs.exists(cfDir)) {
+                      for (FileStatus file : fs.listStatus(cfDir)) {
+                        refFound |= StoreFile.isReference(file.getPath());
+                        if (refFound) {
+                          LOG.debug("Reference still exists for " + startKey
+                              + " at " + file.getPath());
+                          break;
+                        }
+                      }
+                    }
+                    if (refFound) break;
+                  }
+                  if (!refFound) {
+                    check.remove(hri);
+                    LOG.debug("- finished compaction of " + startKey);
+                  }
+                }
+                // sleep in between requests
+                if (!check.isEmpty()) {
+                  LOG.debug("Waiting for " + check.size() + " compactions");
+                  Thread.sleep(30 * 1000);
+                }
+              }
+            }
+          }
+        }
+      }
+      LOG.debug("All regions have been successfully split!");
+    } finally {
+      long tDiff = System.currentTimeMillis() - startTime;
+      LOG.debug("TOTAL TIME = "
+          + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
+      LOG.debug("Splits = " + splitCount);
+      LOG.debug("Avg Time / Split = "
+          + org.apache.hadoop.util.StringUtils.formatTime(tDiff/splitCount));
+
+      splitOut.close();
+    }
+    fs.delete(splitFile, false);
+  }
+
+  static LinkedList<Pair<byte[], byte[]>> getSplits(HTable table,
+      SplitAlgorithm splitAlgo) throws IOException {
+    Path hbDir = new Path(table.getConfiguration().get(HConstants.HBASE_DIR));
+    Path tableDir = HTableDescriptor.getTableDir(hbDir, table.getTableName());
+    Path splitFile = new Path(tableDir, "_balancedSplit");
+    FileSystem fs = FileSystem.get(table.getConfiguration());
+
+    // using strings because (new byte[]{0}).equals(new byte[]{0}) == false
+    Set<Pair<String, String>> daughterRegions = Sets.newHashSet();
+
+    // does a split file exist?
+    if (!fs.exists(splitFile)) {
+      // NO = fresh start. calculate splits to make
+      LOG.debug("No _balancedSplit file. Calculating splits...");
+
+      // query meta for all regions in the table
+      Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();
+      Pair<byte[][], byte[][]> tmp = table.getStartEndKeys();
+      Preconditions.checkArgument(
+          tmp.getFirst().length == tmp.getSecond().length,
+          "Start and end key arrays must be the same length");
+      for (int i = 0; i < tmp.getFirst().length; ++i) {
+        byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];
+        if (start.length == 0) start = splitAlgo.firstRow();
+        if (end.length == 0) end = splitAlgo.lastRow();
+        rows.add(Pair.newPair(start, end));
+      }
+      LOG.debug("Table " + Bytes.toString(table.getTableName()) + " has "
+          + rows.size() + " regions that will be split.");
+
+      // prepare the split file
+      Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");
+      FSDataOutputStream tmpOut = fs.create(tmpFile);
+
+      // calculate all the splits == [daughterRegions] = [(start, splitPoint)]
+      for (Pair<byte[], byte[]> r : rows) {
+        byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());
+        String startStr = splitAlgo.rowToStr(r.getFirst());
+        String splitStr = splitAlgo.rowToStr(splitPoint);
+        daughterRegions.add(Pair.newPair(startStr, splitStr));
+        LOG.debug("Will Split [" + startStr + " , "
+            + splitAlgo.rowToStr(r.getSecond()) + ") at " + splitStr);
+        tmpOut.writeChars(
+            "+ " + startStr + splitAlgo.separator() + splitStr + "\n");
+      }
+      tmpOut.close();
+      fs.rename(tmpFile, splitFile);
+    } else {
+      LOG.debug("_balancedSplit file found. Replay log to restore state...");
+      DistributedFileSystem dfs = (DistributedFileSystem)fs;
+      dfs.recoverLease(splitFile);
+
+      // parse split file and process remaining splits
+      FSDataInputStream tmpIn = fs.open(splitFile);
+      StringBuilder sb = new StringBuilder(tmpIn.available());
+      while (tmpIn.available() > 0) {
+        sb.append(tmpIn.readChar());
+      }
+      tmpIn.close();
+      for (String line : sb.toString().split("\n")) {
+        String[] cmd = line.split(splitAlgo.separator());
+        Preconditions.checkArgument(3 == cmd.length);
+        byte[] start = splitAlgo.strToRow(cmd[1]);
+        String startStr = splitAlgo.rowToStr(start);
+        byte[] splitPoint = splitAlgo.strToRow(cmd[2]);
+        String splitStr = splitAlgo.rowToStr(splitPoint);
+        Pair<String, String> r = Pair.newPair(startStr, splitStr);
+        if (cmd[0].equals("+")) {
+          LOG.debug("Adding: " + r);
+          daughterRegions.add(r);
+        } else {
+          LOG.debug("Removing: " + r);
+          Preconditions.checkArgument(cmd[0].equals("-"),
+              "Unknown option: " + cmd[0]);
+          Preconditions.checkState(daughterRegions.contains(r),
+              "Missing row: " + r);
+          daughterRegions.remove(r);
+        }
+      }
+      LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");
+    }
+    LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();
+    for (Pair<String, String> r : daughterRegions) {
+      ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()),
+          splitAlgo.strToRow(r.getSecond())));
+    }
+    return ret;
+  }
+
+  /**
+   * MD5StringSplit is the default {@link SplitAlgorithm} for creating
+   * pre-split tables. The format of MD5StringSplit is the ASCII representation
+   * of an MD5 checksum. Rows are long values in the range
+   * "00000000" => "7FFFFFFF"
+   * and are left-padded with zeros to keep the same order lexicographically
+   * as if they were binary.
+   */
+  public static class MD5StringSplit implements SplitAlgorithm {
+    final static String MAXMD5 = "7FFFFFFF";
+    final static BigInteger MAXMD5_INT = new BigInteger(MAXMD5, 16);
+    final static int rowComparisonLength = MAXMD5.length();
+
+    public byte[] split(byte[] start, byte[] end) {
+      BigInteger s = convertToBigInteger(start);
+      BigInteger e = convertToBigInteger(end);
+      Preconditions.checkArgument(!e.equals(BigInteger.ZERO));
+      return convertToByte(split2(s, e));
+    }
+
+    public byte[][] split(int n) {
+      BigInteger[] splits = new BigInteger[n - 1];
+      BigInteger sizeOfEachSplit = MAXMD5_INT.divide(BigInteger.valueOf(n));
+      for (int i = 1; i < n; i++) {
+        // NOTE: this means the last region gets all the slop.
+        // This is not a big deal if we're assuming n << MAXMD5
+        splits[i - 1] = sizeOfEachSplit.multiply(BigInteger.valueOf(i));
+      }
+      return convertToBytes(splits);
+    }
+
+    public byte[] firstRow() {
+      return convertToByte(BigInteger.ZERO);
+    }
+
+    public byte[] lastRow() {
+      return convertToByte(MAXMD5_INT);
+    }
+
+    public byte[] strToRow(String in) {
+      return convertToByte(new BigInteger(in, 16));
+    }
+
+    public String rowToStr(byte[] row) {
+      return Bytes.toStringBinary(row);
+    }
+
+    public String separator() {
+      return " ";
+    }
+
+    static BigInteger split2(BigInteger minValue, BigInteger maxValue) {
+      return maxValue.add(minValue).divide(BigInteger.valueOf(2));
+    }
+
+    /**
+     * Returns an array of bytes corresponding to an array of BigIntegers
+     *
+     * @param bigIntegers
+     * @return bytes corresponding to the bigIntegers
+     */
+    static byte[][] convertToBytes(BigInteger[] bigIntegers) {
+      byte[][] returnBytes = new byte[bigIntegers.length][];
+      for (int i = 0; i < bigIntegers.length; i++) {
+        returnBytes[i] = convertToByte(bigIntegers[i]);
+      }
+      return returnBytes;
+    }
+
+    /**
+     * Returns the bytes corresponding to the BigInteger
+     *
+     * @param bigInteger
+     * @return bytes corresponding to the input BigInteger
+     */
+    static byte[] convertToByte(BigInteger bigInteger) {
+      String bigIntegerString = bigInteger.toString(16);
+      bigIntegerString = StringUtils.leftPad(bigIntegerString,
+          rowComparisonLength, '0');
+      return Bytes.toBytes(bigIntegerString);
+    }
+
+    /**
+     * Returns the BigInteger represented by the byte array
+     *
+     * @param row
+     * @return the corresponding BigInteger
+     */
+    static BigInteger convertToBigInteger(byte[] row) {
+      return (row.length > 0) ? new BigInteger(Bytes.toString(row), 16)
+          : BigInteger.ZERO;
+    }
+  }
+
+}
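For readers who want to plug in their own algorithm, the SplitAlgorithm hook described in the class javadoc can be exercised without modifying this patch. The sketch below is illustrative only and is not part of the commit: the package org.example.hbase and the class name DecimalStringSplit are hypothetical, and it assumes row keys that are fixed-width, zero-padded decimal strings. It mirrors the structure of MD5StringSplit above.

package org.example.hbase;

import java.math.BigInteger;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;

/**
 * Example SplitAlgorithm for tables whose row keys are zero-padded decimal
 * strings in the range "000000000" => "999999999".
 */
public class DecimalStringSplit implements SplitAlgorithm {
  static final int WIDTH = 9;
  static final BigInteger MAX = new BigInteger("999999999");

  // midpoint of an existing region's [start, end) key range
  public byte[] split(byte[] start, byte[] end) {
    BigInteger s = toInt(start), e = toInt(end);
    return toRow(s.add(e).divide(BigInteger.valueOf(2)));
  }

  // n-1 evenly spaced boundaries for a new table with n regions
  public byte[][] split(int n) {
    byte[][] splits = new byte[n - 1][];
    BigInteger step = MAX.divide(BigInteger.valueOf(n));
    for (int i = 1; i < n; i++) {
      splits[i - 1] = toRow(step.multiply(BigInteger.valueOf(i)));
    }
    return splits;
  }

  public byte[] firstRow() { return toRow(BigInteger.ZERO); }
  public byte[] lastRow()  { return toRow(MAX); }
  public byte[] strToRow(String in) { return toRow(new BigInteger(in)); }
  public String rowToStr(byte[] row) { return Bytes.toStringBinary(row); }
  public String separator() { return " "; }

  private static BigInteger toInt(byte[] row) {
    return row.length > 0 ? new BigInteger(Bytes.toString(row)) : BigInteger.ZERO;
  }

  private static byte[] toRow(BigInteger i) {
    return Bytes.toBytes(StringUtils.leftPad(i.toString(), WIDTH, '0'));
  }
}

A hypothetical invocation, following the -D handling in main() above (the class must be on the HBase classpath):

bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -D split.algorithm=org.example.hbase.DecimalStringSplit -c 60 -f test:rs myTable

For comparison, the default MD5StringSplit derives its keys by dividing the range [0, 7FFFFFFF]: -c 4, for example, produces the three split points 1fffffff, 3ffffffe and 5ffffffd (lowercase, since BigInteger.toString(16) emits lowercase hex).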