Index: src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java (revision 0)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java (revision 0)
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Arrays;
+
+/**
+ * A custom RegionSplitPolicy for testing.
+ * This class also demonstrates how to implement a SplitPolicy that groups
+ * rows by a prefix of the row-key.
+ *
+ * This ensures that a region is not split "inside"
+ * a prefix of a row key. I.e. rows can be co-located by
+ * their prefix.
+ *
+ * The prefix length is read from the table descriptor value stored under
+ * {@link #PREFIX_LENGTH_KEY}. If that value is missing, is not an integer,
+ * or is not positive, no prefix grouping is applied and the split point
+ * chosen by the parent policy is returned unchanged.
+ */
+public class PrefixSplitKeyPolicy extends ConstantSizeRegionSplitPolicy {
+  /** Table descriptor key under which the prefix length is stored. */
+  public static final String PREFIX_LENGTH_KEY =
+      "PrefixSplitKeyPolicy.prefix_length";
+
+  /** Number of leading row-key bytes to keep; 0 disables prefix grouping. */
+  private int prefixLength;
+
+  /**
+   * Configures the parent policy, then reads the prefix length from the
+   * region's table descriptor.
+   *
+   * @param region the region this policy is created for
+   */
+  @Override
+  protected void configureForRegion(HRegion region) {
+    super.configureForRegion(region);
+
+    // this demonstrates how a RegionSplitPolicy can be configured
+    // through HTableDescriptor values
+    prefixLength = parsePrefixLength(
+        region.getTableDesc().getValue(PREFIX_LENGTH_KEY));
+  }
+
+  /**
+   * Truncates the parent's split point to the configured prefix length.
+   *
+   * @return the parent's split point cut to at most the configured number
+   *         of bytes, or the parent's split point unchanged when it is null
+   *         or empty or when prefix grouping is disabled
+   */
+  @Override
+  protected byte[] getSplitPoint() {
+    byte[] splitPoint = super.getSplitPoint();
+    if (prefixLength > 0 && splitPoint != null && splitPoint.length > 0) {
+      // group split keys by a prefix
+      return Arrays.copyOf(splitPoint,
+          Math.min(prefixLength, splitPoint.length));
+    }
+    return splitPoint;
+  }
+
+  /**
+   * Converts the raw table descriptor value into a usable prefix length.
+   *
+   * @param value the configured value, may be null
+   * @return the parsed length, or 0 when the value is missing, malformed
+   *         or not positive
+   */
+  private static int parsePrefixLength(String value) {
+    if (value == null) {
+      return 0;
+    }
+    try {
+      return Math.max(0, Integer.parseInt(value));
+    } catch (NumberFormatException ignored) {
+      // a malformed value is treated like a missing one:
+      // fall back to the parent's split behaviour
+      return 0;
+    }
+  }
+}
Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java (revision 1239415)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java (working copy)
@@ -20,6 +20,7 @@
 import static org.junit.Assert.*;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
@@ -71,7 +72,42 @@
     assertEquals(9999L, policy.getDesiredMaxFileSize());
   }
 
+  /**
+   * Test setting up a customized split policy
+   */
   @Test
+  public void testCustomPolicy() throws IOException {
+    HTableDescriptor myHtd = new HTableDescriptor();
+    myHtd.setValue(HTableDescriptor.SPLIT_POLICY,
+        PrefixSplitKeyPolicy.class.getName());
+    myHtd.setValue(PrefixSplitKeyPolicy.PREFIX_LENGTH_KEY, String.valueOf(2));
+
+    HRegion myMockRegion = Mockito.mock(HRegion.class);
+    Mockito.doReturn(myHtd).when(myMockRegion).getTableDesc();
+    Mockito.doReturn(stores).when(myMockRegion).getStores();
+
+    Store mockStore = Mockito.mock(Store.class);
+    Mockito.doReturn(2000L).when(mockStore).getSize();
+    Mockito.doReturn(true).when(mockStore).canSplit();
+    Mockito.doReturn(Bytes.toBytes("abcd")).when(mockStore).getSplitPoint();
+    stores.put(new byte[] { 1 }, mockStore);
+
+    PrefixSplitKeyPolicy policy = (PrefixSplitKeyPolicy) RegionSplitPolicy
+        .create(myMockRegion, conf);
+
+    assertEquals("ab", Bytes.toString(policy.getSplitPoint()));
+
+    Mockito.doReturn(true).when(myMockRegion).shouldForceSplit();
+    Mockito.doReturn(Bytes.toBytes("efgh")).when(myMockRegion)
+        .getExplicitSplitPoint();
+
+    policy = (PrefixSplitKeyPolicy) RegionSplitPolicy
+        .create(myMockRegion, conf);
+
+    assertEquals("ef", Bytes.toString(policy.getSplitPoint()));
+  }
+
+  @Test
   public void testConstantSizePolicy() throws IOException {
     htd.setMaxFileSize(1024L);
 
Index: src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java (revision 1239415)
+++ src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java (working copy)
@@ -69,7 +69,7 @@
 
   private static final String FAMILIES = "FAMILIES";
 
-  private static final String SPLIT_POLICY = "SPLIT_POLICY";
+  public static final String SPLIT_POLICY = "SPLIT_POLICY";
 
   /**
    * INTERNAL Used by HBase Shell interface to access this metadata
Index: src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java (revision 1239415)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java (working copy)
@@ -23,12 +23,13 @@
  * A {@link RegionSplitPolicy} implementation which splits a region
  * as soon as any of its store files exceeds a maximum configurable
  * size.
+ * <p>This is the default split policy.</p>
  */
-class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
+public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
   private long desiredMaxFileSize;
 
   @Override
-  void configureForRegion(HRegion region) {
+  protected void configureForRegion(HRegion region) {
     super.configureForRegion(region);
     long maxFileSize = region.getTableDesc().getMaxFileSize();
 
@@ -41,7 +42,7 @@
   }
 
   @Override
-  boolean shouldSplit() {
+  protected boolean shouldSplit() {
     boolean force = region.shouldForceSplit();
     boolean foundABigStore = false;
 
Index: src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java (revision 1239415)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java (working copy)
@@ -32,7 +32,7 @@
  * A split policy determines when a region should be split.
  * {@see ConstantSizeRegionSplitPolicy}
  */
-abstract class RegionSplitPolicy extends Configured {
+public abstract class RegionSplitPolicy extends Configured {
   private static final Class