diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java
new file mode 100644
index 0000000..938bd5d
--- /dev/null
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * A custom RegionSplitPolicy implementing a SplitPolicy that groups
+ * rows by a prefix of the row-key with a delimiter.
+ *
+ * This ensures that a region is not split "inside" a prefix of a row key.
+ * I.e. rows can be co-located in a region by their prefix.
+ *
+ * As an example, if you have row keys delimited with _, like
+ * userid_eventtype_eventid, and use prefix delimiter _, this split policy
+ * ensures that all rows starting with the same userid belong to the same region.
+ * @see KeyPrefixRegionSplitPolicy
+ */
+public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
+
+  private static final Log LOG = LogFactory
+      .getLog(DelimitedKeyPrefixRegionSplitPolicy.class);
+
+  /** Table descriptor key whose value is the delimiter that terminates the row-key prefix. */
+  public static final String DELIMITER_KEY = "DelimitedKeyPrefixRegionSplitPolicy.delimiter";
+
+  /** Delimiter bytes read from the table descriptor; null when none is configured. */
+  private byte[] delimiter = null;
+
+  /**
+   * Reads the delimiter from the region's table descriptor. If it is missing or
+   * empty, the delimiter stays unset and split points are returned unmodified.
+   */
+  @Override
+  protected void configureForRegion(HRegion region) {
+    super.configureForRegion(region);
+    if (region != null) {
+
+      // read the delimiter from the table descriptor
+      String delimiterString = region.getTableDesc().getValue(
+          DELIMITER_KEY);
+      if (delimiterString == null || delimiterString.length() == 0) {
+        LOG.error(DELIMITER_KEY + " not specified for table "
+            + region.getTableDesc().getNameAsString()
+            + ". Using default RegionSplitPolicy");
+        return;
+      }
+
+      delimiter = Bytes.toBytes(delimiterString);
+    }
+  }
+
+  /**
+   * Returns the parent policy's split point truncated just before the first
+   * occurrence of the delimiter. The parent's value is returned as-is when it is
+   * null, when no delimiter is configured, or when the delimiter does not occur.
+   */
+  @Override
+  protected byte[] getSplitPoint() {
+    byte[] splitPoint = super.getSplitPoint();
+    // Guava's Bytes.indexOf rejects a null array, so pass a null split point through
+    if (splitPoint == null || delimiter == null) {
+      return splitPoint;
+    }
+
+    // find the first occurrence of delimiter in split point
+    int index = com.google.common.primitives.Bytes.indexOf(splitPoint, delimiter);
+    if (index < 0) {
+      LOG.warn("Delimiter " + Bytes.toString(delimiter) + " not found for split key "
+          + Bytes.toString(splitPoint));
+      return splitPoint;
+    }
+
+    // group split keys by a prefix
+    return Arrays.copyOf(splitPoint, index);
+  }
+}
diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
index 766057b..d163319 100644
--- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
@@ -28,7 +28,6 @@ import java.util.List;
 import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.CompoundConfiguration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -251,4 +250,40 @@ public class TestRegionSplitPolicy {
         Bytes.toString(policy.getSplitPoint()));
   }
 
+  @Test
+  public void testDelimitedKeyPrefixRegionSplitPolicy() throws IOException {
+    HTableDescriptor myHtd = new HTableDescriptor();
+    myHtd.setValue(HTableDescriptor.SPLIT_POLICY,
+        DelimitedKeyPrefixRegionSplitPolicy.class.getName());
+    myHtd.setValue(DelimitedKeyPrefixRegionSplitPolicy.DELIMITER_KEY, ",");
+
+    HRegion myMockRegion = Mockito.mock(HRegion.class);
+    Mockito.doReturn(myHtd).when(myMockRegion).getTableDesc();
+    Mockito.doReturn(stores).when(myMockRegion).getStores();
+
+    HStore mockStore = Mockito.mock(HStore.class);
+    Mockito.doReturn(2000L).when(mockStore).getSize();
+    Mockito.doReturn(true).when(mockStore).canSplit();
+    Mockito.doReturn(Bytes.toBytes("ab,cd")).when(mockStore).getSplitPoint();
+    stores.put(new byte[] { 1 }, mockStore);
+
+    DelimitedKeyPrefixRegionSplitPolicy policy = (DelimitedKeyPrefixRegionSplitPolicy) RegionSplitPolicy
+        .create(myMockRegion, conf);
+
+    assertEquals("ab", Bytes.toString(policy.getSplitPoint()));
+
+    Mockito.doReturn(true).when(myMockRegion).shouldForceSplit();
+    Mockito.doReturn(Bytes.toBytes("efg,h")).when(myMockRegion)
+        .getExplicitSplitPoint();
+
+    policy = (DelimitedKeyPrefixRegionSplitPolicy) RegionSplitPolicy
+        .create(myMockRegion, conf);
+
+    assertEquals("efg", Bytes.toString(policy.getSplitPoint()));
+
+    Mockito.doReturn(Bytes.toBytes("ijk")).when(myMockRegion)
+        .getExplicitSplitPoint();
+    assertEquals("ijk", Bytes.toString(policy.getSplitPoint()));
+  }
+
 }