Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java	(revision ac3d09e7fd0a003d2805456d16de2ba3abbf13d2)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java	(date 1591028919687)
@@ -381,6 +381,42 @@
     assertEquals("ijk", Bytes.toString(policy.getSplitPoint()));
   }
 
+  @Test
+  public void testDelimitedKeyPrefixRegionSplitPolicyWithOrdinal() throws IOException {
+    HTableDescriptor myHtd = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
+    myHtd.setValue(HTableDescriptor.SPLIT_POLICY,
+        DelimitedKeyPrefixRegionSplitPolicy.class.getName());
+    myHtd.setValue(DelimitedKeyPrefixRegionSplitPolicy.DELIMITER_KEY, "!");
+    myHtd.setValue(DelimitedKeyPrefixRegionSplitPolicy.DELIMITER_ORDINAL_KEY, "2");
+
+    HRegion myMockRegion = Mockito.mock(HRegion.class);
+    Mockito.doReturn(myHtd).when(myMockRegion).getTableDescriptor();
+    Mockito.doReturn(stores).when(myMockRegion).getStores();
+
+    HStore mockStore = Mockito.mock(HStore.class);
+    Mockito.doReturn(2000L).when(mockStore).getSize();
+    Mockito.doReturn(true).when(mockStore).canSplit();
+    Mockito.doReturn(Optional.of(Bytes.toBytes("roniobird!hadoop-dw2@smf1!teragen!343444")))
+        .when(mockStore).getSplitPoint();
+    stores.add(mockStore);
+
+    DelimitedKeyPrefixRegionSplitPolicy policy =
+        (DelimitedKeyPrefixRegionSplitPolicy) RegionSplitPolicy.create(myMockRegion, conf);
+    assertEquals("roniobird!hadoop-dw2@smf1", Bytes.toString(policy.getSplitPoint()));
+
+    Mockito.doReturn(true).when(myMockRegion).shouldForceSplit();
+    Mockito.doReturn(Bytes.toBytes("roniobird!hadoop-dw2@smf1!teragen!343444")).when(myMockRegion)
+        .getExplicitSplitPoint();
+    myHtd.setValue(DelimitedKeyPrefixRegionSplitPolicy.DELIMITER_ORDINAL_KEY, "3");
+    policy = (DelimitedKeyPrefixRegionSplitPolicy) RegionSplitPolicy.create(myMockRegion, conf);
+    assertEquals("roniobird!hadoop-dw2@smf1!teragen", Bytes.toString(policy.getSplitPoint()));
+
+    // An ordinal of 0 is invalid and must fall back to the first delimiter.
+    myHtd.setValue(DelimitedKeyPrefixRegionSplitPolicy.DELIMITER_ORDINAL_KEY, "0");
+    policy = (DelimitedKeyPrefixRegionSplitPolicy) RegionSplitPolicy.create(myMockRegion, conf);
+    assertEquals("roniobird", Bytes.toString(policy.getSplitPoint()));
+  }
+
   @Test
   public void testConstantSizePolicyWithJitter() throws IOException {
     conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java	(revision ac3d09e7fd0a003d2805456d16de2ba3abbf13d2)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java	(date 1591029568421)
@@ -20,22 +20,29 @@
 
 import java.util.Arrays;
+import java.util.Objects;
 
+import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hbase.util.Bytes;
 
 /**
- * A custom RegionSplitPolicy implementing a SplitPolicy that groups
- * rows by a prefix of the row-key with a delimiter. Only the first delimiter
- * for the row key will define the prefix of the row key that is used for grouping.
+ * A custom RegionSplitPolicy implementing a SplitPolicy that groups rows by a prefix of the row-key
+ * with a delimiter and the ordinal of that delimiter (1 by default). The delimiter and its ordinal
+ * together define the prefix of the row key that is used for grouping.
  *
  * This ensures that a region is not split "inside" a prefix of a row key.
  * I.e. rows can be co-located in a region by their prefix.
  *
  * As an example, if you have row keys delimited with _, like
- * userid_eventtype_eventid, and use prefix delimiter _, this split policy
- * ensures that all rows starting with the same userid, belongs to the same region.
+ * userid_eventtype_eventid, and use prefix delimiter _ with the default ordinal (1),
+ * this split policy ensures that all rows starting with the same userid belong to the same region.
+ *
+ * As another example, if you have row keys delimited with _, like
+ * userid_eventtype_eventid, and use prefix delimiter _ with ordinal 2, this split policy
+ * ensures that all rows starting with the same userid and eventtype belong to the same region.
+ *
  * @see KeyPrefixRegionSplitPolicy
  */
 @InterfaceAudience.Private
@@ -44,8 +51,11 @@
   private static final Logger LOG = LoggerFactory
       .getLogger(DelimitedKeyPrefixRegionSplitPolicy.class);
   public static final String DELIMITER_KEY = "DelimitedKeyPrefixRegionSplitPolicy.delimiter";
+  public static final String DELIMITER_ORDINAL_KEY =
+      "DelimitedKeyPrefixRegionSplitPolicy.delimiterOrdinal";
 
   private byte[] delimiter = null;
+  private int delimiterOrdinal = 1;
 
   @Override
   protected void configureForRegion(HRegion region) {
@@ -58,6 +68,9 @@
       return;
     }
     delimiter = Bytes.toBytes(delimiterString);
+
+    delimiterOrdinal =
+        parseDelimiterOrdinal(region.getTableDescriptor().getValue(DELIMITER_ORDINAL_KEY));
   }
 
   @Override
@@ -66,8 +79,7 @@
     if (splitPoint != null && delimiter != null) {
 
-      //find the first occurrence of delimiter in split point
-      int index =
-        org.apache.hbase.thirdparty.com.google.common.primitives.Bytes.indexOf(splitPoint, delimiter);
+      // find the configured (delimiterOrdinal-th) occurrence of delimiter in split point
+      int index = ordinalIndexOf(splitPoint, delimiter, delimiterOrdinal);
       if (index < 0) {
         LOG.warn("Delimiter " + Bytes.toString(delimiter) + " not found for split key " +
           Bytes.toString(splitPoint));
@@ -80,4 +92,61 @@
       return splitPoint;
     }
   }
+
+  /**
+   * Parses the configured delimiter ordinal, falling back to 1 (the first delimiter) when the
+   * value is missing, is not a plain digit string, is 0, or does not fit in an int.
+   * @param value the raw table descriptor value, may be null
+   * @return a delimiter ordinal that is always at least 1
+   */
+  private static int parseDelimiterOrdinal(String value) {
+    if (!NumberUtils.isDigits(value)) {
+      return 1;
+    }
+    try {
+      int ordinal = Integer.parseInt(value);
+      if (ordinal >= 1) {
+        return ordinal;
+      }
+    } catch (NumberFormatException ignored) {
+      // All digits, but too large for an int; handled like any other invalid ordinal below.
+    }
+    LOG.warn("Invalid " + DELIMITER_ORDINAL_KEY + " value " + value + ", using 1 instead");
+    return 1;
+  }
+
+  /**
+   * Returns the start index of the {@code ordinal}-th occurrence of {@code target} within
+   * {@code array}, counting occurrences from 1; occurrences may overlap.
+   * @param array the bytes to search
+   * @param target the byte sequence to look for
+   * @param ordinal which occurrence to locate, starting at 1
+   * @return the start index of that occurrence, 0 if {@code target} is empty, or -1 if
+   *         {@code array} holds fewer than {@code ordinal} occurrences
+   */
+  private static int ordinalIndexOf(byte[] array, byte[] target, int ordinal) {
+    Objects.requireNonNull(array, "array");
+    Objects.requireNonNull(target, "target");
+    if (target.length == 0) {
+      return 0;
+    }
+    if (ordinal < 1) {
+      // Occurrences are counted from 1, so nothing can match.
+      return -1;
+    }
+    int occurrences = 0;
+    outer:
+    for (int i = 0; i < array.length - target.length + 1; i++) {
+      for (int j = 0; j < target.length; j++) {
+        if (array[i + j] != target[j]) {
+          continue outer;
+        }
+      }
+      occurrences++;
+      if (occurrences == ordinal) {
+        return i;
+      }
+    }
+    return -1;
+  }
 }