Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java (revision 1293070) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java (working copy) @@ -17,14 +17,22 @@ */ package org.apache.hadoop.hbase.regionserver; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Before; import org.junit.Test; @@ -38,14 +46,13 @@ private HTableDescriptor htd; private HRegion mockRegion; private TreeMap stores; + private static final byte [] TABLENAME = new byte [] {'t'}; @Before public void setupMocks() { conf = HBaseConfiguration.create(); - - HRegionInfo hri = new HRegionInfo(Bytes.toBytes("testtable")); - - htd = new HTableDescriptor(); + HRegionInfo hri = new HRegionInfo(TABLENAME); + htd = new HTableDescriptor(TABLENAME); mockRegion = Mockito.mock(HRegion.class); Mockito.doReturn(htd).when(mockRegion).getTableDesc(); Mockito.doReturn(hri).when(mockRegion).getRegionInfo(); @@ -55,6 +62,64 @@ } @Test + public void testIncreasingToUpperBoundRegionSplitPolicy() throws IOException { + // Configure IncreasingToUpperBoundRegionSplitPolicy as our split policy + conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, + 
IncreasingToUpperBoundRegionSplitPolicy.class.getName()); + // Now make it so the mock region has a RegionServerService that will + // return 'online regions'. + RegionServerServices rss = Mockito.mock(RegionServerServices.class); + final List regions = new ArrayList(); + Mockito.when(rss.getOnlineRegions(TABLENAME)).thenReturn(regions); + Mockito.when(mockRegion.getRegionServerServices()).thenReturn(rss); + // Set max size for this 'table'. + long maxSplitSize = 1024L; + htd.setMaxFileSize(maxSplitSize); + // Set flush size to 1/4. This is what the IncreasingToUpperBoundRegionSplitPolicy + // grows by each time there is an extra region. + long flushSize = maxSplitSize/4; + conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSize); + // If RegionServerService with no regions in it -- 'online regions' == 0 -- + // then IncreasingToUpperBoundRegionSplitPolicy should act like a + // ConstantSizePolicy + IncreasingToUpperBoundRegionSplitPolicy policy = + (IncreasingToUpperBoundRegionSplitPolicy)RegionSplitPolicy.create(mockRegion, conf); + doConstantSizePolicyTests(policy); + + // Add a store in excess of split size. Because there are "no regions" + // on this server -- rss.getOnlineRegions is 0 -- then we should split + // like a constantsizeregionsplitpolicy would + Store mockStore = Mockito.mock(Store.class); + Mockito.doReturn(2000L).when(mockStore).getSize(); + Mockito.doReturn(true).when(mockStore).canSplit(); + stores.put(new byte[]{1}, mockStore); + // It should split + assertTrue(policy.shouldSplit()); + + // Now test that we increase our split size as online regions for a table + // grows. With one region, split size should be flushsize. + regions.add(mockRegion); + Mockito.doReturn(flushSize/2).when(mockStore).getSize(); + // Should not split since store is 1/2 flush size. 
+ assertFalse(policy.shouldSplit()); + // Set size of store to be > flush size and we should split + Mockito.doReturn(flushSize + 1).when(mockStore).getSize(); + assertTrue(policy.shouldSplit()); + // Add another region to the 'online regions' on this server and we should + // no longer be splittable since the split size has gone up. + regions.add(mockRegion); + assertFalse(policy.shouldSplit()); + // Two regions squared is 4x flush size; go just over that and verify it'll split + Mockito.doReturn((flushSize * 2 * 2) + 1).when(mockStore).getSize(); + assertTrue(policy.shouldSplit()); + + // Finally assert that even if loads of regions, we'll split at max size + assertEquals(maxSplitSize, policy.getSizeToCheck(1000)); + // Assert same is true if count of regions is zero. + assertEquals(maxSplitSize, policy.getSizeToCheck(0)); + } + + @Test public void testCreateDefault() throws IOException { conf.setLong(HConstants.HREGION_MAX_FILESIZE, 1234L); @@ -110,10 +175,16 @@ @Test public void testConstantSizePolicy() throws IOException { htd.setMaxFileSize(1024L); - ConstantSizeRegionSplitPolicy policy = (ConstantSizeRegionSplitPolicy)RegionSplitPolicy.create(mockRegion, conf); + doConstantSizePolicyTests(policy); + } + /** + * Run through tests for a ConstantSizeRegionSplitPolicy + * @param policy + */ + private void doConstantSizePolicyTests(final ConstantSizeRegionSplitPolicy policy) { // For no stores, should not split assertFalse(policy.shouldSplit()); @@ -141,6 +212,9 @@ // Turn off forceSplit, should not split Mockito.doReturn(false).when(mockRegion).shouldForceSplit(); assertFalse(policy.shouldSplit()); + + // Clear families we added above + stores.clear(); } @Test @@ -178,5 +252,4 @@ @org.junit.Rule public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); -} - +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- 
src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1293070) +++ src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -227,6 +227,9 @@ public static final String HREGION_MAX_FILESIZE = "hbase.hregion.max.filesize"; + /** Default maximum file size: 10G. The leading long literal keeps the product out of int arithmetic, where it would overflow. */ + public static final long DEFAULT_MAX_FILE_SIZE = 10L * 1024 * 1024 * 1024; + /** * The max number of threads used for opening and closing stores or store * files in parallel @@ -240,8 +243,6 @@ */ public static final int DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX = 1; - /** Default maximum file size */ - public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024; /** Conf key for the memstore size at which we flush the memstore */ public static final String HREGION_MEMSTORE_FLUSH_SIZE = Index: src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java (revision 1293070) +++ src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java (working copy) @@ -158,7 +158,7 @@ * Constant that denotes the maximum default size of the memstore after which * the contents are flushed to the store files */ - public static final long DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*64L; + public static final long DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*128L; private volatile Boolean meta = null; private volatile Boolean root = null; Index: src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java (revision 1293070) +++ src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java (working copy) @@ -33,8 +33,8 @@ * {@see ConstantSizeRegionSplitPolicy} */ public abstract class RegionSplitPolicy extends Configured { - private static final Class - DEFAULT_SPLIT_POLICY_CLASS = 
ConstantSizeRegionSplitPolicy.class; + private static final Class + DEFAULT_SPLIT_POLICY_CLASS = IncreasingToUpperBoundRegionSplitPolicy.class; /** * The region configured for this split policy. Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 1293070) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -780,6 +780,14 @@ return this.regionInfo; } + /** + * @return Instance of {@link RegionServerServices} used by this HRegion. + * Can be null. + */ + RegionServerServices getRegionServerServices() { + return this.rsServices; + } + /** @return requestsCount for this region */ public long getRequestsCount() { return this.readRequestsCount.get() + this.writeRequestsCount.get(); Index: src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/regionserver/IncreasingToUpperBoundRegionSplitPolicy.java (revision 0) @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Split size is the number of regions that are on this server that all are + * of the same table, squared, times the region flush size OR the maximum + * region split size, whichever is smaller. For example, if the flush size + * is 128M, then on first flush we will split which will make two regions + * that will split when their size is 2 * 2 * 128M = 512M. If one of these + * regions splits, then there are three regions and now the split size is + * 3 * 3 * 128M = 1152M, and so on until we reach the configured + * maximum filesize and then from there on out, we'll use that. + */ +public class IncreasingToUpperBoundRegionSplitPolicy +extends ConstantSizeRegionSplitPolicy { + static final Log LOG = + LogFactory.getLog(IncreasingToUpperBoundRegionSplitPolicy.class); + private long flushSize; + + @Override + protected void configureForRegion(HRegion region) { + super.configureForRegion(region); + this.flushSize = region.getTableDesc() != null? 
+ region.getTableDesc().getMemStoreFlushSize(): + getConf().getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, + HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE); + } + + @Override + protected boolean shouldSplit() { + boolean force = region.shouldForceSplit(); + boolean foundABigStore = false; + // Get count of regions that have the same common table as this.region + int tableRegionsCount = getCountOfCommonTableRegions(); + // Get size to check + long sizeToCheck = getSizeToCheck(tableRegionsCount); + + for (Store store : region.getStores().values()) { + // If any of the stores are unable to split (eg they contain reference files) + // then don't split + if ((!store.canSplit())) { + return false; + } + + // Mark if any store is big enough + long size = store.getSize(); + if (size > sizeToCheck) { + LOG.debug("ShouldSplit because " + store.getColumnFamilyName() + + " size=" + size + ", sizeToCheck=" + sizeToCheck + + ", regionsWithCommonTable=" + tableRegionsCount); + foundABigStore = true; + } + } + + return foundABigStore || force; + } + + /** + * @return Region max size or count of regions squared (as a long) * flushsize, whichever + * is smaller; the region max size when there are zero regions on this server. + */ + long getSizeToCheck(final int tableRegionsCount) { + return tableRegionsCount == 0? getDesiredMaxFileSize(): + Math.min(getDesiredMaxFileSize(), + this.flushSize * ((long) tableRegionsCount * tableRegionsCount)); + } + + /** + * @return Count of regions on this server that share the table this.region + * belongs to + */ + private int getCountOfCommonTableRegions() { + RegionServerServices rss = this.region.getRegionServerServices(); + // Can be null in tests + if (rss == null) return 0; + byte [] tablename = this.region.getTableDesc().getName(); + int tableRegionsCount = 0; + try { + List hri = rss.getOnlineRegions(tablename); + tableRegionsCount = hri == null || hri.isEmpty()? 
0: hri.size(); + } catch (IOException e) { + LOG.debug("Failed getOnlineRegions " + Bytes.toString(tablename), e); + } + return tableRegionsCount; + } +} Index: src/main/resources/hbase-default.xml =================================================================== --- src/main/resources/hbase-default.xml (revision 1293070) +++ src/main/resources/hbase-default.xml (working copy) @@ -389,11 +389,11 @@ hbase.hregion.max.filesize - 1073741824 + 10737418240 Maximum HStoreFile size. If any one of a column families' HStoreFiles has grown to exceed this value, the hosting HRegion is split in two. - Default: 1G. + Default: 10G.