From aed1e4dd88f2192f883348bcdeb99235b4609201 Mon Sep 17 00:00:00 2001 From: Mikhail Antonov Date: Wed, 1 Apr 2015 07:43:43 -0700 Subject: [PATCH] HBASE-13103 [ergonomics] add region size balancing as a feature of master --- .../java/org/apache/hadoop/hbase/HConstants.java | 8 + hbase-common/src/main/resources/hbase-default.xml | 20 ++ .../org/apache/hadoop/hbase/master/HMaster.java | 74 ++++++++ .../regionnormalizer/EmptyNormalizationPlan.java | 32 ++++ .../regionnormalizer/MergeNormalizationPlan.java | 81 ++++++++ .../master/regionnormalizer/NormalizationPlan.java | 39 ++++ .../master/regionnormalizer/RegionNormalizer.java | 63 +++++++ .../regionnormalizer/RegionNormalizerChore.java | 53 ++++++ .../regionnormalizer/RegionNormalizerFactory.java | 45 +++++ .../regionnormalizer/SimpleRegionNormalizer.java | 155 ++++++++++++++++ .../regionnormalizer/SplitNormalizationPlan.java | 83 +++++++++ .../TestSimpleRegionNormalizer.java | 205 +++++++++++++++++++++ .../TestSimpleRegionNormalizerOnCluster.java | 147 +++++++++++++++ 13 files changed, 1005 insertions(+) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/EmptyNormalizationPlan.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/MergeNormalizationPlan.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/NormalizationPlan.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizer.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerChore.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerFactory.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SimpleRegionNormalizer.java create mode 100644 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SplitNormalizationPlan.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizer.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizerOnCluster.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 19e251a..0f926fb 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -123,6 +123,14 @@ public final class HConstants { /** Config for pluggable load balancers */ public static final String HBASE_MASTER_LOADBALANCER_CLASS = "hbase.master.loadbalancer.class"; + /** Config for pluggable region normalizer */ + public static final String HBASE_MASTER_REGIONNORMALIZER_CLASS = + "hbase.master.regionnormalizer.class"; + + /** Config for enabling/disabling pluggable region normalizer */ + public static final String HBASE_NORMALIZER_ENABLED = + "hbase.normalizer.enabled"; + /** Cluster is standalone or pseudo-distributed */ public static final boolean CLUSTER_IS_LOCAL = false; diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index 93776d7..3828a2d 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -576,6 +576,17 @@ possible configurations would overwhelm and obscure the important. Period at which the region balancer runs in the Master. + hbase.normalizer.enabled + false + If set to true, Master will try to keep region size + within each table approximately the same. + + + hbase.normalizer.period + 300000 + Period at which the region normalizer runs in the Master. 
+ + hbase.regions.slop 0.2 Rebalance if any regionserver has average + (average * slop) regions. @@ -1382,6 +1393,15 @@ possible configurations would overwhelm and obscure the important. + hbase.master.regionnormalizer.class + org.apache.hadoop.hbase.master.regionnormalizer.SimpleRegionNormalizer + + Class used to execute the region normalization when the period occurs. + See the class comment for more on how it works + http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/regionnormalizer/SimpleRegionNormalizer.html + + + hbase.security.exec.permission.checks false diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index cc7c2a1..9635609 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.TableState; @@ -101,6 +102,12 @@ import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler; import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler; import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler; import org.apache.hadoop.hbase.master.handler.TruncateTableHandler; +import org.apache.hadoop.hbase.master.regionnormalizer.MergeNormalizationPlan; +import org.apache.hadoop.hbase.master.regionnormalizer.NormalizationPlan; +import org.apache.hadoop.hbase.master.regionnormalizer.RegionNormalizer; +import org.apache.hadoop.hbase.master.regionnormalizer.RegionNormalizerChore; +import 
org.apache.hadoop.hbase.master.regionnormalizer.RegionNormalizerFactory; +import org.apache.hadoop.hbase.master.regionnormalizer.SplitNormalizationPlan; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer; import org.apache.hadoop.hbase.monitoring.MonitoredTask; @@ -260,7 +267,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server { private volatile boolean serverShutdownHandlerEnabled = false; LoadBalancer balancer; + RegionNormalizer normalizer; + private boolean normalizerEnabled = false; private BalancerChore balancerChore; + private RegionNormalizerChore normalizerChore; private ClusterStatusChore clusterStatusChore; private ClusterStatusPublisher clusterStatusPublisherChore = null; @@ -526,6 +536,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { void initializeZKBasedSystemTrackers() throws IOException, InterruptedException, KeeperException, CoordinatedStateException { this.balancer = LoadBalancerFactory.getLoadBalancer(conf); + this.normalizer = RegionNormalizerFactory.getRegionNormalizer(conf); + this.normalizer.setMasterServices(this); + this.normalizerEnabled = conf.getBoolean(HConstants.HBASE_NORMALIZER_ENABLED, false); this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this); this.loadBalancerTracker.start(); this.assignmentManager = new AssignmentManager(this, serverManager, @@ -726,6 +739,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server { getChoreService().scheduleChore(clusterStatusChore); this.balancerChore = new BalancerChore(this); getChoreService().scheduleChore(balancerChore); + this.normalizerChore = new RegionNormalizerChore(this); + getChoreService().scheduleChore(normalizerChore); this.catalogJanitorChore = new CatalogJanitor(this, this); getChoreService().scheduleChore(catalogJanitorChore); @@ -1059,6 +1074,9 @@ public class HMaster extends HRegionServer 
implements MasterServices, Server { if (this.balancerChore != null) { this.balancerChore.cancel(true); } + if (this.normalizerChore != null) { + this.normalizerChore.cancel(true); + } if (this.clusterStatusChore != null) { this.clusterStatusChore.cancel(true); } @@ -1188,6 +1206,62 @@ public class HMaster extends HRegionServer implements MasterServices, Server { return true; } + public boolean normalizeRegions() throws IOException { + if (!this.initialized) { + LOG.info("Master has not been initialized, don't run region normalizer."); + return false; + } + + if (!this.normalizerEnabled) { + LOG.info("Region normalization is disabled, don't run region normalizer."); + return false; + } + + synchronized (this.normalizer) { + // Don't run more than 1 normalization plan per table + Set allEnabledTabbles = this.tableStateManager. + getTablesInStates(TableState.State.ENABLED); + + for(TableName table : allEnabledTabbles) { + if (table.isSystemTable()) { + continue; + } + List regions = this.assignmentManager.getRegionStates(). + getRegionsOfTable(table); + doNormalize(this.normalizer.computePlanForTable(table, regions)); + } + } + // If Region did not generate any plans, it means the cluster is already balanced. + // Return true indicating a success. 
+ return true; + } + + private void doNormalize(NormalizationPlan normalizationPlan) { + LOG.info("Executing normalization plan: " + normalizationPlan); + switch (normalizationPlan.getType()) { + case SPLIT: { + SplitNormalizationPlan splittingPlan = (SplitNormalizationPlan) normalizationPlan; + try { + Admin admin = clusterConnection.getAdmin(); + admin.splitRegion(splittingPlan.getRegionInfo().getRegionName()); + } catch (IOException ex) { + LOG.error("Error during region split: " + ex); + } + break; + } + case MERGE: { + MergeNormalizationPlan mergingPlan = (MergeNormalizationPlan) normalizationPlan; + try { + Admin admin = clusterConnection.getAdmin(); + admin.mergeRegions(mergingPlan.getFirstRegion().getEncodedNameAsBytes(), + mergingPlan.getSecondRegion().getEncodedNameAsBytes(), true); + } catch (IOException ex) { + LOG.error("Error during region merge: " + ex); + } + } + } + } + /** * @return Client info for use as prefix on an audit log string; who did an action */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/EmptyNormalizationPlan.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/EmptyNormalizationPlan.java new file mode 100644 index 0000000..e4c46a9 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/EmptyNormalizationPlan.java @@ -0,0 +1,32 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.regionnormalizer; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +/** + * Plan which signifies that no normalization is required, + * or normalization of this table isn't allowed. + */ +@InterfaceAudience.Private +public class EmptyNormalizationPlan extends NormalizationPlan { + public EmptyNormalizationPlan() { + this.setType(TYPE.NOTHING); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/MergeNormalizationPlan.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/MergeNormalizationPlan.java new file mode 100644 index 0000000..e5d2112 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/MergeNormalizationPlan.java @@ -0,0 +1,81 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.regionnormalizer; + +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +/** + * Normalization plan to merge regions (smallest in the table). + */ +@InterfaceAudience.Private +public class MergeNormalizationPlan extends NormalizationPlan { + private HRegionInfo firstRegion; + private HRegionInfo secondRegion; + + public MergeNormalizationPlan(HRegionInfo firstRegion, HRegionInfo secondRegion) { + this.setType(TYPE.MERGE); + this.firstRegion = firstRegion; + this.secondRegion = secondRegion; + } + + public HRegionInfo getFirstRegion() { + return firstRegion; + } + + public void setFirstRegion(HRegionInfo firstRegion) { + this.firstRegion = firstRegion; + } + + public HRegionInfo getSecondRegion() { + return secondRegion; + } + + public void setSecondRegion(HRegionInfo secondRegion) { + this.secondRegion = secondRegion; + } + + @Override + public String toString() { + return "MergeNormalizationPlan{" + + "firstRegion=" + firstRegion + + ", secondRegion=" + secondRegion + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof MergeNormalizationPlan)) return false; + + MergeNormalizationPlan that = (MergeNormalizationPlan) o; + + if (!firstRegion.equals(that.firstRegion)) return false; + if (!secondRegion.equals(that.secondRegion)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = firstRegion.hashCode(); + result = 31 * result + secondRegion.hashCode(); + return result; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/NormalizationPlan.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/NormalizationPlan.java new file mode 100644 index 0000000..aad8dec --- /dev/null +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/NormalizationPlan.java @@ -0,0 +1,39 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.regionnormalizer; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +/** + * Abstract normalization command. May be SPLIT, MERGE or NOTHING. + */ +@InterfaceAudience.Private +public abstract class NormalizationPlan { + public static enum TYPE {SPLIT, MERGE, NOTHING} + + protected TYPE type; + + public TYPE getType() { + return type; + } + + public void setType(TYPE type) { + this.type = type; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizer.java new file mode 100644 index 0000000..ea8daf3 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizer.java @@ -0,0 +1,63 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.regionnormalizer; + +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.master.MasterServices; + +import java.util.List; + +/** + * Performs "normalization" of regions on the cluster, making sure that suboptimal + * choice of split keys doesn't leave cluster in a situation when some regions are + * substantially larger than others for considerable amount of time. + * + * Users who want to use this feature could either use default {@link SimpleRegionNormalizer} + * or plug in their own implementation. Please note that overly aggressive normalization rules + * (attempting to make all regions perfectly equal in size) could potentially lead to + * "split/merge storms". + */ +@InterfaceAudience.Private +public interface RegionNormalizer { + /** + * Initialize the region normalizer. Must be called before first call to + * {@link #computePlanForTable(TableName, List)}. + * @throws HBaseIOException + */ + void initialize() throws HBaseIOException; + + /** + * Set the master service. 
Must be called before first call to + * {@link #computePlanForTable(TableName, List)}. + * @param masterServices + */ + void setMasterServices(MasterServices masterServices); + + /** + * Computes next optimal normalization plan. + * @param table table to normalize + * @param tableRegions current table layout + * @return Next (perhaps most urgent) normalization action to perform + */ + NormalizationPlan computePlanForTable(TableName table, List tableRegions) + throws HBaseIOException; +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerChore.java new file mode 100644 index 0000000..f6f3554 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerChore.java @@ -0,0 +1,53 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.regionnormalizer; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.ScheduledChore; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.master.HMaster; + +import java.io.IOException; + +/** + * Chore that will call {@link org.apache.hadoop.hbase.master.HMaster#normalizeRegions()} + * when needed. + */ +@InterfaceAudience.Private +public class RegionNormalizerChore extends ScheduledChore { + private static final Log LOG = LogFactory.getLog(RegionNormalizerChore.class); + + private final HMaster master; + + public RegionNormalizerChore(HMaster master) { + super(master.getServerName() + "-RegionNormalizerChore", master, + master.getConfiguration().getInt("hbase.normalizer.period", 1800000)); + this.master = master; + } + + @Override + protected void chore() { + try { + master.normalizeRegions(); + } catch (IOException e) { + LOG.error("Failed to normalize regions.", e); + } + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerFactory.java new file mode 100644 index 0000000..4c165dd --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/RegionNormalizerFactory.java @@ -0,0 +1,45 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.regionnormalizer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * Factory to create instance of {@link RegionNormalizer} as configured. + */ +@InterfaceAudience.Private +public class RegionNormalizerFactory { + /** + * Create a region normalizer from the given conf. + * @param conf configuration + * @return {@link RegionNormalizer} implementation + */ + public static RegionNormalizer getRegionNormalizer(Configuration conf) { + + // Create instance of Region Normalizer + Class balancerKlass = + conf.getClass(HConstants.HBASE_MASTER_REGIONNORMALIZER_CLASS, SimpleRegionNormalizer.class, + RegionNormalizer.class); + return ReflectionUtils.newInstance(balancerKlass, conf); + + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SimpleRegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SimpleRegionNormalizer.java new file mode 100644 index 0000000..34c1e08 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SimpleRegionNormalizer.java @@ -0,0 +1,155 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.regionnormalizer; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.RegionLoad; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.util.Pair; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Simple implementation of region normalizer. + * + * Logic in use: + * + * - get all regions of a given table + * - get avg size S of each region (by total size of store files reported in RegionLoad) + * - If biggest region is bigger than S * 2, it is kindly requested to split, + * and normalization step stops + * - Otherwise, two smallest regions {R1, R2} are kindly requested to merge, + * if both are smaller than S / 2, and normalization stops + * - Otherwise, no action is performed + */ +@InterfaceAudience.Private +public class SimpleRegionNormalizer implements RegionNormalizer { + private static final Log LOG = LogFactory.getLog(SimpleRegionNormalizer.class); + private MasterServices masterServices; + + /** + * Set the master service. 
+ * @param masterServices + */ + public void setMasterServices(MasterServices masterServices) { + this.masterServices = masterServices; + } + + /** + * Initialize the region normalizer. Must be called after setters. + * + * @throws org.apache.hadoop.hbase.HBaseIOException + */ + @Override + public void initialize() throws HBaseIOException { + } + + /** + * Computes next most "urgent" normalization action on the table. + * Action may be either a split, or a merge. + * + * @param table table to normalize + * @param tableRegions current table layout + * @return List of plans + */ + @Override + public NormalizationPlan computePlanForTable(TableName table, List tableRegions) + throws HBaseIOException { + if (table == null || table.isSystemTable()) { + LOG.info("Normalization of table " + table + " isn't allowed"); + return new EmptyNormalizationPlan(); + } + + //TODO: should we make min number of regions a config param? + if (tableRegions == null || tableRegions.size() < 3) { + LOG.info("Table " + table + " doesn't have enough regions to normalize"); + return new EmptyNormalizationPlan(); + } + + LOG.info("Computing normalization plan for table: " + table + + ", number of regions: " + tableRegions.size()); + + long totalSizeMb = 0; + Pair largestRegionFound = null; + + // A is a smallest region, B is a second smallest + Pair smallestRegionAFound = null; + Pair smallestRegionBFound = null; + + for (HRegionInfo hri : tableRegions) { + ServerName sn = masterServices.getAssignmentManager().getRegionStates(). + getRegionServerOfRegion(hri); + RegionLoad regionLoad = masterServices.getServerManager().getLoad(sn). 
+ getRegionsLoad().get(hri.getRegionName()); + int regionSize = regionLoad.getStorefileSizeMB(); + totalSizeMb += regionSize; + + if (largestRegionFound == null || regionSize > largestRegionFound.getSecond()) { + largestRegionFound = new Pair(hri, regionSize); + } + + if (smallestRegionAFound == null) { + smallestRegionAFound = new Pair(hri, regionSize); + } else { + if (regionSize < smallestRegionAFound.getSecond()) { + smallestRegionBFound = smallestRegionAFound; + smallestRegionAFound = new Pair(hri, regionSize); + } else { + if (smallestRegionBFound == null || regionSize < smallestRegionBFound.getSecond()) { + smallestRegionBFound = new Pair(hri, regionSize); + } + } + } + } + + double avgRegionSize = totalSizeMb / (double) tableRegions.size(); + + LOG.info("Table " + table + ", total aggregated regions size: " + totalSizeMb); + LOG.info("Table " + table + ", average region size: " + avgRegionSize); + + // now; if the largest region is >2 times large than average, we split it, split + // is more high priority normalization action than merge. 
+ if (largestRegionFound.getSecond() > 2 * avgRegionSize) { + LOG.info("Table " + table + ", largest region " + + largestRegionFound.getFirst().getRegionName() + " has size " + + largestRegionFound.getSecond() + ", more than 2 times than avg size, splitting"); + return new SplitNormalizationPlan(largestRegionFound.getFirst(), null); + } else { + if ((smallestRegionAFound.getSecond() < 0.5 * avgRegionSize) && + (smallestRegionBFound.getSecond() < 0.5 * avgRegionSize)) { + LOG.info("Table " + table + ", 2 smallest regions have sizes, accordingly " + + smallestRegionAFound.getSecond() + " and " + smallestRegionBFound.getSecond() + + ", less than half the avg size, merging them"); + return new MergeNormalizationPlan(smallestRegionAFound.getFirst(), + smallestRegionBFound.getFirst()); + } else { + LOG.info("No normalization needed, table regions look good"); + return new EmptyNormalizationPlan(); + } + } + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SplitNormalizationPlan.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SplitNormalizationPlan.java new file mode 100644 index 0000000..7af1f3c --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/regionnormalizer/SplitNormalizationPlan.java @@ -0,0 +1,83 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.regionnormalizer;
+
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+
+import java.util.Arrays;
+
+/**
+ * Normalization plan to split a region.
+ * <p>
+ * Holds the region to split and an optional split point. Two plans are equal when they
+ * refer to equal regions and their split points have the same contents.
+ */
+@InterfaceAudience.Private
+public class SplitNormalizationPlan extends NormalizationPlan {
+  private HRegionInfo regionInfo;
+  private byte[] splitPoint;
+
+  /**
+   * Creates a plan and marks it as {@code TYPE.SPLIT}.
+   *
+   * @param regionInfo region to split; not validated here
+   * @param splitPoint split point for the region; may be {@code null}
+   *          (NOTE(review): presumably null means "no explicit split point" - confirm
+   *          against the code that executes the plan)
+   */
+  public SplitNormalizationPlan(HRegionInfo regionInfo, byte[] splitPoint) {
+    this.setType(TYPE.SPLIT);
+    this.regionInfo = regionInfo;
+    this.splitPoint = splitPoint;
+  }
+
+  /**
+   * @return the region this plan wants to split
+   */
+  public HRegionInfo getRegionInfo() {
+    return regionInfo;
+  }
+
+  /**
+   * @param regionInfo the region this plan should split
+   */
+  public void setRegionInfo(HRegionInfo regionInfo) {
+    this.regionInfo = regionInfo;
+  }
+
+  /**
+   * @return the stored split point array itself (not a copy); may be {@code null}
+   */
+  public byte[] getSplitPoint() {
+    return splitPoint;
+  }
+
+  /**
+   * @param splitPoint the split point to use; the array is stored as-is, without copying
+   */
+  public void setSplitPoint(byte[] splitPoint) {
+    this.splitPoint = splitPoint;
+  }
+
+  @Override
+  public String toString() {
+    return "SplitNormalizationPlan{" +
+        "regionInfo=" + regionInfo +
+        ", splitPoint=" + Arrays.toString(splitPoint) +
+        '}';
+  }
+
+  /**
+   * Compares by region (via {@code HRegionInfo.equals}) and by split point contents.
+   * A {@code null} region or split point only matches another {@code null}.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof SplitNormalizationPlan)) {
+      return false;
+    }
+
+    SplitNormalizationPlan that = (SplitNormalizationPlan) o;
+
+    // Neither the constructor nor the setter rejects a null region, so do not dereference
+    // it blindly here.
+    boolean sameRegion = (regionInfo == null)
+        ? that.regionInfo == null
+        : regionInfo.equals(that.regionInfo);
+
+    // Arrays.equals treats two null arrays as equal.
+    return sameRegion && Arrays.equals(splitPoint, that.splitPoint);
+  }
+
+  /**
+   * Hash consistent with {@link #equals(Object)}; a {@code null} field contributes 0.
+   */
+  @Override
+  public int hashCode() {
+    int result = (regionInfo != null) ? regionInfo.hashCode() : 0;
+    // Arrays.hashCode returns 0 for a null array, so no extra null check is needed.
+    result = 31 * result + Arrays.hashCode(splitPoint);
+    return result;
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizer.java
new file mode 100644
index 0000000..762719c
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizer.java
@@ -0,0 +1,205 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.regionnormalizer;
+
+import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.RegionLoad;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests logic of {@link SimpleRegionNormalizer}.
+ * <p>
+ * Each test builds a list of regions with mocked store file sizes, asks the normalizer
+ * for a plan and checks which kind of plan comes back. No cluster is started; the
+ * {@link MasterServices} the normalizer talks to is a Mockito mock.
+ */
+@Category({MasterTests.class, SmallTests.class})
+public class TestSimpleRegionNormalizer {
+  private static final Log LOG = LogFactory.getLog(TestSimpleRegionNormalizer.class);
+
+  private static RegionNormalizer normalizer;
+
+  // mocks
+  private static MasterServices masterServices;
+
+  @BeforeClass
+  public static void beforeAllTests() throws Exception {
+    normalizer = new SimpleRegionNormalizer();
+  }
+
+  /**
+   * The meta table with an empty region list must yield an empty plan.
+   */
+  @Test
+  public void testNoNormalizationForMetaTable() throws HBaseIOException {
+    TableName testTable = TableName.META_TABLE_NAME;
+    List<HRegionInfo> hris = new ArrayList<>();
+    Map<byte[], Integer> regionSizes = new HashMap<>();
+
+    setupMocksForNormalizer(testTable, regionSizes);
+    NormalizationPlan plan = normalizer.computePlanForTable(testTable, Lists.newArrayList(hris));
+    // JUnit assertion rather than the assert keyword, which is a no-op without -ea.
+    assertTrue(plan instanceof EmptyNormalizationPlan);
+  }
+
+  /**
+   * A table with only two regions (sizes 10 and 15) must yield an empty plan.
+   */
+  @Test
+  public void testNoNormalizationIfTooFewRegions() throws HBaseIOException {
+    TableName testTable = TableName.valueOf("testSplitOfSmallRegion");
+    List<HRegionInfo> hris = new ArrayList<>();
+    Map<byte[], Integer> regionSizes = new HashMap<>();
+
+    HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"));
+    hris.add(hri1);
+    regionSizes.put(hri1.getRegionName(), 10);
+
+    HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"));
+    hris.add(hri2);
+    regionSizes.put(hri2.getRegionName(), 15);
+
+    setupMocksForNormalizer(testTable, regionSizes);
+    NormalizationPlan plan = normalizer.computePlanForTable(testTable, Lists.newArrayList(hris));
+    assertTrue(plan instanceof EmptyNormalizationPlan);
+  }
+
+  /**
+   * Four regions of similar size (10, 15, 8, 10) must yield an empty plan.
+   */
+  @Test
+  public void testNoNormalizationOnNormalizedCluster() throws HBaseIOException {
+    TableName testTable = TableName.valueOf("testSplitOfSmallRegion");
+    List<HRegionInfo> hris = new ArrayList<>();
+    Map<byte[], Integer> regionSizes = new HashMap<>();
+
+    HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"));
+    hris.add(hri1);
+    regionSizes.put(hri1.getRegionName(), 10);
+
+    HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"));
+    hris.add(hri2);
+    regionSizes.put(hri2.getRegionName(), 15);
+
+    HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd"));
+    hris.add(hri3);
+    regionSizes.put(hri3.getRegionName(), 8);
+
+    HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"));
+    hris.add(hri4);
+    regionSizes.put(hri4.getRegionName(), 10);
+
+    setupMocksForNormalizer(testTable, regionSizes);
+    NormalizationPlan plan = normalizer.computePlanForTable(testTable, Lists.newArrayList(hris));
+    assertTrue(plan instanceof EmptyNormalizationPlan);
+  }
+
+  /**
+   * Five regions sized 15, 5, 5, 15, 16: the two regions of size 5 must be picked for a
+   * merge, in region order.
+   */
+  @Test
+  public void testMergeOfSmallRegions() throws HBaseIOException {
+    TableName testTable = TableName.valueOf("testMergeOfSmallRegions");
+    List<HRegionInfo> hris = new ArrayList<>();
+    Map<byte[], Integer> regionSizes = new HashMap<>();
+
+    HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"));
+    hris.add(hri1);
+    regionSizes.put(hri1.getRegionName(), 15);
+
+    HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"));
+    hris.add(hri2);
+    regionSizes.put(hri2.getRegionName(), 5);
+
+    HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd"));
+    hris.add(hri3);
+    regionSizes.put(hri3.getRegionName(), 5);
+
+    HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"));
+    hris.add(hri4);
+    regionSizes.put(hri4.getRegionName(), 15);
+
+    // Fifth region gets its own key range and is itself added to the list; previously it
+    // reused hri4's range and hri4 was added a second time, so hri5 was never evaluated.
+    HRegionInfo hri5 = new HRegionInfo(testTable, Bytes.toBytes("eee"), Bytes.toBytes("fff"));
+    hris.add(hri5);
+    regionSizes.put(hri5.getRegionName(), 16);
+
+    setupMocksForNormalizer(testTable, regionSizes);
+    NormalizationPlan plan = normalizer.computePlanForTable(testTable, Lists.newArrayList(hris));
+
+    assertTrue(plan instanceof MergeNormalizationPlan);
+    assertEquals(hri2, ((MergeNormalizationPlan) plan).getFirstRegion());
+    assertEquals(hri3, ((MergeNormalizationPlan) plan).getSecondRegion());
+  }
+
+  /**
+   * Four regions sized 8, 6, 10, 30: the region of size 30 must be picked for a split.
+   */
+  @Test
+  public void testSplitOfLargeRegion() throws HBaseIOException {
+    TableName testTable = TableName.valueOf("testSplitOfLargeRegion");
+    List<HRegionInfo> hris = new ArrayList<>();
+    Map<byte[], Integer> regionSizes = new HashMap<>();
+
+    HRegionInfo hri1 = new HRegionInfo(testTable, Bytes.toBytes("aaa"), Bytes.toBytes("bbb"));
+    hris.add(hri1);
+    regionSizes.put(hri1.getRegionName(), 8);
+
+    HRegionInfo hri2 = new HRegionInfo(testTable, Bytes.toBytes("bbb"), Bytes.toBytes("ccc"));
+    hris.add(hri2);
+    regionSizes.put(hri2.getRegionName(), 6);
+
+    HRegionInfo hri3 = new HRegionInfo(testTable, Bytes.toBytes("ccc"), Bytes.toBytes("ddd"));
+    hris.add(hri3);
+    regionSizes.put(hri3.getRegionName(), 10);
+
+    HRegionInfo hri4 = new HRegionInfo(testTable, Bytes.toBytes("ddd"), Bytes.toBytes("eee"));
+    hris.add(hri4);
+    regionSizes.put(hri4.getRegionName(), 30);
+
+    setupMocksForNormalizer(testTable, regionSizes);
+    NormalizationPlan plan = normalizer.computePlanForTable(testTable, Lists.newArrayList(hris));
+
+    assertTrue(plan instanceof SplitNormalizationPlan);
+    assertEquals(hri4, ((SplitNormalizationPlan) plan).getRegionInfo());
+  }
+
+  /**
+   * Builds a fresh {@link MasterServices} mock that reports the given region sizes and
+   * installs it into the normalizer under test.
+   *
+   * @param table table under test (not used when wiring the mocks)
+   * @param regionSizes region name to store file size in MB, as the mocked
+   *          {@link RegionLoad} will report it. The map is only iterated, never looked
+   *          up by key, so the identity-based hashing of {@code byte[]} keys is harmless.
+   */
+  protected void setupMocksForNormalizer(TableName table, Map<byte[], Integer> regionSizes) {
+    masterServices = Mockito.mock(MasterServices.class, RETURNS_DEEP_STUBS);
+
+    // for simplicity all regions are assumed to be on one server; doesn't matter to us
+    ServerName sn = ServerName.valueOf("localhost", -1, 1L);
+    when(masterServices.getAssignmentManager().getRegionStates().
+      getRegionServerOfRegion(any(HRegionInfo.class))).thenReturn(sn);
+
+    for (Map.Entry<byte[], Integer> region : regionSizes.entrySet()) {
+      RegionLoad regionLoad = Mockito.mock(RegionLoad.class);
+      when(regionLoad.getName()).thenReturn(region.getKey());
+      when(regionLoad.getStorefileSizeMB()).thenReturn(region.getValue());
+
+      when(masterServices.getServerManager().getLoad(sn).
+        getRegionsLoad().get(region.getKey())).thenReturn(regionLoad);
+    }
+
+    normalizer.setMasterServices(masterServices);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizerOnCluster.java
new file mode 100644
index 0000000..64f1c09
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/regionnormalizer/TestSimpleRegionNormalizerOnCluster.java
@@ -0,0 +1,147 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.regionnormalizer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Testing {@link SimpleRegionNormalizer} on minicluster.
+ * <p>
+ * Each test creates a five-region table, loads the regions unevenly, runs the master's
+ * normalizer once and waits for the region count to change. The waits are bounded by the
+ * JUnit timeout on each test so a normalizer that never acts fails the test instead of
+ * hanging the build.
+ */
+@Category({MasterTests.class, MediumTests.class})
+public class TestSimpleRegionNormalizerOnCluster {
+  private static final Log LOG = LogFactory.getLog(TestSimpleRegionNormalizerOnCluster.class);
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final byte[] FAMILYNAME = Bytes.toBytes("fam");
+  private static Admin admin;
+
+  @BeforeClass
+  public static void beforeAllTests() throws Exception {
+    // we will retry operations when PleaseHoldException is thrown
+    TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
+    TEST_UTIL.getConfiguration().setBoolean(HConstants.HBASE_NORMALIZER_ENABLED, true);
+
+    // Start a mini cluster with a single slave (one region server).
+    TEST_UTIL.startMiniCluster(1);
+    admin = TEST_UTIL.getHBaseAdmin();
+  }
+
+  @AfterClass
+  public static void afterAllTests() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Loads the first region with 700 rows and the other four with 300 rows each, runs the
+   * normalizer and expects the table to go from five to six regions.
+   */
+  @Test(timeout = 60000)
+  @SuppressWarnings("deprecation")
+  public void testRegionNormalizationSplitOnCluster() throws Exception {
+    final TableName TABLENAME =
+      TableName.valueOf("testRegionNormalizationSplitOnCluster");
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    HMaster m = cluster.getMaster();
+
+    // The table handle is only needed to create the table; data is written straight into
+    // the regions below.
+    try (HTable ht = TEST_UTIL.createMultiRegionTable(TABLENAME, FAMILYNAME, 5)) {
+      boolean generatedLargeRegion = false;
+      for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(TABLENAME)) {
+        generateTestData(region, generatedLargeRegion ? 300 : 700);
+        generatedLargeRegion = true;
+        region.flush(true);
+      }
+    }
+
+    admin.flush(TABLENAME);
+
+    assertEquals(5, MetaTableAccessor.getRegionCount(TEST_UTIL.getConnection(), TABLENAME));
+
+    // Run the normalizer once, then poll until the split shows up as a sixth region.
+    // The loop itself has no exit condition other than success; the @Test timeout is
+    // what bounds it.
+    m.normalizeRegions();
+
+    while (MetaTableAccessor.getRegionCount(TEST_UTIL.getConnection(), TABLENAME) < 6) {
+      Thread.sleep(100);
+    }
+
+    assertEquals(6, MetaTableAccessor.getRegionCount(TEST_UTIL.getConnection(), TABLENAME));
+  }
+
+  /**
+   * Loads regions 0 and 4 with 200 rows and regions 1-3 with 800 rows each, runs the
+   * normalizer and expects the table to go from five to four regions.
+   */
+  @Test(timeout = 60000)
+  @SuppressWarnings("deprecation")
+  public void testRegionNormalizationMergeOnCluster() throws Exception {
+    final TableName TABLENAME =
+      TableName.valueOf("testRegionNormalizationMergeOnCluster");
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    HMaster m = cluster.getMaster();
+
+    try (HTable ht = TEST_UTIL.createMultiRegionTable(TABLENAME, FAMILYNAME, 5)) {
+      for (int i = 0; i < 5; i++) {
+        HRegion region = (HRegion) TEST_UTIL.getHBaseCluster().getRegions(TABLENAME).toArray()[i];
+        generateTestData(region, i % 4 == 0 ? 200 : 800);
+        region.flush(true);
+      }
+    }
+
+    admin.flush(TABLENAME);
+
+    assertEquals(5, MetaTableAccessor.getRegionCount(TEST_UTIL.getConnection(), TABLENAME));
+
+    // Run the normalizer once, then poll until the merge brings the table down to four
+    // regions. Bounded by the @Test timeout.
+    m.normalizeRegions();
+
+    while (MetaTableAccessor.getRegionCount(TEST_UTIL.getConnection(), TABLENAME) > 4) {
+      Thread.sleep(100);
+    }
+
+    assertEquals(4, MetaTableAccessor.getRegionCount(TEST_UTIL.getConnection(), TABLENAME));
+  }
+
+  /**
+   * Writes {@code numRows} single-column rows directly into the given region.
+   * <p>
+   * Row keys are the region's start key followed by the 4-byte row index; every row gets
+   * one cell in {@link #FAMILYNAME}, qualifier {@code "0"}, with a generated value.
+   *
+   * @param region region to write into
+   * @param numRows number of rows to write
+   * @throws IOException if a put fails
+   */
+  private void generateTestData(Region region, int numRows) throws IOException {
+    // NOTE(review): the two arguments look like min and max value size (both 10 KB) -
+    // confirm against LoadTestKVGenerator.
+    LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(10 * 1024, 10 * 1024);
+    byte[] col = Bytes.toBytes("0");
+    for (int i = 0; i < numRows; ++i) {
+      byte[] key = Bytes.add(region.getRegionInfo().getStartKey(), Bytes.toBytes(i));
+      Put put = new Put(key);
+      byte[] value = dataGenerator.generateRandomSizeValue(key, col);
+      put.add(FAMILYNAME, col, value);
+      region.put(put);
+    }
+  }
+}
--
1.9.5 (Apple Git-50.3)