From 5a9d84646e3ffa9367a22b3a1b9e6a41db17a0f5 Mon Sep 17 00:00:00 2001
From: Nick Dimiduk
Date: Tue, 21 Jan 2014 14:57:50 -0800
Subject: [PATCH] HBASE-10392 Correct references to hbase.regionserver.global.memstore.upperLimit

---
 .../main/java/org/apache/hadoop/hbase/HBaseConfiguration.java | 10 +++++-----
 src/main/docbkx/ops_mgt.xml                                   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
index a436901..e4b358f 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HBaseConfiguration.java
@@ -74,8 +74,8 @@ public class HBaseConfiguration extends Configuration {
   }
 
   private static void checkForClusterFreeMemoryLimit(Configuration conf) {
-    float globalMemstoreLimit = conf.getFloat("hbase.regionserver.global.memstore.upperLimit", 0.4f);
-    int gml = (int)(globalMemstoreLimit * CONVERT_TO_PERCENTAGE);
+    float globalMemstoreSize = conf.getFloat("hbase.regionserver.global.memstore.size", 0.4f);
+    int gml = (int)(globalMemstoreSize * CONVERT_TO_PERCENTAGE);
     float blockCacheUpperLimit =
       conf.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY,
         HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT);
@@ -87,10 +87,10 @@ public class HBaseConfiguration extends Configuration {
         "Current heap configuration for MemStore and BlockCache exceeds " +
         "the threshold required for successful cluster operation. " +
         "The combined value cannot exceed 0.8. Please check " +
-        "the settings for hbase.regionserver.global.memstore.upperLimit and " +
+        "the settings for hbase.regionserver.global.memstore.size and " +
         "hfile.block.cache.size in your configuration. " +
-        "hbase.regionserver.global.memstore.upperLimit is " +
-        globalMemstoreLimit +
+        "hbase.regionserver.global.memstore.size is " +
+        globalMemstoreSize +
         " hfile.block.cache.size is " + blockCacheUpperLimit);
     }
   }
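For context, the hunks above only rename the configuration key and local variable; the logic is unchanged: at startup, HBase refuses to run if the MemStore fraction plus the BlockCache fraction claim more than 0.8 of the heap. A minimal standalone sketch of what the check enforces (plain Java, no HBase dependency; the Map-backed stand-in for Configuration, the class name, and main() are illustrative assumptions, not HBase API):

    import java.util.HashMap;
    import java.util.Map;

    public class HeapFractionCheck {
      // Mirrors CONVERT_TO_PERCENTAGE in HBaseConfiguration; fractions are
      // compared as whole percents to sidestep float rounding.
      private static final int CONVERT_TO_PERCENTAGE = 100;

      static void checkForClusterFreeMemoryLimit(Map<String, Float> conf) {
        float globalMemstoreSize =
            conf.getOrDefault("hbase.regionserver.global.memstore.size", 0.4f);
        float blockCacheUpperLimit =
            conf.getOrDefault("hfile.block.cache.size", 0.4f);
        int gml = (int) (globalMemstoreSize * CONVERT_TO_PERCENTAGE);
        int bcul = (int) (blockCacheUpperLimit * CONVERT_TO_PERCENTAGE);
        // The combined MemStore + BlockCache share of the heap must leave
        // at least 20% free, i.e. the sum cannot exceed 0.8.
        if (gml + bcul > (int) (0.8f * CONVERT_TO_PERCENTAGE)) {
          throw new RuntimeException(
              "hbase.regionserver.global.memstore.size is " + globalMemstoreSize
                  + ", hfile.block.cache.size is " + blockCacheUpperLimit
                  + "; combined value cannot exceed 0.8");
        }
      }

      public static void main(String[] args) {
        Map<String, Float> conf = new HashMap<>();
        conf.put("hbase.regionserver.global.memstore.size", 0.5f);
        conf.put("hfile.block.cache.size", 0.4f);
        checkForClusterFreeMemoryLimit(conf); // 0.5 + 0.4 > 0.8 -> throws
      }
    }

With the defaults (0.4 + 0.4) the check passes; bumping either fraction past the combined 0.8 ceiling trips the error text quoted in the patched string above.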
diff --git a/src/main/docbkx/ops_mgt.xml b/src/main/docbkx/ops_mgt.xml
index c12f8d4..7395fc8 100644
--- a/src/main/docbkx/ops_mgt.xml
+++ b/src/main/docbkx/ops_mgt.xml
@@ -1051,7 +1051,7 @@ false
 When configuring regions for multiple tables, note that most region settings can be set on a per-table basis via HTableDescriptor, as well as shell commands. These settings will override the ones in hbase-site.xml. That is useful if your tables have different workloads/use cases. Also note that in the discussion of region sizes here, HDFS replication factor is not (and should not be) taken into account, whereas other factors should be. So, if your data is compressed and replicated 3 ways by HDFS, "9 Gb region" means 9 Gb of compressed data. HDFS replication factor only affects your disk usage and is invisible to most HBase code.
 Number of regions per RS - upper bound
-In production scenarios, where you have a lot of data, you are normally concerned with the maximum number of regions you can have per server. The reference guide has technical discussion on the subject; in short, maximum number of regions is mostly determined by memstore memory usage. Each region has its own memstores; these grow up to a configurable size; usually in 128-256Mb range, see hbase.hregion.memstore.flush.size. There's one memstore per column family (so there's only one per region if there's one CF in the table). RS dedicates some fraction of total memory (see hbase.regionserver.global.memstore.upperLimit) to region memstores. If this memory is exceeded (too much memstore usage), undesirable consequences such as unresponsive server, or later compaction storms, can result. Thus, a good starting point for the number of regions per RS (assuming one table) is (RS memory)*(total memstore fraction)/((memstore size)*(# column families))
+In production scenarios, where you have a lot of data, you are normally concerned with the maximum number of regions you can have per server. The reference guide has technical discussion on the subject; in short, maximum number of regions is mostly determined by memstore memory usage. Each region has its own memstores; these grow up to a configurable size; usually in 128-256Mb range, see hbase.hregion.memstore.flush.size. There's one memstore per column family (so there's only one per region if there's one CF in the table). RS dedicates some fraction of total memory (see hbase.regionserver.global.memstore.size) to region memstores. If this memory is exceeded (too much memstore usage), undesirable consequences such as unresponsive server, or later compaction storms, can result. Thus, a good starting point for the number of regions per RS (assuming one table) is (RS memory)*(total memstore fraction)/((memstore size)*(# column families))