diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index af6822b..6dcd80d 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -994,22 +994,6 @@ possible configurations would overwhelm and obscure the important.
the keys are hidden.
-  <property>
-    <name>hbase.coprocessor.enabled</name>
-    <value>true</value>
-    <description>Enables or disables coprocessor loading. If 'false'
-    (disabled), any other coprocessor related configuration will be ignored.
-    </description>
-  </property>
-  <property>
-    <name>hbase.coprocessor.user.enabled</name>
-    <value>true</value>
-    <description>Enables or disables user (aka. table) coprocessor loading.
-    If 'false' (disabled), any table coprocessor attributes in table
-    descriptors will be ignored. If "hbase.coprocessor.enabled" is 'false'
-    this setting has no effect.
-    </description>
-  </property>
     <name>hbase.coprocessor.region.classes</name>
     <description>A comma-separated list of Coprocessors that are loaded by
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java
index 237f617..eeb941a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/CoprocessorHost.java
@@ -73,11 +73,6 @@ public abstract class CoprocessorHost {
"hbase.coprocessor.wal.classes";
public static final String ABORT_ON_ERROR_KEY = "hbase.coprocessor.abortonerror";
public static final boolean DEFAULT_ABORT_ON_ERROR = true;
- public static final String COPROCESSORS_ENABLED_CONF_KEY = "hbase.coprocessor.enabled";
- public static final boolean DEFAULT_COPROCESSORS_ENABLED = true;
- public static final String USER_COPROCESSORS_ENABLED_CONF_KEY =
- "hbase.coprocessor.user.enabled";
- public static final boolean DEFAULT_USER_COPROCESSORS_ENABLED = true;
private static final Log LOG = LogFactory.getLog(CoprocessorHost.class);
protected Abortable abortable;
@@ -128,12 +123,6 @@ public abstract class CoprocessorHost {
* Called by constructor.
*/
protected void loadSystemCoprocessors(Configuration conf, String confKey) {
- boolean coprocessorsEnabled = conf.getBoolean(COPROCESSORS_ENABLED_CONF_KEY,
- DEFAULT_COPROCESSORS_ENABLED);
- if (!coprocessorsEnabled) {
- return;
- }
-
     Class<?> implClass = null;
// load default coprocessors from configure file
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
index 9f003ec..2997172 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
@@ -22,8 +22,6 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.List;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -50,8 +48,6 @@ import org.apache.hadoop.hbase.protobuf.generated.QuotaProtos.Quotas;
public class MasterCoprocessorHost
     extends CoprocessorHost<MasterCoprocessorHost.MasterEnvironment> {
- private static final Log LOG = LogFactory.getLog(MasterCoprocessorHost.class);
-
/**
* Coprocessor environment extension providing access to master related
* services.
@@ -74,16 +70,10 @@ public class MasterCoprocessorHost
private MasterServices masterServices;
- public MasterCoprocessorHost(final MasterServices services, final Configuration conf) {
+ MasterCoprocessorHost(final MasterServices services, final Configuration conf) {
super(services);
this.conf = conf;
this.masterServices = services;
- // Log the state of coprocessor loading here; should appear only once or
- // twice in the daemon log, depending on HBase version, because there is
- // only one MasterCoprocessorHost instance in the master process
- boolean coprocessorsEnabled = conf.getBoolean(COPROCESSORS_ENABLED_CONF_KEY,
- DEFAULT_COPROCESSORS_ENABLED);
- LOG.info("System coprocessor loading is " + (coprocessorsEnabled ? "enabled" : "disabled"));
loadSystemCoprocessors(conf, MASTER_COPROCESSOR_CONF_KEY);
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
index d10141c..a32a478 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionCoprocessorHost.java
@@ -328,14 +328,6 @@ public class RegionCoprocessorHost
}
void loadTableCoprocessors(final Configuration conf) {
- boolean coprocessorsEnabled = conf.getBoolean(COPROCESSORS_ENABLED_CONF_KEY,
- DEFAULT_COPROCESSORS_ENABLED);
- boolean tableCoprocessorsEnabled = conf.getBoolean(USER_COPROCESSORS_ENABLED_CONF_KEY,
- DEFAULT_USER_COPROCESSORS_ENABLED);
- if (!(coprocessorsEnabled && tableCoprocessorsEnabled)) {
- return;
- }
-
// scan the table attributes for coprocessor load specifications
// initialize the coprocessors
     List<RegionEnvironment> configured = new ArrayList<RegionEnvironment>();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerCoprocessorHost.java
index ab8e948..43a3f32 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerCoprocessorHost.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerCoprocessorHost.java
@@ -22,8 +22,6 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.List;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -46,8 +44,6 @@ import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
public class RegionServerCoprocessorHost extends
     CoprocessorHost<RegionServerCoprocessorHost.RegionServerEnvironment> {
- private static final Log LOG = LogFactory.getLog(RegionServerCoprocessorHost.class);
-
private RegionServerServices rsServices;
public RegionServerCoprocessorHost(RegionServerServices rsServices,
@@ -55,16 +51,7 @@ public class RegionServerCoprocessorHost extends
super(rsServices);
this.rsServices = rsServices;
this.conf = conf;
- // Log the state of coprocessor loading here; should appear only once or
- // twice in the daemon log, depending on HBase version, because there is
- // only one RegionServerCoprocessorHost instance in the RS process
- boolean coprocessorsEnabled = conf.getBoolean(COPROCESSORS_ENABLED_CONF_KEY,
- DEFAULT_COPROCESSORS_ENABLED);
- boolean tableCoprocessorsEnabled = conf.getBoolean(USER_COPROCESSORS_ENABLED_CONF_KEY,
- DEFAULT_USER_COPROCESSORS_ENABLED);
- LOG.info("System coprocessor loading is " + (coprocessorsEnabled ? "enabled" : "disabled"));
- LOG.info("Table coprocessor loading is " +
- ((coprocessorsEnabled && tableCoprocessorsEnabled) ? "enabled" : "disabled"));
+ // load system default cp's from configuration.
loadSystemCoprocessors(conf, REGIONSERVER_COPROCESSOR_CONF_KEY);
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestCoprocessorConfiguration.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestCoprocessorConfiguration.java
deleted file mode 100644
index fb2f20c..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestCoprocessorConfiguration.java
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hbase.coprocessor;
-
-import java.io.IOException;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import static org.mockito.Mockito.*;
-import static org.junit.Assert.*;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.Coprocessor;
-import org.apache.hadoop.hbase.CoprocessorEnvironment;
-import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
-import org.apache.hadoop.hbase.master.MasterServices;
-import org.apache.hadoop.hbase.regionserver.HRegion;
-import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
-import org.apache.hadoop.hbase.regionserver.RegionServerCoprocessorHost;
-import org.apache.hadoop.hbase.regionserver.RegionServerServices;
-import org.apache.hadoop.hbase.testclassification.CoprocessorTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-/**
- * Tests for global coprocessor loading configuration
- */
-@Category({CoprocessorTests.class, SmallTests.class})
-public class TestCoprocessorConfiguration {
-
- private static final Configuration CONF = HBaseConfiguration.create();
- static {
- CONF.setStrings(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
- SystemCoprocessor.class.getName());
- CONF.setStrings(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY,
- SystemCoprocessor.class.getName());
- CONF.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
- SystemCoprocessor.class.getName());
- }
- private static final TableName TABLENAME = TableName.valueOf("TestCoprocessorConfiguration");
- private static final HRegionInfo REGIONINFO = new HRegionInfo(TABLENAME);
- private static final HTableDescriptor TABLEDESC = new HTableDescriptor(TABLENAME);
- static {
- try {
- TABLEDESC.addCoprocessor(TableCoprocessor.class.getName());
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- // use atomic types in case coprocessor loading is ever multithreaded, also
- // so we can mutate them even though they are declared final here
- private static final AtomicBoolean systemCoprocessorLoaded = new AtomicBoolean();
- private static final AtomicBoolean tableCoprocessorLoaded = new AtomicBoolean();
-
- public static class SystemCoprocessor implements Coprocessor {
- @Override
- public void start(CoprocessorEnvironment env) throws IOException {
- systemCoprocessorLoaded.set(true);
- }
-
- @Override
- public void stop(CoprocessorEnvironment env) throws IOException { }
- }
-
- public static class TableCoprocessor implements Coprocessor {
- @Override
- public void start(CoprocessorEnvironment env) throws IOException {
- tableCoprocessorLoaded.set(true);
- }
-
- @Override
- public void stop(CoprocessorEnvironment env) throws IOException { }
- }
-
- @Test
- public void testRegionCoprocessorHostDefaults() throws Exception {
- Configuration conf = new Configuration(CONF);
- HRegion region = mock(HRegion.class);
- when(region.getRegionInfo()).thenReturn(REGIONINFO);
- when(region.getTableDesc()).thenReturn(TABLEDESC);
- RegionServerServices rsServices = mock(RegionServerServices.class);
- systemCoprocessorLoaded.set(false);
- tableCoprocessorLoaded.set(false);
- new RegionCoprocessorHost(region, rsServices, conf);
- assertEquals("System coprocessors loading default was not honored",
- systemCoprocessorLoaded.get(),
- CoprocessorHost.DEFAULT_COPROCESSORS_ENABLED);
- assertEquals("Table coprocessors loading default was not honored",
- tableCoprocessorLoaded.get(),
- CoprocessorHost.DEFAULT_COPROCESSORS_ENABLED &&
- CoprocessorHost.DEFAULT_USER_COPROCESSORS_ENABLED);
- }
-
- @Test
- public void testRegionServerCoprocessorHostDefaults() throws Exception {
- Configuration conf = new Configuration(CONF);
- RegionServerServices rsServices = mock(RegionServerServices.class);
- systemCoprocessorLoaded.set(false);
- new RegionServerCoprocessorHost(rsServices, conf);
- assertEquals("System coprocessors loading default was not honored",
- systemCoprocessorLoaded.get(),
- CoprocessorHost.DEFAULT_COPROCESSORS_ENABLED);
- }
-
- @Test
- public void testMasterCoprocessorHostDefaults() throws Exception {
- Configuration conf = new Configuration(CONF);
- MasterServices masterServices = mock(MasterServices.class);
- systemCoprocessorLoaded.set(false);
- new MasterCoprocessorHost(masterServices, conf);
- assertEquals("System coprocessors loading default was not honored",
- systemCoprocessorLoaded.get(),
- CoprocessorHost.DEFAULT_COPROCESSORS_ENABLED);
- }
-
- @Test
- public void testRegionCoprocessorHostAllDisabled() throws Exception {
- Configuration conf = new Configuration(CONF);
- conf.setBoolean(CoprocessorHost.COPROCESSORS_ENABLED_CONF_KEY, false);
- HRegion region = mock(HRegion.class);
- when(region.getRegionInfo()).thenReturn(REGIONINFO);
- when(region.getTableDesc()).thenReturn(TABLEDESC);
- RegionServerServices rsServices = mock(RegionServerServices.class);
- systemCoprocessorLoaded.set(false);
- tableCoprocessorLoaded.set(false);
- new RegionCoprocessorHost(region, rsServices, conf);
- assertFalse("System coprocessors should not have been loaded",
- systemCoprocessorLoaded.get());
- assertFalse("Table coprocessors should not have been loaded",
- tableCoprocessorLoaded.get());
- }
-
- @Test
- public void testRegionCoprocessorHostTableLoadingDisabled() throws Exception {
- Configuration conf = new Configuration(CONF);
- conf.setBoolean(CoprocessorHost.COPROCESSORS_ENABLED_CONF_KEY, true); // if defaults change
- conf.setBoolean(CoprocessorHost.USER_COPROCESSORS_ENABLED_CONF_KEY, false);
- HRegion region = mock(HRegion.class);
- when(region.getRegionInfo()).thenReturn(REGIONINFO);
- when(region.getTableDesc()).thenReturn(TABLEDESC);
- RegionServerServices rsServices = mock(RegionServerServices.class);
- systemCoprocessorLoaded.set(false);
- tableCoprocessorLoaded.set(false);
- new RegionCoprocessorHost(region, rsServices, conf);
- assertTrue("System coprocessors should have been loaded",
- systemCoprocessorLoaded.get());
- assertFalse("Table coprocessors should not have been loaded",
- tableCoprocessorLoaded.get());
- }
-}
diff --git a/src/main/asciidoc/_chapters/architecture.adoc b/src/main/asciidoc/_chapters/architecture.adoc
index bae4a23..6de7208 100644
--- a/src/main/asciidoc/_chapters/architecture.adoc
+++ b/src/main/asciidoc/_chapters/architecture.adoc
@@ -858,6 +858,31 @@ For a RegionServer hosting data that can comfortably fit into cache, or if your
The compressed BlockCache is disabled by default. To enable it, set `hbase.block.data.cachecompressed` to `true` in _hbase-site.xml_ on all RegionServers.
+[[regionserver_splitting_implementation]]
+=== RegionServer Splitting Implementation
+
+As write requests are handled by the region server, they accumulate in an in-memory storage system called the _memstore_. Once the memstore fills, its contents are written to disk as additional store files. This event is called a _memstore flush_. As store files accumulate, the RegionServer will <<compaction,compact>> them into fewer, larger files. After each flush or compaction finishes, the amount of data stored in the region has changed, so the RegionServer consults the region split policy to determine if the region has grown too large or should be split for another policy-specific reason. A region split request is enqueued if the policy recommends it.
+
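+The split policy is pluggable. The following is a minimal sketch of how a policy might be configured; the table name, column family, and the choice of `ConstantSizeRegionSplitPolicy` are illustrative assumptions, not recommendations.
+
+[source,java]
+----
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
+
+public class SplitPolicyExample {
+  public static void main(String[] args) {
+    // Cluster-wide default split policy; normally set in hbase-site.xml.
+    Configuration conf = HBaseConfiguration.create();
+    conf.set("hbase.regionserver.region.split.policy",
+        ConstantSizeRegionSplitPolicy.class.getName());
+
+    // Per-table override, carried in the table descriptor.
+    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("example_table"));
+    htd.addFamily(new HColumnDescriptor("cf"));
+    htd.setValue(HTableDescriptor.SPLIT_POLICY,
+        ConstantSizeRegionSplitPolicy.class.getName());
+  }
+}
+----
+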
+Logically, the process of splitting a region is simple. We find a suitable point in the keyspace of the region where we should divide the region in half, then split the region's data into two new regions at that point. The details of the process, however, are not simple. When a split happens, the newly created _daughter regions_ do not rewrite all the data into new files immediately. Instead, they create small files similar to symbolic link files, named link:http://www.google.com/url?q=http%3A%2F%2Fhbase.apache.org%2Fapidocs%2Forg%2Fapache%2Fhadoop%2Fhbase%2Fio%2FReference.html&sa=D&sntz=1&usg=AFQjCNEkCbADZ3CgKHTtGYI8bJVwp663CA[Reference files], which point to either the top or bottom part of the parent store file according to the split point. The reference file is used just like a regular data file, but only half of the records are considered. The region can only be split if there are no more references to the immutable data files of the parent region. Those reference files are cleaned gradually by compactions, so that the region will stop referring to its parent's files, and can be split further.
+
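+The sketch below is illustrative only and assumes nothing beyond `Bytes.compareTo`; the real half-file handling lives inside HBase's store file readers, not in user code. It shows the comparison that a top or bottom Reference implies when deciding whether a row belongs to a given half of the parent file.
+
+[source,java]
+----
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class HalfFileSketch {
+  // Top half serves rows at or above the split point; bottom half serves rows below it.
+  static boolean belongsToHalf(byte[] rowKey, byte[] splitPoint, boolean topHalf) {
+    int cmp = Bytes.compareTo(rowKey, splitPoint);
+    return topHalf ? cmp >= 0 : cmp < 0;
+  }
+
+  public static void main(String[] args) {
+    byte[] split = Bytes.toBytes("m");
+    System.out.println(belongsToHalf(Bytes.toBytes("apple"), split, false)); // true: bottom half
+    System.out.println(belongsToHalf(Bytes.toBytes("zebra"), split, true));  // true: top half
+  }
+}
+----
+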
+Although splitting the region is a local decision made by the RegionServer, the split process itself must coordinate with many actors. The RegionServer notifies the Master before and after the split, updates the `.META.` table so that clients can discover the new daughter regions, and rearranges the directory structure and data files in HDFS. Splitting is a multi-task process. To enable rollback in case of an error, the RegionServer keeps an in-memory journal about the execution state. The steps taken by the RegionServer to execute the split are illustrated in <<regionserver_split_process_image>>. Each step is labeled with its step number. Actions from RegionServers or Master are shown in red, while actions from the clients are shown in green.
+
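+The journal pattern can be sketched as follows. This is not HBase's internal implementation; the class and phase names are hypothetical, and the real code also has to undo HDFS and ZooKeeper state. It only illustrates why the RegionServer records each completed phase: on failure, completed phases are undone in reverse order up to the point of no return.
+
+[source,java]
+----
+import java.util.ArrayList;
+import java.util.List;
+
+public class SplitJournalSketch {
+  // Hypothetical phase names, loosely following the steps listed below.
+  enum Phase { SET_SPLITTING_ZNODE, CREATED_SPLIT_DIR, CLOSED_PARENT, CREATED_DAUGHTERS, EDITED_META }
+
+  private final List<Phase> journal = new ArrayList<Phase>();
+
+  void record(Phase phase) {
+    journal.add(phase);
+  }
+
+  // Undo completed phases in reverse order; the .META. edit is the point of no return.
+  void rollback() {
+    for (int i = journal.size() - 1; i >= 0; i--) {
+      if (journal.get(i) == Phase.EDITED_META) {
+        throw new IllegalStateException("Past the point of no return; the split must roll forward");
+      }
+      // undo(journal.get(i)): delete the znode, remove .splits, reopen the parent, and so on.
+    }
+  }
+}
+----
+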
+[[regionserver_split_process_image]]
+.RegionServer Split Process
+image::region_split_process.png[Region Split Process]
+
+. The RegionServer decides locally to split the region, and prepares the split. *THE SPLIT TRANSACTION IS STARTED.* As a first step, the RegionServer acquires a shared read lock on the table to prevent schema modifications during the splitting process. Then it creates a znode in ZooKeeper under `/hbase/region-in-transition/region-name`, and sets the znode's state to `SPLITTING`.
+. The Master learns about this znode, since it has a watcher for the parent `region-in-transition` znode.
+. The RegionServer creates a sub-directory named `.splits` under the parent’s `region` directory in HDFS.
+. The RegionServer closes the parent region and marks the region as offline in its local data structures. *THE SPLITTING REGION IS NOW OFFLINE.* At this point, client requests coming to the parent region will throw `NotServingRegionException`. The client will retry with some backoff. The closing region is flushed.
+. The RegionServer creates region directories under the `.splits` directory, for daughter regions A and B, and creates necessary data structures. Then it splits the store files, in the sense that it creates two link:http://www.google.com/url?q=http%3A%2F%2Fhbase.apache.org%2Fapidocs%2Forg%2Fapache%2Fhadoop%2Fhbase%2Fio%2FReference.html&sa=D&sntz=1&usg=AFQjCNEkCbADZ3CgKHTtGYI8bJVwp663CA[Reference] files per store file in the parent region. Those reference files will point to the parent region's files.
+. The RegionServer creates the actual region directories in HDFS, and moves the reference files for each daughter.
+. The RegionServer sends a `Put` request to the `.META.` table, to set the parent as offline in the `.META.` table and add information about the daughter regions. At this point, there won’t be individual entries in `.META.` for the daughters. Clients will see that the parent region is split if they scan `.META.`, but won’t know about the daughters until they appear in `.META.`. Also, if this `Put` to `.META.` succeeds, the parent will be effectively split. If the RegionServer fails before this RPC succeeds, the Master and the next RegionServer opening the region will clean up the dirty state about the region split. After the `.META.` update, though, the region split will be rolled forward by the Master. A client-side sketch of scanning `.META.` for split parents appears after this list.
+. The RegionServer opens daughters A and B in parallel.
+. The RegionServer adds the daughters A and B to `.META.`, together with information that it hosts the regions. *THE SPLIT REGIONS (DAUGHTERS WITH REFERENCES TO PARENT) ARE NOW ONLINE.* After this point, clients can discover the new regions and issue requests to them. Clients cache the `.META.` entries locally, but when they make requests to the RegionServer or `.META.`, their caches will be invalidated, and they will learn about the new regions from `.META.`.
+. The RegionServer updates znode `/hbase/region-in-transition/region-name` in ZooKeeper to state `SPLIT`, so that the master can learn about it. The balancer can freely re-assign the daughter regions to other region servers if necessary. *THE SPLIT TRANSACTION IS NOW FINISHED.*
+. After the split, `.META.` and HDFS will still contain references to the parent region. Those references will be removed when compactions in daughter regions rewrite the data files. Garbage collection tasks in the master periodically check whether the daughter regions still refer to the parent region's files. If not, the parent region will be removed.
+
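+As a client-side illustration of steps 7 and 9, the sketch below scans the catalog table for rows that still carry `splitA`/`splitB` columns, i.e. split parents that have not yet been garbage collected. It is a minimal sketch assuming a reachable cluster; in current client APIs the `.META.` table is addressed as `TableName.META_TABLE_NAME` (`hbase:meta`).
+
+[source,java]
+----
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+
+public class MetaSplitScan {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    try (Connection connection = ConnectionFactory.createConnection(conf);
+         Table meta = connection.getTable(TableName.META_TABLE_NAME)) {
+      Scan scan = new Scan();
+      scan.addFamily(HConstants.CATALOG_FAMILY);
+      try (ResultScanner scanner = meta.getScanner(scan)) {
+        for (Result r : scanner) {
+          // A split parent carries splitA/splitB columns that point at its daughters.
+          if (r.containsColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER)
+              || r.containsColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER)) {
+            System.out.println("Split parent: " + r);
+          }
+        }
+      }
+    }
+  }
+}
+----
+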
[[wal]]
=== Write Ahead Log (WAL)
diff --git a/src/main/site/resources/images/region_split_process.png b/src/main/site/resources/images/region_split_process.png
new file mode 100644
index 0000000..2717617
Binary files /dev/null and b/src/main/site/resources/images/region_split_process.png differ