From a63c7a0e88aa4c06c1b5d6f1aa24eb7070a6cc9e Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Sun, 18 Aug 2019 22:05:43 +0530 Subject: [PATCH] HBASE-22760 : Pause/Resume/Query Snapshot Auto Cleanup Activity --- .../org/apache/hadoop/hbase/client/Admin.java | 22 ++++ .../hbase/client/AdminOverAsyncAdmin.java | 12 ++ .../hadoop/hbase/client/AsyncAdmin.java | 23 ++++ .../hadoop/hbase/client/AsyncHBaseAdmin.java | 12 ++ .../hbase/client/RawAsyncHBaseAdmin.java | 28 +++++ .../shaded/protobuf/RequestConverter.java | 28 +++++ .../hadoop/hbase/zookeeper/ZNodePaths.java | 38 ++++-- .../org/apache/hadoop/hbase/HConstants.java | 2 - .../src/main/protobuf/Master.proto | 28 +++++ .../main/protobuf/SnapshotAutoCleanup.proto | 31 +++++ .../apache/hadoop/hbase/master/HMaster.java | 38 +++++- .../hbase/master/MasterRpcServices.java | 68 +++++++++++ .../hadoop/hbase/master/MasterServices.java | 8 ++ .../master/cleaner/SnapshotCleanerChore.java | 62 +++++----- .../hadoop/hbase/client/TestAdmin2.java | 27 +++++ .../hbase/master/MockNoopMasterServices.java | 6 + .../cleaner/TestSnapshotCleanerChore.java | 1 - .../cleaner/TestSnapshotFromMaster.java | 103 ++++++++++++++++ .../hbase/snapshot/SnapshotTestingUtils.java | 23 ++++ hbase-shell/src/main/ruby/hbase/admin.rb | 16 +++ hbase-shell/src/main/ruby/shell.rb | 2 + .../commands/snapshot_auto_cleanup_enabled.rb | 39 ++++++ .../commands/snapshot_auto_cleanup_switch.rb | 43 +++++++ hbase-shell/src/test/ruby/hbase/admin_test.rb | 14 +++ .../hbase/thrift2/client/ThriftAdmin.java | 10 ++ .../zookeeper/SnapshotAutoCleanupTracker.java | 112 ++++++++++++++++++ src/main/asciidoc/_chapters/ops_mgt.adoc | 38 +++++- 27 files changed, 784 insertions(+), 50 deletions(-) create mode 100644 hbase-protocol-shaded/src/main/protobuf/SnapshotAutoCleanup.proto create mode 100644 hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_enabled.rb create mode 100644 hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_switch.rb create mode 
100644 hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/SnapshotAutoCleanupTracker.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index c553a5656c..4c96c995ca 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -2232,4 +2232,26 @@ public interface Admin extends Abortable, Closeable { default List hasUserPermissions(List permissions) throws IOException { return hasUserPermissions(null, permissions); } + + /** + * Turn on or off the auto snapshot cleanup based on TTL. + * + * @param on Set to true to enable, false to disable. + * @param synchronous If true, it waits until current snapshot cleanup is completed, + * if outstanding. + * @return Previous auto snapshot cleanup value + * @throws IOException if a remote or network exception occurs + */ + boolean snapshotAutoCleanupSwitch(final boolean on, final boolean synchronous) + throws IOException; + + /** + * Query the current state of the auto snapshot cleanup based on TTL. + * + * @return true if the auto snapshot cleanup is enabled, + * false otherwise. 
+ * @throws IOException if a remote or network exception occurs + */ + boolean isSnapshotAutoCleanupEnabled() throws IOException; + } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java index 599e5d69f6..a981d3a700 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java @@ -942,4 +942,16 @@ class AdminOverAsyncAdmin implements Admin { throws IOException { return get(admin.hasUserPermissions(userName, permissions)); } + + @Override + public boolean snapshotAutoCleanupSwitch(final boolean on, final boolean synchronous) + throws IOException { + return get(admin.snapshotAutoCleanupSwitch(on, synchronous)); + } + + @Override + public boolean isSnapshotAutoCleanupEnabled() throws IOException { + return get(admin.isSnapshotAutoCleanupEnabled()); + } + } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java index 75dc6d2a21..8b0e743279 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java @@ -1484,4 +1484,27 @@ public interface AsyncAdmin { default CompletableFuture> hasUserPermissions(List permissions) { return hasUserPermissions(null, permissions); } + + /** + * Turn on or off the auto snapshot cleanup based on TTL. + *

+ * Notice that, the method itself is always non-blocking, which means it will always return + * immediately. The {@code sync} parameter only affects when we will complete the returned + * {@link CompletableFuture}. + * + * @param on Set to true to enable, false to disable. + * @param sync If true, it waits until current snapshot cleanup is completed, + * if outstanding. + * @return Previous auto snapshot cleanup value wrapped by a {@link CompletableFuture}. + */ + CompletableFuture snapshotAutoCleanupSwitch(boolean on, boolean sync); + + /** + * Query the current state of the auto snapshot cleanup based on TTL. + * + * @return true if the auto snapshot cleanup is enabled, false otherwise. + * The return value will be wrapped by a {@link CompletableFuture}. + */ + CompletableFuture isSnapshotAutoCleanupEnabled(); + } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java index 7787f18833..785cc458ee 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java @@ -826,4 +826,16 @@ class AsyncHBaseAdmin implements AsyncAdmin { List permissions) { return wrap(rawAdmin.hasUserPermissions(userName, permissions)); } + + @Override + public CompletableFuture snapshotAutoCleanupSwitch(final boolean on, + final boolean sync) { + return wrap(rawAdmin.snapshotAutoCleanupSwitch(on, sync)); + } + + @Override + public CompletableFuture isSnapshotAutoCleanupEnabled() { + return wrap(rawAdmin.isSnapshotAutoCleanupEnabled()); + } + } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java index 47a79022cb..01a592fe05 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java +++ 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java @@ -206,6 +206,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsProcedur import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsProcedureDoneResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsRpcThrottleEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsRpcThrottleEnabledResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .IsSnapshotAutoCleanupEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledRequest; @@ -256,6 +258,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetNormali import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetNormalizerRunningResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetQuotaRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetQuotaResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .SetSnapshotAutoCleanupResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetSplitOrMergeEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetSplitOrMergeEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ShutdownRequest; @@ -3856,4 +3860,28 @@ class RawAsyncHBaseAdmin implements AsyncAdmin { resp -> resp.getHasUserPermissionList())) .call(); } + + @Override + public CompletableFuture snapshotAutoCleanupSwitch(final boolean on, + final boolean sync) { + return this.newMasterCaller() + .action((controller, stub) -> this + .call(controller, stub, + 
RequestConverter.buildSetSnapshotAutoCleanupRequest(on, sync), + MasterService.Interface::switchSnapshotAutoCleanup, + SetSnapshotAutoCleanupResponse::getPrevSnapshotAutoCleanup)) + .call(); + } + + @Override + public CompletableFuture isSnapshotAutoCleanupEnabled() { + return this.newMasterCaller() + .action((controller, stub) -> this + .call(controller, stub, + RequestConverter.buildIsSnapshotAutoCleanupEnabledRequest(), + MasterService.Interface::isSnapshotAutoCleanupEnabled, + IsSnapshotAutoCleanupEnabledResponse::getEnabled)) + .call(); + } + } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java index 1bad6bd3e3..af5c74fd81 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java @@ -121,6 +121,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsCatalogJ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsCleanerChoreEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsMasterRunningRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsNormalizerEnabledRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .IsSnapshotAutoCleanupEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MergeTableRegionsRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ModifyColumnRequest; @@ -135,6 +137,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleaner import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetBalancerRunningRequest; import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetCleanerChoreRunningRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetNormalizerRunningRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .SetSnapshotAutoCleanupRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetSplitOrMergeEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetTableStateInMetaRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SplitTableRegionRequest; @@ -1900,4 +1904,28 @@ public final class RequestConverter { map(r -> buildRegionSpecifier(RegionSpecifierType.ENCODED_REGION_NAME, Bytes.toBytes(r))). collect(Collectors.toList()); } + + /** + * Creates SetSnapshotAutoCleanupRequest for turning on/off auto snapshot cleanup + * + * @param on Set to true to enable, false to disable. + * @param synchronous If true, it waits until current snapshot cleanup is completed, + * if outstanding. 
+ * @return a SetSnapshotAutoCleanupRequest + */ + public static SetSnapshotAutoCleanupRequest buildSetSnapshotAutoCleanupRequest( + final boolean on, final boolean synchronous) { + return SetSnapshotAutoCleanupRequest.newBuilder().setOn(on).setSynchronous(synchronous).build(); + } + + /** + * Creates IsSnapshotAutoCleanupEnabledRequest to determine if auto snapshot cleanup + * based on TTL expiration is turned on + * + * @return IsSnapshotAutoCleanupEnabledRequest + */ + public static IsSnapshotAutoCleanupEnabledRequest buildIsSnapshotAutoCleanupEnabledRequest() { + return IsSnapshotAutoCleanupEnabledRequest.newBuilder().build(); + } + } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java index b9a34120fd..65552553f6 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java @@ -41,6 +41,7 @@ public class ZNodePaths { public static final char ZNODE_PATH_SEPARATOR = '/'; public final static String META_ZNODE_PREFIX = "meta-region-server"; + private static final String DEFAULT_SNAPSHOT_AUTOCLEANUP_ZNODE = "snapshot-autocleanup"; // base znode for this cluster public final String baseZNode; @@ -89,6 +90,8 @@ public class ZNodePaths { public final String queuesZNode; // znode containing queues of hfile references to be replicated public final String hfileRefsZNode; + // znode containing the state of the snapshot auto-cleanup + final String snapshotAutoCleanupZNode; public ZNodePaths(Configuration conf) { baseZNode = conf.get(ZOOKEEPER_ZNODE_PARENT, DEFAULT_ZOOKEEPER_ZNODE_PARENT); @@ -123,20 +126,35 @@ public class ZNodePaths { queuesZNode = joinZNode(replicationZNode, conf.get("zookeeper.znode.replication.rs", "rs")); hfileRefsZNode = joinZNode(replicationZNode, conf.get("zookeeper.znode.replication.hfile.refs", "hfile-refs")); + 
snapshotAutoCleanupZNode = joinZNode(baseZNode, + conf.get("zookeeper.znode.snapshot.autocleanup", DEFAULT_SNAPSHOT_AUTOCLEANUP_ZNODE)); } @Override public String toString() { - return "ZNodePaths [baseZNode=" + baseZNode + ", metaReplicaZNodes=" + metaReplicaZNodes - + ", rsZNode=" + rsZNode + ", drainingZNode=" + drainingZNode + ", masterAddressZNode=" - + masterAddressZNode + ", backupMasterAddressesZNode=" + backupMasterAddressesZNode - + ", clusterStateZNode=" + clusterStateZNode + ", tableZNode=" + tableZNode - + ", clusterIdZNode=" + clusterIdZNode + ", splitLogZNode=" + splitLogZNode - + ", balancerZNode=" + balancerZNode + ", regionNormalizerZNode=" + regionNormalizerZNode - + ", switchZNode=" + switchZNode + ", tableLockZNode=" + tableLockZNode - + ", namespaceZNode=" + namespaceZNode + ", masterMaintZNode=" + masterMaintZNode - + ", replicationZNode=" + replicationZNode + ", peersZNode=" + peersZNode - + ", queuesZNode=" + queuesZNode + ", hfileRefsZNode=" + hfileRefsZNode + "]"; + return new StringBuilder() + .append("ZNodePaths [baseZNode=").append(baseZNode) + .append(", metaReplicaZNodes=").append(metaReplicaZNodes) + .append(", rsZNode=").append(rsZNode) + .append(", drainingZNode=").append(drainingZNode) + .append(", masterAddressZNode=").append(masterAddressZNode) + .append(", backupMasterAddressesZNode=").append(backupMasterAddressesZNode) + .append(", clusterStateZNode=").append(clusterStateZNode) + .append(", tableZNode=").append(tableZNode) + .append(", clusterIdZNode=").append(clusterIdZNode) + .append(", splitLogZNode=").append(splitLogZNode) + .append(", balancerZNode=").append(balancerZNode) + .append(", regionNormalizerZNode=").append(regionNormalizerZNode) + .append(", switchZNode=").append(switchZNode) + .append(", tableLockZNode=").append(tableLockZNode) + .append(", namespaceZNode=").append(namespaceZNode) + .append(", masterMaintZNode=").append(masterMaintZNode) + .append(", replicationZNode=").append(replicationZNode) + .append(", 
peersZNode=").append(peersZNode) + .append(", queuesZNode=").append(queuesZNode) + .append(", hfileRefsZNode=").append(hfileRefsZNode) + .append(", snapshotAutoCleanupZNode=").append(snapshotAutoCleanupZNode) + .append("]").toString(); } /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 65c35a61d5..27cbd7cb54 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1471,8 +1471,6 @@ public final class HConstants { // User defined Default TTL config key public static final String DEFAULT_SNAPSHOT_TTL_CONFIG_KEY = "hbase.master.snapshot.ttl"; - public static final String SNAPSHOT_CLEANER_DISABLE = "hbase.master.cleaner.snapshot.disable"; - /** * Configurations for master executor services. */ diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index 3429d0343d..2fd0f10177 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -318,6 +318,22 @@ enum MasterSwitchType { MERGE = 1; } +message SetSnapshotAutoCleanupRequest { + required bool on = 1; + optional bool synchronous = 2; +} + +message SetSnapshotAutoCleanupResponse { + required bool prev_snapshot_auto_cleanup = 1; +} + +message IsSnapshotAutoCleanupEnabledRequest { +} + +message IsSnapshotAutoCleanupEnabledResponse { + required bool enabled = 1; +} + message SetSplitOrMergeEnabledRequest { required bool enabled = 1; optional bool synchronous = 2; @@ -896,6 +912,18 @@ service MasterService { */ rpc RestoreSnapshot(RestoreSnapshotRequest) returns(RestoreSnapshotResponse); + /** + * Turn on/off snapshot auto-cleanup based on TTL expiration + */ + rpc SwitchSnapshotAutoCleanup (SetSnapshotAutoCleanupRequest) + returns (SetSnapshotAutoCleanupResponse); + + /** + * 
Determine if snapshot auto-cleanup based on TTL expiration is turned on + */ + rpc IsSnapshotAutoCleanupEnabled (IsSnapshotAutoCleanupEnabledRequest) + returns (IsSnapshotAutoCleanupEnabledResponse); + /** * Execute a distributed procedure. */ diff --git a/hbase-protocol-shaded/src/main/protobuf/SnapshotAutoCleanup.proto b/hbase-protocol-shaded/src/main/protobuf/SnapshotAutoCleanup.proto new file mode 100644 index 0000000000..7163193924 --- /dev/null +++ b/hbase-protocol-shaded/src/main/protobuf/SnapshotAutoCleanup.proto @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto2"; + +// This file contains protocol buffers to represent the state of the snapshot auto cleanup based on TTL +package hbase.pb; + +option java_package = "org.apache.hadoop.hbase.shaded.protobuf.generated"; +option java_outer_classname = "SnapshotAutoCleanupProtos"; +option java_generate_equals_and_hash = true; +option optimize_for = SPEED; + +message SnapshotAutoCleanupState { + optional bool snapshot_auto_cleanup_on = 1; +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index b66eb05ef1..0a3f18a3af 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -207,6 +207,7 @@ import org.apache.hadoop.hbase.util.VersionInfo; import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker; import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; +import org.apache.hadoop.hbase.zookeeper.SnapshotAutoCleanupTracker; import org.apache.hadoop.hbase.zookeeper.ZKClusterId; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; @@ -318,6 +319,8 @@ public class HMaster extends HRegionServer implements MasterServices { MetaLocationSyncer metaLocationSyncer; // Tracker for active master location, if any client ZK quorum specified MasterAddressSyncer masterAddressSyncer; + // Tracker for auto snapshot cleanup state + SnapshotAutoCleanupTracker snapshotAutoCleanupTracker; // Tracker for split and merge state private SplitOrMergeTracker splitOrMergeTracker; @@ -777,6 +780,9 @@ public class HMaster extends HRegionServer implements MasterServices { this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager); this.drainingServerTracker.start(); + this.snapshotAutoCleanupTracker = new 
SnapshotAutoCleanupTracker(zooKeeper, this); + this.snapshotAutoCleanupTracker.start(); + String clientQuorumServers = conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM); boolean clientZkObserverMode = conf.getBoolean(HConstants.CLIENT_ZOOKEEPER_OBSERVER_MODE, HConstants.DEFAULT_CLIENT_ZOOKEEPER_OBSERVER_MODE); @@ -1467,15 +1473,15 @@ public class HMaster extends HRegionServer implements MasterServices { replicationPeerManager); getChoreService().scheduleChore(replicationBarrierCleaner); - final boolean isSnapshotChoreDisabled = conf.getBoolean(HConstants.SNAPSHOT_CLEANER_DISABLE, - false); - if (isSnapshotChoreDisabled) { + final boolean isSnapshotChoreEnabled = this.snapshotAutoCleanupTracker + .isSnapshotAutoCleanupEnabled(); + this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager()); + if (isSnapshotChoreEnabled) { + getChoreService().scheduleChore(this.snapshotCleanerChore); + } else { if (LOG.isTraceEnabled()) { LOG.trace("Snapshot Cleaner Chore is disabled. 
Not starting up the chore.."); } - } else { - this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager()); - getChoreService().scheduleChore(this.snapshotCleanerChore); } serviceStarted = true; if (LOG.isTraceEnabled()) { @@ -1568,6 +1574,21 @@ public class HMaster extends HRegionServer implements MasterServices { procedureExecutor.startWorkers(); } + /** + * Turn on/off Snapshot Cleanup Chore + * + * @param on indicates whether Snapshot Cleanup Chore is to be run + */ + void switchSnapshotAutoCleanup(final boolean on) { + if (on) { + if (!getChoreService().isChoreScheduled(this.snapshotCleanerChore)) { + getChoreService().scheduleChore(this.snapshotCleanerChore); + } + } else { + getChoreService().cancelChore(this.snapshotCleanerChore); + } + } + private void stopProcedureExecutor() { if (procedureExecutor != null) { configurationManager.deregisterObserver(procedureExecutor.getEnvironment()); @@ -3471,6 +3492,11 @@ public class HMaster extends HRegionServer implements MasterServices { return favoredNodesManager; } + @Override + public SnapshotCleanerChore getSnapshotCleanerChore() { + return this.snapshotCleanerChore; + } + private long executePeerProcedure(AbstractPeerProcedure procedure) throws IOException { long procId = procedureExecutor.submitProcedure(procedure); procedure.getLatch().await(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index c8caea76d8..d9494b2676 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -219,6 +219,10 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsNormaliz import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsNormalizerEnabledResponse; import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsProcedureDoneRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsProcedureDoneResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .IsSnapshotAutoCleanupEnabledRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .IsSnapshotAutoCleanupEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSplitOrMergeEnabledRequest; @@ -271,6 +275,10 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetNormali import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetNormalizerRunningResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetQuotaRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetQuotaResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .SetSnapshotAutoCleanupRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .SetSnapshotAutoCleanupResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetSplitOrMergeEnabledRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetSplitOrMergeEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetTableStateInMetaRequest; @@ -1485,6 +1493,66 @@ public class MasterRpcServices extends RSRpcServices } } + @Override + public SetSnapshotAutoCleanupResponse switchSnapshotAutoCleanup( + RpcController controller, SetSnapshotAutoCleanupRequest request) + throws ServiceException { + try { + master.checkInitialized(); + final boolean on = request.getOn(); + final boolean prevSnapshotAutoCleanupRunning = request.getSynchronous() ? 
+ this.switchSnapshotAutoCleanup(on, true) : this.switchSnapshotAutoCleanup(on, false); + return SetSnapshotAutoCleanupResponse.newBuilder() + .setPrevSnapshotAutoCleanup(prevSnapshotAutoCleanupRunning).build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + @Override + public IsSnapshotAutoCleanupEnabledResponse isSnapshotAutoCleanupEnabled( + RpcController controller, IsSnapshotAutoCleanupEnabledRequest request) + throws ServiceException { + try { + master.checkInitialized(); + final boolean isSnapshotAutoCleanupEnabled = master.snapshotAutoCleanupTracker + .isSnapshotAutoCleanupEnabled(); + return IsSnapshotAutoCleanupEnabledResponse.newBuilder() + .setEnabled(isSnapshotAutoCleanupEnabled).build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } + + /** + * Turn on/off snapshot auto-cleanup based on TTL + * + * @param newValue Set to true to enable, false to disable + * @param synchronous If true, it waits until current snapshot cleanup is completed, + * if outstanding + * @return previous snapshot auto-cleanup mode + */ + private boolean switchSnapshotAutoCleanup(final boolean newValue, final boolean synchronous) { + final boolean oldValue = master.snapshotAutoCleanupTracker.isSnapshotAutoCleanupEnabled(); + try { + if (synchronous) { + synchronized (master.getSnapshotCleanerChore().getMutexLockForSync()) { + master.snapshotAutoCleanupTracker.setSnapshotAutoCleanupOn(newValue); + master.switchSnapshotAutoCleanup(newValue); + } + } else { + master.snapshotAutoCleanupTracker.setSnapshotAutoCleanupOn(newValue); + master.switchSnapshotAutoCleanup(newValue); + } + LOG.info("{} Successfully set snapshot auto cleanup to {}", + master.getClientIdAuditPrefix(), newValue); + } catch (KeeperException e) { + LOG.error("Error updating snapshot auto cleanup mode to {}", newValue, e); + } + return oldValue; + } + + @Override public RunCatalogScanResponse runCatalogScan(RpcController c, RunCatalogScanRequest req) throws 
ServiceException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index 41cec5cfb2..5cdb01cbcb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore; import org.apache.hadoop.hbase.master.locking.LockManager; import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; @@ -423,6 +424,13 @@ public interface MasterServices extends Server { */ public FavoredNodesManager getFavoredNodesManager(); + /** + * Retrieve instance of SnapshotCleanerChore + * + * @return return SnapshotCleanerChore instance initialized by master + */ + SnapshotCleanerChore getSnapshotCleanerChore(); + /** * Add a new replication peer for replicating data to slave cluster * @param peerId a short name that identifies the peer diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java index 7e12acd839..e0751dbbff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java @@ -50,6 +50,8 @@ public class SnapshotCleanerChore extends ScheduledChore { private final SnapshotManager snapshotManager; + private static final Object MUTEX = new Object(); + /** * Construct Snapshot Cleaner Chore 
with parameterized constructor * @@ -66,39 +68,45 @@ public class SnapshotCleanerChore extends ScheduledChore { @Override protected void chore() { - if (LOG.isTraceEnabled()) { - LOG.trace("Snapshot Cleaner Chore is starting up..."); - } - try { - List completedSnapshotsList = - this.snapshotManager.getCompletedSnapshots(); - for (SnapshotProtos.SnapshotDescription snapshotDescription : completedSnapshotsList) { - long snapshotCreatedTime = snapshotDescription.getCreationTime(); - long snapshotTtl = snapshotDescription.getTtl(); - /* - * Backward compatibility after the patch deployment on HMaster - * Any snapshot with ttl 0 is to be considered as snapshot to keep FOREVER - * Default ttl value specified by {@HConstants.DEFAULT_SNAPSHOT_TTL} - */ - if (snapshotCreatedTime > 0 && snapshotTtl > 0 && - snapshotTtl < TimeUnit.MILLISECONDS.toSeconds(Long.MAX_VALUE)) { - long currentTime = EnvironmentEdgeManager.currentTime(); - if ((snapshotCreatedTime + TimeUnit.SECONDS.toMillis(snapshotTtl)) < currentTime) { - LOG.info("Event: {} Name: {}, CreatedTime: {}, TTL: {}, currentTime: {}", - DELETE_SNAPSHOT_EVENT, snapshotDescription.getName(), snapshotCreatedTime, - snapshotTtl, currentTime); - deleteExpiredSnapshot(snapshotDescription); + synchronized (MUTEX) { + if (LOG.isTraceEnabled()) { + LOG.trace("Snapshot Cleaner Chore is starting up..."); + } + try { + List completedSnapshotsList = + this.snapshotManager.getCompletedSnapshots(); + for (SnapshotProtos.SnapshotDescription snapshotDescription : completedSnapshotsList) { + long snapshotCreatedTime = snapshotDescription.getCreationTime(); + long snapshotTtl = snapshotDescription.getTtl(); + /* + * Backward compatibility after the patch deployment on HMaster + * Any snapshot with ttl 0 is to be considered as snapshot to keep FOREVER + * Default ttl value specified by {@HConstants.DEFAULT_SNAPSHOT_TTL} + */ + if (snapshotCreatedTime > 0 && snapshotTtl > 0 && + snapshotTtl < TimeUnit.MILLISECONDS.toSeconds(Long.MAX_VALUE)) 
{ + long currentTime = EnvironmentEdgeManager.currentTime(); + if ((snapshotCreatedTime + TimeUnit.SECONDS.toMillis(snapshotTtl)) < currentTime) { + LOG.info("Event: {} Name: {}, CreatedTime: {}, TTL: {}, currentTime: {}", + DELETE_SNAPSHOT_EVENT, snapshotDescription.getName(), snapshotCreatedTime, + snapshotTtl, currentTime); + deleteExpiredSnapshot(snapshotDescription); + } } } + } catch (IOException e) { + LOG.error("Error while cleaning up Snapshots...", e); + } + if (LOG.isTraceEnabled()) { + LOG.trace("Snapshot Cleaner Chore is closing..."); } - } catch (IOException e) { - LOG.error("Error while cleaning up Snapshots...", e); - } - if (LOG.isTraceEnabled()) { - LOG.trace("Snapshot Cleaner Chore is closing..."); } } + public Object getMutexLockForSync() { + return MUTEX; + } + private void deleteExpiredSnapshot(SnapshotProtos.SnapshotDescription snapshotDescription) { try { this.snapshotManager.deleteSnapshot(snapshotDescription); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java index 769295497b..4eb7cab5f4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin2.java @@ -755,4 +755,31 @@ public class TestAdmin2 extends TestAdminBase { ADMIN.modifyTable(tableDesc); assertEquals(11111111, ADMIN.getDescriptor(tableName).getMaxFileSize()); } + + @Test + public void testSnapshotAutoCleanupAsync() throws Exception { + testSnapshotAutoCleanup(false); + } + + @Test + public void testSnapshotAutoCleanupSync() throws Exception { + testSnapshotAutoCleanup(true); + } + + private void testSnapshotAutoCleanup(final boolean synchronous) throws IOException { + final boolean initialState = ADMIN.isSnapshotAutoCleanupEnabled(); + // Switch the snapshot auto cleanup state to opposite to initial state + boolean prevState = 
ADMIN.snapshotAutoCleanupSwitch(!initialState, synchronous); + // The previous state should be the original state we observed + assertEquals(initialState, prevState); + // Current state should be opposite of the initial state + assertEquals(!initialState, ADMIN.isSnapshotAutoCleanupEnabled()); + // Reset the state back to what it was initially + prevState = ADMIN.snapshotAutoCleanupSwitch(initialState, synchronous); + // The previous state should be the opposite of the initial state + assertEquals(!initialState, prevState); + // Current state should be the original state again + assertEquals(initialState, ADMIN.isSnapshotAutoCleanupEnabled()); + } + } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index cbfdd3f744..ae7a3172d4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore; import org.apache.hadoop.hbase.master.locking.LockManager; import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; @@ -363,6 +364,11 @@ public class MockNoopMasterServices implements MasterServices { return null; } + @Override + public SnapshotCleanerChore getSnapshotCleanerChore() { + return null; + } + @Override public SnapshotManager getSnapshotManager() { return null; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java index e05213d1c8..1a28950915 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java @@ -105,7 +105,6 @@ public class TestSnapshotCleanerChore { snapshotManager = Mockito.mock(SnapshotManager.class); Stoppable stopper = new StoppableImplementation(); Configuration conf = getSnapshotCleanerConf(); - conf.setStrings("hbase.master.cleaner.snapshot.disable", "false"); SnapshotCleanerChore snapshotCleanerChore = new SnapshotCleanerChore(stopper, conf, snapshotManager); List snapshotDescriptionList = new ArrayList<>(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotFromMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotFromMaster.java index 20ab3349bc..aade69461a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotFromMaster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotFromMaster.java @@ -27,6 +27,7 @@ import java.util.Collection; import java.util.List; import java.util.Set; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; @@ -64,6 +65,7 @@ import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.junit.After; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -74,12 +76,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; +import org.apache.hbase.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.DeleteSnapshotRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .IsSnapshotAutoCleanupEnabledRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .IsSnapshotAutoCleanupEnabledResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos + .SetSnapshotAutoCleanupRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; /** @@ -142,6 +151,7 @@ public class TestSnapshotFromMaster { conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName()); conf.setInt("hbase.hfile.compactions.cleaner.interval", 20 * 1000); + conf.setInt("hbase.master.cleaner.snapshot.interval", 500); } @Before @@ -282,6 +292,89 @@ public class TestSnapshotFromMaster { master.getMasterRpcServices().deleteSnapshot(null, request); } + @Test + public void testGetCompletedSnapshotsWithAutoCleanup() throws Exception { + // Enable auto snapshot cleanup for the cluster + SetSnapshotAutoCleanupRequest setSnapshotAutoCleanupRequest = + SetSnapshotAutoCleanupRequest.newBuilder().setOn(true).build(); + master.getMasterRpcServices().switchSnapshotAutoCleanup(null, setSnapshotAutoCleanupRequest); + + // first check when there are no snapshots + GetCompletedSnapshotsRequest request = GetCompletedSnapshotsRequest.newBuilder().build(); + GetCompletedSnapshotsResponse response = + master.getMasterRpcServices().getCompletedSnapshots(null, request); + assertEquals("Found unexpected number of snapshots", 0, 
response.getSnapshotsCount()); + + // write two snapshots to the fs + createSnapshotWithTtl("snapshot_01", 1L); + createSnapshotWithTtl("snapshot_02", 10L); + + // check that we get both snapshots + response = master.getMasterRpcServices().getCompletedSnapshots(null, request); + assertEquals("Found unexpected number of snapshots", 2, response.getSnapshotsCount()); + + // check that 1 snapshot is auto cleaned after 1 sec of TTL expiration + Uninterruptibles.sleepUninterruptibly(2, TimeUnit.SECONDS); + response = master.getMasterRpcServices().getCompletedSnapshots(null, request); + assertEquals("Found unexpected number of snapshots", 1, response.getSnapshotsCount()); + } + + @Test + public void testGetCompletedSnapshotsWithoutAutoCleanup() throws Exception { + // Disable auto snapshot cleanup for the cluster + SetSnapshotAutoCleanupRequest setSnapshotAutoCleanupRequest = + SetSnapshotAutoCleanupRequest.newBuilder().setOn(false).build(); + master.getMasterRpcServices().switchSnapshotAutoCleanup(null, setSnapshotAutoCleanupRequest); + + // first check when there are no snapshots + GetCompletedSnapshotsRequest request = GetCompletedSnapshotsRequest.newBuilder().build(); + GetCompletedSnapshotsResponse response = + master.getMasterRpcServices().getCompletedSnapshots(null, request); + assertEquals("Found unexpected number of snapshots", 0, response.getSnapshotsCount()); + + // write two snapshots to the fs + createSnapshotWithTtl("snapshot_02", 1L); + createSnapshotWithTtl("snapshot_03", 1L); + + // check that we get both snapshots + response = master.getMasterRpcServices().getCompletedSnapshots(null, request); + assertEquals("Found unexpected number of snapshots", 2, response.getSnapshotsCount()); + + // check that no snapshot is auto cleaned even after 1 sec of TTL expiration + Uninterruptibles.sleepUninterruptibly(2, TimeUnit.SECONDS); + response = master.getMasterRpcServices().getCompletedSnapshots(null, request); + assertEquals("Found unexpected number of snapshots", 2, 
response.getSnapshotsCount()); + } + + @Test + public void testAutoSnapshotCleanupStatus() throws Exception { + // Enable auto snapshot cleanup for the cluster + SetSnapshotAutoCleanupRequest setSnapshotAutoCleanupRequest = + SetSnapshotAutoCleanupRequest.newBuilder().setOn(true).build(); + master.getMasterRpcServices().switchSnapshotAutoCleanup(null, setSnapshotAutoCleanupRequest); + + // Check if auto snapshot cleanup is enabled + IsSnapshotAutoCleanupEnabledRequest isSnapshotAutoCleanupEnabledRequest = + IsSnapshotAutoCleanupEnabledRequest.newBuilder().build(); + IsSnapshotAutoCleanupEnabledResponse isSnapshotAutoCleanupEnabledResponse = + master.getMasterRpcServices().isSnapshotAutoCleanupEnabled(null, + isSnapshotAutoCleanupEnabledRequest); + Assert.assertTrue(isSnapshotAutoCleanupEnabledResponse.getEnabled()); + + // Disable auto snapshot cleanup for the cluster + setSnapshotAutoCleanupRequest = SetSnapshotAutoCleanupRequest.newBuilder() + .setOn(false).build(); + master.getMasterRpcServices().switchSnapshotAutoCleanup(null, setSnapshotAutoCleanupRequest); + + // Check if auto snapshot cleanup is disabled + isSnapshotAutoCleanupEnabledRequest = IsSnapshotAutoCleanupEnabledRequest + .newBuilder().build(); + isSnapshotAutoCleanupEnabledResponse = + master.getMasterRpcServices().isSnapshotAutoCleanupEnabled(null, + isSnapshotAutoCleanupEnabledRequest); + Assert.assertFalse(isSnapshotAutoCleanupEnabledResponse.getEnabled()); + } + /** * Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots * should be retained, while those that are not in a snapshot should be deleted. 
@@ -428,6 +521,16 @@ public class TestSnapshotFromMaster { return builder.getSnapshotDescription(); } + private SnapshotDescription createSnapshotWithTtl(final String snapshotName, final long ttl) + throws IOException { + SnapshotTestingUtils.SnapshotMock snapshotMock = + new SnapshotTestingUtils.SnapshotMock(UTIL.getConfiguration(), fs, rootDir); + SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = + snapshotMock.createSnapshotV2(snapshotName, "test", 0, ttl); + builder.commit(); + return builder.getSnapshotDescription(); + } + @Test public void testAsyncSnapshotWillNotBlockSnapshotHFileCleaner() throws Exception { // Write some data diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java index f9ca75499d..11dc5b0f96 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSVisitor; @@ -642,6 +643,12 @@ public final class SnapshotTestingUtils { return createSnapshot(snapshotName, tableName, numRegions, SnapshotManifestV2.DESCRIPTOR_VERSION); } + public SnapshotBuilder createSnapshotV2(final String snapshotName, final String tableName, + final int numRegions, final long ttl) throws IOException { + return createSnapshot(snapshotName, tableName, numRegions, + SnapshotManifestV2.DESCRIPTOR_VERSION, ttl); + } + private SnapshotBuilder createSnapshot(final String 
snapshotName, final String tableName, final int version) throws IOException { return createSnapshot(snapshotName, tableName, TEST_NUM_REGIONS, version); @@ -663,6 +670,22 @@ public final class SnapshotTestingUtils { return new SnapshotBuilder(conf, fs, rootDir, htd, desc, regions); } + private SnapshotBuilder createSnapshot(final String snapshotName, final String tableName, + final int numRegions, final int version, final long ttl) throws IOException { + TableDescriptor htd = createHtd(tableName); + RegionData[] regions = createTable(htd, numRegions); + SnapshotProtos.SnapshotDescription desc = SnapshotProtos.SnapshotDescription.newBuilder() + .setTable(htd.getTableName().getNameAsString()) + .setName(snapshotName) + .setVersion(version) + .setCreationTime(EnvironmentEdgeManager.currentTime()) + .setTtl(ttl) + .build(); + Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir, conf); + SnapshotDescriptionUtils.writeSnapshotInfo(desc, workingDir, fs); + return new SnapshotBuilder(conf, fs, rootDir, htd, desc, regions); + } + public TableDescriptor createHtd(final String tableName) { return TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName)) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY)) diff --git a/hbase-shell/src/main/ruby/hbase/admin.rb b/hbase-shell/src/main/ruby/hbase/admin.rb index 8d85edd166..3daf0c0216 100644 --- a/hbase-shell/src/main/ruby/hbase/admin.rb +++ b/hbase-shell/src/main/ruby/hbase/admin.rb @@ -555,6 +555,22 @@ module Hbase @admin.getDescriptor(TableName.valueOf(table_name)).toStringTableAttributes end + #---------------------------------------------------------------------------------------------- + # Enable/disable snapshot auto-cleanup based on TTL expiration + # Returns previous snapshot auto-cleanup switch setting. 
+ def snapshot_auto_cleanup_switch(enable_disable) + @admin.snapshotAutoCleanupSwitch( + java.lang.Boolean.valueOf(enable_disable), java.lang.Boolean.valueOf(false) + ) + end + + #---------------------------------------------------------------------------------------------- + # Query the current state of the snapshot auto-cleanup based on TTL + # Returns the snapshot auto-cleanup state (true if enabled) + def snapshot_auto_cleanup_enabled? + @admin.isSnapshotAutoCleanupEnabled + end + #---------------------------------------------------------------------------------------------- # Truncates table (deletes all records by recreating the table) def truncate(table_name_str) diff --git a/hbase-shell/src/main/ruby/shell.rb b/hbase-shell/src/main/ruby/shell.rb index 5d951cac42..682dbde3a9 100644 --- a/hbase-shell/src/main/ruby/shell.rb +++ b/hbase-shell/src/main/ruby/shell.rb @@ -354,6 +354,8 @@ Shell.load_command_group( compact_rs compaction_state trace + snapshot_auto_cleanup_switch + snapshot_auto_cleanup_enabled splitormerge_switch splitormerge_enabled clear_compaction_queues diff --git a/hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_enabled.rb b/hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_enabled.rb new file mode 100644 index 0000000000..1866198bd6 --- /dev/null +++ b/hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_enabled.rb @@ -0,0 +1,39 @@ +# +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with this +# work for additional information regarding copyright ownership. The ASF +# licenses this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# Prints if snapshot auto cleanup based on TTL is enabled + +module Shell + module Commands + class SnapshotAutoCleanupEnabled < Command + def help + <<-EOF +Query the snapshot auto-cleanup state. +Examples: + + hbase> snapshot_auto_cleanup_enabled + EOF + end + + def command + state = admin.snapshot_auto_cleanup_enabled? + formatter.row([state.to_s]) + state + end + end + end +end diff --git a/hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_switch.rb b/hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_switch.rb new file mode 100644 index 0000000000..8b263d7386 --- /dev/null +++ b/hbase-shell/src/main/ruby/shell/commands/snapshot_auto_cleanup_switch.rb @@ -0,0 +1,43 @@ +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Switch snapshot auto-cleanup based on TTL expiration + +module Shell + module Commands + class SnapshotAutoCleanupSwitch < Command + def help + <<-EOF +Enable/Disable snapshot auto-cleanup based on snapshot TTL. +Returns previous snapshot auto-cleanup switch state. +Examples: + + hbase> snapshot_auto_cleanup_switch true + hbase> snapshot_auto_cleanup_switch false + EOF + end + + def command(enable_disable) + prev_state = admin.snapshot_auto_cleanup_switch(enable_disable) ? 'true' : 'false' + formatter.row(["Previous snapshot auto-cleanup state : #{prev_state}"]) + prev_state + end + end + end +end diff --git a/hbase-shell/src/test/ruby/hbase/admin_test.rb b/hbase-shell/src/test/ruby/hbase/admin_test.rb index 959159c203..518b4bdd06 100644 --- a/hbase-shell/src/test/ruby/hbase/admin_test.rb +++ b/hbase-shell/src/test/ruby/hbase/admin_test.rb @@ -182,6 +182,20 @@ module Hbase end end + #------------------------------------------------------------------------------- + + define_test 'snapshot auto cleanup should work' do + command(:snapshot_auto_cleanup_switch, true) + output = capture_stdout { command(:snapshot_auto_cleanup_enabled) } + assert(output.include?('true')) + + command(:snapshot_auto_cleanup_switch, false) + output = capture_stdout { command(:snapshot_auto_cleanup_enabled) } + assert(output.include?('false')) + end + + #------------------------------------------------------------------------------- + define_test "create should fail with non-string/non-hash column args" do assert_raise(ArgumentError) do command(:create, @create_test_name, 123) diff --git a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java index d7aea33f99..94ec48cbeb 100644 --- a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java +++ b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java @@ -1142,6 +1142,16 
@@ public class ThriftAdmin implements Admin { throw new NotImplementedException("hasUserPermissions not supported in ThriftAdmin"); } + @Override + public boolean snapshotAutoCleanupSwitch(boolean on, boolean synchronous) { + throw new NotImplementedException("snapshotAutoCleanupSwitch not supported in ThriftAdmin"); + } + + @Override + public boolean isSnapshotAutoCleanupEnabled() { + throw new NotImplementedException("isSnapshotAutoCleanupEnabled not supported in ThriftAdmin"); + } + @Override public Future splitRegionAsync(byte[] regionName) throws IOException { return splitRegionAsync(regionName, null); diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/SnapshotAutoCleanupTracker.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/SnapshotAutoCleanupTracker.java new file mode 100644 index 0000000000..f65e581805 --- /dev/null +++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/SnapshotAutoCleanupTracker.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.zookeeper; + +import java.io.IOException; + +import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.yetus.audience.InterfaceAudience; +import org.apache.zookeeper.KeeperException; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotAutoCleanupProtos; + +/** + * Tracks status of snapshot auto cleanup based on TTL + */ +@InterfaceAudience.Private +public class SnapshotAutoCleanupTracker extends ZKNodeTracker { + + /** + * Constructs a new ZK node tracker. + * + *

After construction, use {@link #start} to kick off tracking. + * + * @param watcher reference to the {@link ZKWatcher} which also contains configuration and + * constants + * @param abortable used to abort if a fatal error occurs + */ + public SnapshotAutoCleanupTracker(ZKWatcher watcher, Abortable abortable) { + super(watcher, watcher.getZNodePaths().snapshotAutoCleanupZNode, abortable); + } + + /** + * Returns the current state of the snapshot auto cleanup based on TTL + * + * @return true if the snapshot auto cleanup is enabled, + * false otherwise. + */ + public boolean isSnapshotAutoCleanupEnabled() { + byte[] snapshotAutoCleanupZNodeData = super.getData(false); + try { + // if data in ZK is null, use default of on. + return snapshotAutoCleanupZNodeData == null || + parseFrom(snapshotAutoCleanupZNodeData).getSnapshotAutoCleanupOn(); + } catch (DeserializationException dex) { + LOG.error("ZK state for Snapshot Auto Cleanup could not be parsed " + + Bytes.toStringBinary(snapshotAutoCleanupZNodeData), dex); + // return false to be safe. 
+ return false; + } + } + + /** + * Set snapshot auto clean on/off + * + * @param snapshotAutoCleanupOn true if the snapshot auto cleanup should be on, + * false otherwise + * @throws KeeperException if ZooKeeper operation fails + */ + public void setSnapshotAutoCleanupOn(final boolean snapshotAutoCleanupOn) + throws KeeperException { + byte [] snapshotAutoCleanupZNodeData = toByteArray(snapshotAutoCleanupOn); + try { + ZKUtil.setData(watcher, watcher.getZNodePaths().snapshotAutoCleanupZNode, + snapshotAutoCleanupZNodeData); + } catch(KeeperException.NoNodeException nne) { + ZKUtil.createAndWatch(watcher, watcher.getZNodePaths().snapshotAutoCleanupZNode, + snapshotAutoCleanupZNodeData); + } + super.nodeDataChanged(watcher.getZNodePaths().snapshotAutoCleanupZNode); + } + + private byte[] toByteArray(final boolean isSnapshotAutoCleanupOn) { + SnapshotAutoCleanupProtos.SnapshotAutoCleanupState.Builder builder = + SnapshotAutoCleanupProtos.SnapshotAutoCleanupState.newBuilder(); + builder.setSnapshotAutoCleanupOn(isSnapshotAutoCleanupOn); + return ProtobufUtil.prependPBMagic(builder.build().toByteArray()); + } + + private SnapshotAutoCleanupProtos.SnapshotAutoCleanupState parseFrom(final byte[] pbBytes) + throws DeserializationException { + ProtobufUtil.expectPBMagicPrefix(pbBytes); + SnapshotAutoCleanupProtos.SnapshotAutoCleanupState.Builder builder = + SnapshotAutoCleanupProtos.SnapshotAutoCleanupState.newBuilder(); + try { + int magicLen = ProtobufUtil.lengthOfPBMagic(); + ProtobufUtil.mergeFrom(builder, pbBytes, magicLen, pbBytes.length - magicLen); + } catch (IOException e) { + throw new DeserializationException(e); + } + return builder.build(); + } + +} diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 0295f42113..ffa6101a6b 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -2879,13 +2879,43 @@ Value 0 for this config indicates TTL: FOREVER +.Enable/Disable 
Snapshot Auto Cleanup on running cluster: -At any point of time, if Snapshot cleanup is supposed to be stopped due to -some snapshot restore activity, it is advisable to disable Snapshot Cleaner with - config: +By default, snapshot auto cleanup based on TTL is enabled +for any new cluster. +At any point in time, if snapshot cleanup is supposed to be stopped due to +some snapshot restore activity or any other reason, it is advisable +to disable it using the shell command: -`hbase.master.cleaner.snapshot.disable`: "true" +---- +hbase> snapshot_auto_cleanup_switch false +---- + +We can re-enable it using: + +---- +hbase> snapshot_auto_cleanup_switch true +---- + +The shell command with switch false disables snapshot auto +cleanup activity based on TTL and returns the previous state of +the activity (true: already running, false: already disabled). + +A sample output for the above commands: +---- +Previous snapshot auto-cleanup state : true +Took 0.0069 seconds +=> "true" +---- + +We can query whether snapshot auto cleanup is enabled for +the cluster using: + +---- +hbase> snapshot_auto_cleanup_enabled +---- +The command returns either true or false. [[ops.snapshots.list]] === Listing Snapshots -- 2.17.2 (Apple Git-113)