From 141a38a13cb3559808f469d789927732740c17c7 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 11 Jul 2019 00:17:41 +0530 Subject: [PATCH 1/3] HBASE-22648 : Introducing Snapshot TTL --- .../org/apache/hadoop/hbase/client/Admin.java | 45 +++++ .../hbase/client/SnapshotDescription.java | 41 +++-- .../hbase/shaded/protobuf/ProtobufUtil.java | 15 +- .../ClientSnapshotDescriptionUtils.java | 3 +- .../org/apache/hadoop/hbase/HConstants.java | 3 + .../src/main/protobuf/Snapshot.proto | 9 +- hbase-protocol/src/main/protobuf/HBase.proto | 7 +- .../apache/hadoop/hbase/master/HMaster.java | 6 + .../master/cleaner/SnapshotCleanerChore.java | 99 ++++++++++ .../snapshot/SnapshotDescriptionUtils.java | 13 ++ .../hadoop/hbase/snapshot/SnapshotInfo.java | 11 +- .../hbase-webapps/master/snapshot.jsp | 2 + .../hbase-webapps/master/snapshotsStats.jsp | 2 + .../hbase/client/TestSnapshotFromClient.java | 2 +- .../TestSnapshotTemporaryDirectory.java | 5 +- .../cleaner/TestSnapshotCleanerChore.java | 173 ++++++++++++++++++ hbase-shell/src/main/ruby/hbase/admin.rb | 6 +- 17 files changed, 404 insertions(+), 38 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index 6bf5ae9dc2..a99753029d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -1372,6 +1372,51 @@ public interface Admin extends Abortable, Closeable { snapshot(new SnapshotDescription(snapshotName, tableName, type)); } + /** + * Create typed snapshot of the table. Snapshots are considered unique based on the name of the + * snapshot. Snapshots are taken sequentially even when requested concurrently, across + * all tables. Attempts to take a snapshot with the same name (even a different type or with + * different parameters) will fail with a {@link SnapshotCreationException} indicating the + * duplicate naming. Snapshot names follow the same naming constraints as tables in HBase. See + * {@link org.apache.hadoop.hbase.TableName#isLegalFullyQualifiedTableName(byte[])}. + * Snapshot can live with ttl seconds. + * + * @param snapshotName name to give the snapshot on the filesystem. Must be unique from all other + * snapshots stored on the cluster + * @param tableName name of the table to snapshot + * @param type type of snapshot to take + * @param ttl time to live in seconds. After expiry, the snapshot should be deleted + * @throws IOException we fail to reach the master + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + default void snapshot(String snapshotName, TableName tableName, SnapshotType type, Long ttl) + throws IOException, SnapshotCreationException, IllegalArgumentException { + snapshot(new SnapshotDescription(snapshotName, tableName, type, ttl)); + } + + /** + * Create typed snapshot of the table. Snapshots are considered unique based on the name of the + * snapshot. Snapshots are taken sequentially even when requested concurrently, across + * all tables. Attempts to take a snapshot with the same name (even a different type or with + * different parameters) will fail with a {@link SnapshotCreationException} indicating the + * duplicate naming. Snapshot names follow the same naming constraints as tables in HBase. See + * {@link org.apache.hadoop.hbase.TableName#isLegalFullyQualifiedTableName(byte[])}. + * Snapshot can live with ttl seconds. + * + * @param snapshotName name to give the snapshot on the filesystem. Must be unique from all other + * snapshots stored on the cluster + * @param tableName name of the table to snapshot + * @param ttl time to live in seconds. After expiry, the snapshot should be deleted + * @throws IOException we fail to reach the master + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + default void snapshot(String snapshotName, TableName tableName, Long ttl) + throws IOException, SnapshotCreationException, IllegalArgumentException { + snapshot(new SnapshotDescription(snapshotName, tableName, SnapshotType.FLUSH, ttl)); + } + /** * Take a snapshot and wait for the server to complete that snapshot (blocking). Snapshots are * considered unique based on the name of the snapshot. Snapshots are taken sequentially diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java index 0b6f196d61..9e07b6adcb 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java @@ -30,6 +30,7 @@ public class SnapshotDescription { private final SnapshotType snapShotType; private final String owner; private final long creationTime; + private final long ttl; private final int version; public SnapshotDescription(String name) { @@ -48,7 +49,7 @@ public class SnapshotDescription { } public SnapshotDescription(String name, TableName table) { - this(name, table, SnapshotType.DISABLED, null); + this(name, table, SnapshotType.DISABLED, null, -1, -1, -1); } /** @@ -63,14 +64,14 @@ public class SnapshotDescription { } public SnapshotDescription(String name, TableName table, SnapshotType type) { - this(name, table, type, null); + this(name, table, type, null, -1, -1, -1); } /** - * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName - * instance instead. * @see #SnapshotDescription(String, TableName, SnapshotType, String) * @see HBASE-16892 + * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName + * instance instead. */ @Deprecated public SnapshotDescription(String name, String table, SnapshotType type, String owner) { @@ -78,31 +79,37 @@ public class SnapshotDescription { } public SnapshotDescription(String name, TableName table, SnapshotType type, String owner) { - this(name, table, type, owner, -1, -1); + this(name, table, type, owner, -1, -1, -1); } /** - * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName - * instance instead. - * @see #SnapshotDescription(String, TableName, SnapshotType, String, long, int) + * @see #SnapshotDescription(String, TableName, SnapshotType, String, long, long, int) * @see HBASE-16892 + * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName + * instance instead. */ @Deprecated public SnapshotDescription(String name, String table, SnapshotType type, String owner, long creationTime, int version) { - this(name, TableName.valueOf(table), type, owner, creationTime, version); + this(name, TableName.valueOf(table), type, owner, creationTime, -1, version); } public SnapshotDescription(String name, TableName table, SnapshotType type, String owner, - long creationTime, int version) { + long creationTime, long ttl, int version) { this.name = name; this.table = table; this.snapShotType = type; this.owner = owner; this.creationTime = creationTime; + this.ttl = ttl; this.version = version; } + public SnapshotDescription(String snapshotName, TableName tableName, SnapshotType type, + Long ttl) { + this(snapshotName, tableName, type, null, -1, ttl, -1); + } + public String getName() { return this.name; } @@ -139,15 +146,21 @@ public class SnapshotDescription { return this.creationTime; } + // get snapshot ttl in sec + public long getTtl() { + return ttl; + } + public int getVersion() { return this.version; } @Override public String toString() { - return "SnapshotDescription: name = " + ((name != null) ? name : null) + "/table = " - + ((table != null) ? table : null) + " /owner = " + ((owner != null) ? owner : null) - + (creationTime != -1 ? ("/creationtime = " + creationTime) : "") - + (version != -1 ? ("/version = " + version) : ""); + return "SnapshotDescription: name = " + name + "/table = " + + table + " /owner = " + owner + + (creationTime != -1 ? ("/creationtime = " + creationTime) : "") + + (ttl != -1 ? ("/ttl = " + ttl) : "") + + (version != -1 ? ("/version = " + version) : ""); } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index 0072f8922b..ae067a5873 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -2932,12 +2932,17 @@ public final class ProtobufUtil { if (snapshotDesc.getCreationTime() != -1L) { builder.setCreationTime(snapshotDesc.getCreationTime()); } + if (snapshotDesc.getTtl() != -1L && snapshotDesc.getTtl() < Long.MAX_VALUE / 1000) { + builder.setTtl(snapshotDesc.getTtl()); + } else { + // default snapshot ttl(sec) in case ttl not provided or not within range + builder.setTtl(HConstants.DEFAULT_SNAPSHOT_TTL); + } if (snapshotDesc.getVersion() != -1) { builder.setVersion(snapshotDesc.getVersion()); } builder.setType(ProtobufUtil.createProtosSnapShotDescType(snapshotDesc.getType())); - SnapshotProtos.SnapshotDescription snapshot = builder.build(); - return snapshot; + return builder.build(); } /** @@ -2950,9 +2955,9 @@ public final class ProtobufUtil { public static SnapshotDescription createSnapshotDesc(SnapshotProtos.SnapshotDescription snapshotDesc) { return new SnapshotDescription(snapshotDesc.getName(), - snapshotDesc.hasTable() ? TableName.valueOf(snapshotDesc.getTable()) : null, - createSnapshotType(snapshotDesc.getType()), snapshotDesc.getOwner(), - snapshotDesc.getCreationTime(), snapshotDesc.getVersion()); + snapshotDesc.hasTable() ? TableName.valueOf(snapshotDesc.getTable()) : null, + createSnapshotType(snapshotDesc.getType()), snapshotDesc.getOwner(), + snapshotDesc.getCreationTime(), snapshotDesc.getTtl(), snapshotDesc.getVersion()); } public static RegionLoadStats createRegionLoadStats(ClientProtos.RegionLoadStats stats) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java index 5a25c52bc0..582869ef33 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java @@ -63,6 +63,7 @@ public class ClientSnapshotDescriptionUtils { } return "{ ss=" + ssd.getName() + " table=" + (ssd.hasTable()?TableName.valueOf(ssd.getTable()):"") + - " type=" + ssd.getType() + " }"; + " type=" + ssd.getType() + + " ttl=" + ssd.getTtl() + " }"; } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 86e1f712e2..c1833bcb95 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1445,6 +1445,9 @@ public final class HConstants { "hbase.util.default.lossycounting.errorrate"; public static final String NOT_IMPLEMENTED = "Not implemented"; + // Default Snapshot TTL - 30 days (good enough?) + public static final long DEFAULT_SNAPSHOT_TTL = 24 * 3600 * 30; + private HConstants() { // Can't be instantiated with this ctor. } diff --git a/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto b/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto index 479e33ee54..28179ba913 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto @@ -35,15 +35,16 @@ message SnapshotDescription { required string name = 1; optional string table = 2; // not needed for delete, but checked for in taking snapshot optional int64 creation_time = 3 [default = 0]; + optional int64 ttl = 4 [default = 0]; enum Type { DISABLED = 0; FLUSH = 1; SKIPFLUSH = 2; } - optional Type type = 4 [default = FLUSH]; - optional int32 version = 5; - optional string owner = 6; - optional UsersAndPermissions users_and_permissions = 7; + optional Type type = 5 [default = FLUSH]; + optional int32 version = 6; + optional string owner = 7; + optional UsersAndPermissions users_and_permissions = 8; } message SnapshotFileInfo { diff --git a/hbase-protocol/src/main/protobuf/HBase.proto b/hbase-protocol/src/main/protobuf/HBase.proto index 7d064f3304..309295e96e 100644 --- a/hbase-protocol/src/main/protobuf/HBase.proto +++ b/hbase-protocol/src/main/protobuf/HBase.proto @@ -176,14 +176,15 @@ message SnapshotDescription { required string name = 1; optional string table = 2; // not needed for delete, but checked for in taking snapshot optional int64 creation_time = 3 [default = 0]; + optional int64 ttl = 4 [default = 0]; enum Type { DISABLED = 0; FLUSH = 1; SKIPFLUSH = 2; } - optional Type type = 4 [default = FLUSH]; - optional int32 version = 5; - optional string owner = 6; + optional Type type = 5 [default = FLUSH]; + optional int32 version = 6; + optional string owner = 7; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 5e0362345f..18f8cae7c5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -114,6 +114,7 @@ import org.apache.hadoop.hbase.master.cleaner.CleanerChore; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; import org.apache.hadoop.hbase.master.cleaner.LogCleaner; import org.apache.hadoop.hbase.master.cleaner.ReplicationBarrierCleaner; +import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore; import org.apache.hadoop.hbase.master.locking.LockManager; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType; @@ -382,6 +383,7 @@ public class HMaster extends HRegionServer implements MasterServices { private RegionNormalizerChore normalizerChore; private ClusterStatusChore clusterStatusChore; private ClusterStatusPublisher clusterStatusPublisherChore = null; + private SnapshotCleanerChore snapshotCleanerChore; CatalogJanitor catalogJanitorChore; private LogCleaner logCleaner; @@ -1457,6 +1459,9 @@ public class HMaster extends HRegionServer implements MasterServices { replicationPeerManager); getChoreService().scheduleChore(replicationBarrierCleaner); + this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager()); + getChoreService().scheduleChore(this.snapshotCleanerChore); + serviceStarted = true; if (LOG.isTraceEnabled()) { LOG.trace("Started service threads"); @@ -1574,6 +1579,7 @@ public class HMaster extends HRegionServer implements MasterServices { choreService.cancelChore(this.logCleaner); choreService.cancelChore(this.hfileCleaner); choreService.cancelChore(this.replicationBarrierCleaner); + choreService.cancelChore(this.snapshotCleanerChore); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java new file mode 100644 index 0000000000..9b24e515fb --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.cleaner; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ScheduledChore; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; + +/** + * This chore, every time it runs, will try to delete snapshots that are expired based on TTL in + * seconds configured for each Snapshot + */ +@InterfaceAudience.Private +public class SnapshotCleanerChore extends ScheduledChore { + + private static final Logger LOGGER = LoggerFactory.getLogger(SnapshotCleanerChore.class); + private static final String SNAPSHOT_CLEANER_CHORE_NAME = "SnapshotCleaner"; + private static final String SNAPSHOT_CLEANER_INTERVAL = "hbase.master.cleaner.snapshot.interval"; + private static final int SNAPSHOT_CLEANER_DEFAULT_INTERVAL = 600 * 1000; // Default 10 min + private static final String DELETE_SNAPSHOT_EVENT = + "Eligible Snapshot for cleanup due to expired TTL."; + + private final SnapshotManager snapshotManager; + + public SnapshotCleanerChore(Stoppable stopper, Configuration configuration, + SnapshotManager snapshotManager) { + super(SNAPSHOT_CLEANER_CHORE_NAME, stopper, configuration.getInt(SNAPSHOT_CLEANER_INTERVAL, + SNAPSHOT_CLEANER_DEFAULT_INTERVAL)); + this.snapshotManager = snapshotManager; + } + + @Override + protected void chore() { + LOGGER.trace("Snapshot Cleaner Chore is starting up..."); + try { + List completedSnapshotsList = + this.snapshotManager.getCompletedSnapshots(); + if (CollectionUtils.isNotEmpty(completedSnapshotsList)) { + for (SnapshotProtos.SnapshotDescription snapshotDescription : completedSnapshotsList) { + long snapshotCreatedTime = snapshotDescription.getCreationTime(); + long snapshotTtl = snapshotDescription.getTtl(); + /* + * Backward compatibility after the patch deployment on HMaster + * Any snapshot with negative or zero ttl should not be deleted + * Default ttl value specified by {@HConstants.DEFAULT_SNAPSHOT_TTL} + */ + if (snapshotCreatedTime > 0 && snapshotTtl > 0 && + snapshotTtl < (Long.MAX_VALUE / 1000)) { + long currentTime = EnvironmentEdgeManager.currentTime(); + if ((snapshotCreatedTime + snapshotTtl * 1000) < currentTime) { + LOGGER.info("Event: {} Name: {}, CreatedTime: {}, TTL: {}, currentTime: {}", + DELETE_SNAPSHOT_EVENT, snapshotDescription.getName(), snapshotCreatedTime, + snapshotTtl, currentTime); + deleteExpiredSnapshot(snapshotDescription); + } + } + } + } + } catch (IOException e) { + LOGGER.error("Error while cleaning up Snapshots. Stopping Snapshot Cleaner Chore...", e); + } + LOGGER.trace("Snapshot Cleaner Chore is closing..."); + } + + private void deleteExpiredSnapshot(SnapshotProtos.SnapshotDescription snapshotDescription) { + try { + this.snapshotManager.deleteSnapshot(snapshotDescription); + } catch (Exception e) { + LOGGER.error("Error while deleting Snapshot: {}", snapshotDescription.getName(), e); + } + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java index 18d8b769d8..e5a99ed17d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java @@ -124,6 +124,9 @@ public final class SnapshotDescriptionUtils { /** Default value if no start time is specified */ public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0; + // Default value if no ttl is specified for Snapshot + private static final long NO_SNAPSHOT_TTL_SPECIFIED = 0; + public static final String MASTER_SNAPSHOT_TIMEOUT_MILLIS = "hbase.snapshot.master.timeout.millis"; /** By default, wait 300 seconds for a snapshot to complete */ @@ -300,6 +303,16 @@ public final class SnapshotDescriptionUtils { snapshot = builder.build(); } + long ttl = snapshot.getTtl(); + // set default ttl(sec) if it is not set already or the value is out of the range + if (ttl == SnapshotDescriptionUtils.NO_SNAPSHOT_TTL_SPECIFIED || + ttl > (Long.MAX_VALUE / 1000)) { + ttl = HConstants.DEFAULT_SNAPSHOT_TTL; + } + SnapshotDescription.Builder builder = snapshot.toBuilder(); + builder.setTtl(ttl); + snapshot = builder.build(); + // set the acl to snapshot if security feature is enabled. if (isSecurityAvailable(conf)) { snapshot = writeAclToSnapshotDescription(snapshot, conf); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java index b1514f7b09..b78ba5efc9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java @@ -374,12 +374,12 @@ public final class SnapshotInfo extends AbstractHBaseTool { // List Available Snapshots if (listSnapshots) { SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); - System.out.printf("%-20s | %-20s | %s%n", "SNAPSHOT", "CREATION TIME", "TABLE NAME"); + System.out.printf("%-20s | %-20s | %-20s | %s%n", "SNAPSHOT", "CREATION TIME", "TTL IN SEC", + "TABLE NAME"); for (SnapshotDescription desc: getSnapshotList(conf)) { - System.out.printf("%-20s | %20s | %s%n", - desc.getName(), - df.format(new Date(desc.getCreationTime())), - desc.getTableNameAsString()); + System.out.printf("%-20s | %20s | %20s | %s%n", desc.getName(), + df.format(new Date(desc.getCreationTime())), desc.getTtl(), + desc.getTableNameAsString()); } return 0; } @@ -432,6 +432,7 @@ public final class SnapshotInfo extends AbstractHBaseTool { System.out.println(" Table: " + snapshotDesc.getTable()); System.out.println(" Format: " + snapshotDesc.getVersion()); System.out.println("Created: " + df.format(new Date(snapshotDesc.getCreationTime()))); + System.out.println(" Ttl: " + snapshotDesc.getTtl()); System.out.println(" Owner: " + snapshotDesc.getOwner()); System.out.println(); } diff --git a/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp b/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp index 6e1c9eb4cf..1f9a2f2e03 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp @@ -84,6 +84,7 @@ Table Creation Time + Time To Live(Sec) Type Format Version State @@ -99,6 +100,7 @@ <% } %> <%= new Date(snapshot.getCreationTime()) %> + <%= snapshot.getTtl() %> <%= snapshot.getType() %> <%= snapshot.getVersion() %> <% if (stats.isSnapshotCorrupted()) { %> diff --git a/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp b/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp index 719ab4f220..5e3d058084 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp @@ -65,6 +65,7 @@ Snapshot Name Table Creation Time + TTL(Sec) Shared Storefile Size Mob Storefile Size Archived Storefile Size @@ -82,6 +83,7 @@ <%= snapshotTable.getNameAsString() %> <%= new Date(snapshotDesc.getCreationTime()) %> + <%= snapshotDesc.getTtl() %> <%= StringUtils.humanReadableInt(stats.getSharedStoreFilesSize()) %> <%= StringUtils.humanReadableInt(stats.getMobStoreFilesSize()) %> <%= StringUtils.humanReadableInt(stats.getArchivedStoreFileSize()) %> diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java index 45a36205ca..0f4a4f8cb7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java @@ -219,7 +219,7 @@ public class TestSnapshotFromClient { String snapshot = SNAPSHOT_NAME; admin.snapshot(new SnapshotDescription(SNAPSHOT_NAME, TABLE_NAME, - SnapshotType.DISABLED, null, -1, SnapshotManifestV1.DESCRIPTOR_VERSION)); + SnapshotType.DISABLED, null, -1, 1000, SnapshotManifestV1.DESCRIPTOR_VERSION)); LOG.debug("Snapshot completed."); // make sure we have the snapshot diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java index face3a2701..2e2981957b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java @@ -471,9 +471,8 @@ public class TestSnapshotTemporaryDirectory { private void takeSnapshot(TableName tableName, String snapshotName, boolean disabled) throws IOException { SnapshotType type = disabled ? SnapshotType.DISABLED : SnapshotType.FLUSH; - SnapshotDescription desc = - new SnapshotDescription(snapshotName, tableName.getNameAsString(), type, null, -1, - manifestVersion); + SnapshotDescription desc = new SnapshotDescription(snapshotName, tableName, type, null, -1, + -1, manifestVersion); admin.snapshot(desc); } } \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java new file mode 100644 index 0000000000..6139f9b623 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.cleaner; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +@Category({MasterTests.class, SmallTests.class}) +public class TestSnapshotCleanerChore { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotCleanerChore.class); + + private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotCleanerChore.class); + + private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility(); + + private SnapshotManager snapshotManager; + + private Configuration getSnapshotCleanerConf() { + Configuration conf = HBASE_TESTING_UTILITY.getConfiguration(); + conf.setInt("hbase.master.cleaner.snapshot.interval", 100); + return conf; + } + + + @Test + public void testSnapshotCleanerWithoutAnyCompletedSnapshot() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + try { + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(0)).deleteSnapshot(Mockito.any()); + } + + @Test + public void testSnapshotCleanerWithNoTtlExpired() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + List snapshotDescriptionList = new ArrayList<>(); + snapshotDescriptionList.add(getSnapshotDescription(-2, "snapshot01", "table01", + EnvironmentEdgeManager.currentTime() - 100000)); + snapshotDescriptionList.add(getSnapshotDescription(10, "snapshot02", "table02", + EnvironmentEdgeManager.currentTime())); + Mockito.when(snapshotManager.getCompletedSnapshots()).thenReturn(snapshotDescriptionList); + try { + LOG.debug("2 Snapshots are completed but TTL is not expired for any of them"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(0)).deleteSnapshot(Mockito.any()); + } + + @Test + public void testSnapshotCleanerWithSomeTtlExpired() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + List snapshotDescriptionList = new ArrayList<>(); + snapshotDescriptionList.add(getSnapshotDescription(10, "snapshot01", "table01", 1)); + snapshotDescriptionList.add(getSnapshotDescription(5, "snapshot02", "table02", 2)); + snapshotDescriptionList.add(getSnapshotDescription(30, "snapshot01", "table01", + EnvironmentEdgeManager.currentTime())); + snapshotDescriptionList.add(getSnapshotDescription(0, "snapshot02", "table02", + EnvironmentEdgeManager.currentTime())); + snapshotDescriptionList.add(getSnapshotDescription(40, "snapshot03", "table03", + EnvironmentEdgeManager.currentTime())); + Mockito.when(snapshotManager.getCompletedSnapshots()).thenReturn(snapshotDescriptionList); + try { + LOG.debug("5 Snapshots are completed. TTL is expired for 2 them. Going to delete them"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(2)).deleteSnapshot(Mockito.any()); + } + + @Test + public void testSnapshotCleanerWithReadIOE() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + Mockito.when(snapshotManager.getCompletedSnapshots()).thenThrow(IOException.class); + try { + LOG.debug("While getting completed Snapshots, IOException would occur. Hence, No Snapshot" + + " should be deleted"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(0)).deleteSnapshot(Mockito.any()); + } + + private SnapshotProtos.SnapshotDescription getSnapshotDescription(final long ttl, + final String snapshotName, final String tableName, final long snapshotCreationTime) { + SnapshotProtos.SnapshotDescription.Builder snapshotDescriptionBuilder = + SnapshotProtos.SnapshotDescription.newBuilder(); + snapshotDescriptionBuilder.setTtl(ttl); + snapshotDescriptionBuilder.setName(snapshotName); + snapshotDescriptionBuilder.setTable(tableName); + snapshotDescriptionBuilder.setType(SnapshotProtos.SnapshotDescription.Type.FLUSH); + snapshotDescriptionBuilder.setCreationTime(snapshotCreationTime); + return snapshotDescriptionBuilder.build(); + } + + /** + * Simple helper class that just keeps track of whether or not its stopped. + */ + private static class StoppableImplementation implements Stoppable { + + private volatile boolean stop = false; + + @Override + public void stop(String why) { + this.stop = true; + } + + @Override + public boolean isStopped() { + return this.stop; + } + + } + +} \ No newline at end of file diff --git a/hbase-shell/src/main/ruby/hbase/admin.rb b/hbase-shell/src/main/ruby/hbase/admin.rb index 5637f7f992..52dabd3c2b 100644 --- a/hbase-shell/src/main/ruby/hbase/admin.rb +++ b/hbase-shell/src/main/ruby/hbase/admin.rb @@ -1108,11 +1108,13 @@ module Hbase @admin.snapshot(snapshot_name, table_name) else args.each do |arg| + ttl = arg[TTL] + ttl = ttl ? ttl.to_java(:long) : -1 if arg[SKIP_FLUSH] == true @admin.snapshot(snapshot_name, table_name, - org.apache.hadoop.hbase.client.SnapshotType::SKIPFLUSH) + org.apache.hadoop.hbase.client.SnapshotType::SKIPFLUSH, ttl) else - @admin.snapshot(snapshot_name, table_name) + @admin.snapshot(snapshot_name, table_name, ttl) end end end -- 2.17.2 (Apple Git-113) From e368685c34fb55d7919258a72629e913acfe1bc9 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Fri, 12 Jul 2019 19:14:36 +0530 Subject: [PATCH 2/3] changes incorporating review comments --- .../hbase/client/SnapshotDescription.java | 4 +- .../src/main/resources/hbase-default.xml | 10 ++++ .../master/cleaner/SnapshotCleanerChore.java | 59 +++++++++++-------- .../snapshot/SnapshotDescriptionUtils.java | 2 + .../cleaner/TestSnapshotCleanerChore.java | 26 ++++++-- .../asciidoc/_chapters/hbase-default.adoc | 14 +++++ src/main/asciidoc/_chapters/ops_mgt.adoc | 34 +++++++++++ 7 files changed, 117 insertions(+), 32 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java index 9e07b6adcb..2d31eae3ee 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java @@ -71,7 +71,7 @@ public class SnapshotDescription { * @see #SnapshotDescription(String, TableName, SnapshotType, String) * @see HBASE-16892 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName - * instance instead. + * instance instead. */ @Deprecated public SnapshotDescription(String name, String table, SnapshotType type, String owner) { @@ -86,7 +86,7 @@ public class SnapshotDescription { * @see #SnapshotDescription(String, TableName, SnapshotType, String, long, long, int) * @see HBASE-16892 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName - * instance instead. + * instance instead. */ @Deprecated public SnapshotDescription(String name, String table, SnapshotType type, String owner, diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index a1bba0a02c..f3c7b341be 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -1864,4 +1864,14 @@ possible configurations would overwhelm and obscure the important. Default is 5 minutes. Make it 30 seconds for tests. See HBASE-19794 for some context. + + hbase.master.cleaner.snapshot.interval + 600000 + + Snapshot Cleanup chore interval in milliseconds. + The cleanup thread keeps running at this interval + to find all snapshots that are expired based on TTL + and delete them. + + diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java index 9b24e515fb..6cee493788 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java @@ -18,19 +18,20 @@ package org.apache.hadoop.hbase.master.cleaner; + +import java.io.IOException; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; -import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.List; /** * This chore, every time it runs, will try to delete snapshots that are expired based on TTL in @@ -39,60 +40,66 @@ import java.util.List; @InterfaceAudience.Private public class SnapshotCleanerChore extends ScheduledChore { - private static final Logger LOGGER = LoggerFactory.getLogger(SnapshotCleanerChore.class); + private static final Logger LOG = LoggerFactory.getLogger(SnapshotCleanerChore.class); private static final String SNAPSHOT_CLEANER_CHORE_NAME = "SnapshotCleaner"; private static final String SNAPSHOT_CLEANER_INTERVAL = "hbase.master.cleaner.snapshot.interval"; private static final int SNAPSHOT_CLEANER_DEFAULT_INTERVAL = 600 * 1000; // Default 10 min + private static final String SNAPSHOT_CLEANER_DISABLE = "hbase.master.cleaner.snapshot.disable"; private static final String DELETE_SNAPSHOT_EVENT = "Eligible Snapshot for cleanup due to expired TTL."; private final SnapshotManager snapshotManager; + private final Configuration configuration; public SnapshotCleanerChore(Stoppable stopper, Configuration configuration, SnapshotManager snapshotManager) { super(SNAPSHOT_CLEANER_CHORE_NAME, stopper, configuration.getInt(SNAPSHOT_CLEANER_INTERVAL, SNAPSHOT_CLEANER_DEFAULT_INTERVAL)); this.snapshotManager = snapshotManager; + this.configuration = configuration; } @Override protected void chore() { - LOGGER.trace("Snapshot Cleaner Chore is starting up..."); + final boolean isSnapshotChoreDisabled = this.configuration.getBoolean( + SNAPSHOT_CLEANER_DISABLE, false); + if (isSnapshotChoreDisabled) { + return; + } + LOG.trace("Snapshot Cleaner Chore is starting up..."); try { List completedSnapshotsList = this.snapshotManager.getCompletedSnapshots(); - if (CollectionUtils.isNotEmpty(completedSnapshotsList)) { - for (SnapshotProtos.SnapshotDescription snapshotDescription : completedSnapshotsList) { - long snapshotCreatedTime = snapshotDescription.getCreationTime(); - long snapshotTtl = snapshotDescription.getTtl(); - /* - * Backward compatibility after the patch deployment on HMaster - * Any snapshot with negative or zero ttl should not be deleted - * Default ttl value specified by {@HConstants.DEFAULT_SNAPSHOT_TTL} - */ - if (snapshotCreatedTime > 0 && snapshotTtl > 0 && - snapshotTtl < (Long.MAX_VALUE / 1000)) { - long currentTime = EnvironmentEdgeManager.currentTime(); - if ((snapshotCreatedTime + snapshotTtl * 1000) < currentTime) { - LOGGER.info("Event: {} Name: {}, CreatedTime: {}, TTL: {}, currentTime: {}", - DELETE_SNAPSHOT_EVENT, snapshotDescription.getName(), snapshotCreatedTime, - snapshotTtl, currentTime); - deleteExpiredSnapshot(snapshotDescription); - } + for (SnapshotProtos.SnapshotDescription snapshotDescription : completedSnapshotsList) { + long snapshotCreatedTime = snapshotDescription.getCreationTime(); + long snapshotTtl = snapshotDescription.getTtl(); + /* + * Backward compatibility after the patch deployment on HMaster + * Any snapshot with negative or zero ttl should not be deleted + * Default ttl value specified by {@HConstants.DEFAULT_SNAPSHOT_TTL} + */ + if (snapshotCreatedTime > 0 && snapshotTtl > 0 && + snapshotTtl < (Long.MAX_VALUE / 1000)) { + long currentTime = EnvironmentEdgeManager.currentTime(); + if ((snapshotCreatedTime + snapshotTtl * 1000) < currentTime) { + LOG.info("Event: {} Name: {}, CreatedTime: {}, TTL: {}, currentTime: {}", + DELETE_SNAPSHOT_EVENT, snapshotDescription.getName(), snapshotCreatedTime, + snapshotTtl, currentTime); + deleteExpiredSnapshot(snapshotDescription); } } } } catch (IOException e) { - LOGGER.error("Error while cleaning up Snapshots. Stopping Snapshot Cleaner Chore...", e); + LOG.error("Error while cleaning up Snapshots...", e); } - LOGGER.trace("Snapshot Cleaner Chore is closing..."); + LOG.trace("Snapshot Cleaner Chore is closing..."); } private void deleteExpiredSnapshot(SnapshotProtos.SnapshotDescription snapshotDescription) { try { this.snapshotManager.deleteSnapshot(snapshotDescription); } catch (Exception e) { - LOGGER.error("Error while deleting Snapshot: {}", snapshotDescription.getName(), e); + LOG.error("Error while deleting Snapshot: {}", snapshotDescription.getName(), e); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java index e5a99ed17d..08bd0a052f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java @@ -307,6 +307,8 @@ public final class SnapshotDescriptionUtils { // set default ttl(sec) if it is not set already or the value is out of the range if (ttl == SnapshotDescriptionUtils.NO_SNAPSHOT_TTL_SPECIFIED || ttl > (Long.MAX_VALUE / 1000)) { + LOG.debug("Snapshot current TTL value: {} resetting it to: {}", ttl, + HConstants.DEFAULT_SNAPSHOT_TTL); ttl = HConstants.DEFAULT_SNAPSHOT_TTL; } SnapshotDescription.Builder builder = snapshot.toBuilder(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java index 6139f9b623..eec00e50ed 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java @@ -87,7 +87,7 @@ public class TestSnapshotCleanerChore { EnvironmentEdgeManager.currentTime())); Mockito.when(snapshotManager.getCompletedSnapshots()).thenReturn(snapshotDescriptionList); try { - LOG.debug("2 Snapshots are completed but TTL is not expired for any of them"); + LOG.info("2 Snapshots are completed but TTL is not expired for any of them"); snapshotCleanerChore.chore(); } finally { stopper.stop("Stopping Test Stopper"); @@ -100,6 +100,7 @@ public class TestSnapshotCleanerChore { snapshotManager = Mockito.mock(SnapshotManager.class); Stoppable stopper = new StoppableImplementation(); Configuration conf = getSnapshotCleanerConf(); + conf.setStrings("hbase.master.cleaner.snapshot.disable", "false"); SnapshotCleanerChore snapshotCleanerChore = new SnapshotCleanerChore(stopper, conf, snapshotManager); List snapshotDescriptionList = new ArrayList<>(); @@ -113,7 +114,7 @@ public class TestSnapshotCleanerChore { EnvironmentEdgeManager.currentTime())); Mockito.when(snapshotManager.getCompletedSnapshots()).thenReturn(snapshotDescriptionList); try { - LOG.debug("5 Snapshots are completed. TTL is expired for 2 them. Going to delete them"); + LOG.info("5 Snapshots are completed. TTL is expired for 2 them. Going to delete them"); snapshotCleanerChore.chore(); } finally { stopper.stop("Stopping Test Stopper"); @@ -125,12 +126,12 @@ public class TestSnapshotCleanerChore { public void testSnapshotCleanerWithReadIOE() throws IOException { snapshotManager = Mockito.mock(SnapshotManager.class); Stoppable stopper = new StoppableImplementation(); - Configuration conf = getSnapshotCleanerConf(); + Configuration conf = new HBaseTestingUtility().getConfiguration(); SnapshotCleanerChore snapshotCleanerChore = new SnapshotCleanerChore(stopper, conf, snapshotManager); Mockito.when(snapshotManager.getCompletedSnapshots()).thenThrow(IOException.class); try { - LOG.debug("While getting completed Snapshots, IOException would occur. Hence, No Snapshot" + + LOG.info("While getting completed Snapshots, IOException would occur. Hence, No Snapshot" + " should be deleted"); snapshotCleanerChore.chore(); } finally { @@ -139,6 +140,23 @@ public class TestSnapshotCleanerChore { Mockito.verify(snapshotManager, Mockito.times(0)).deleteSnapshot(Mockito.any()); } + @Test + public void testSnapshotChoreDisable() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + conf.setStrings("hbase.master.cleaner.snapshot.disable", "true"); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + try { + LOG.info("Snapshot Chore is disabled. No cleanup performed for Expired Snapshots"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(0)).getCompletedSnapshots(); + } + private SnapshotProtos.SnapshotDescription getSnapshotDescription(final long ttl, final String snapshotName, final String tableName, final long snapshotCreationTime) { SnapshotProtos.SnapshotDescription.Builder snapshotDescriptionBuilder = diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc index 0e806e13c9..14002e94c8 100644 --- a/src/main/asciidoc/_chapters/hbase-default.adoc +++ b/src/main/asciidoc/_chapters/hbase-default.adoc @@ -2131,3 +2131,17 @@ The percent of region server RPC threads failed to abort RS. .Default `0.5` + +[[hbase.master.cleaner.snapshot.interval]] +*`hbase.master.cleaner.snapshot.interval`*:: ++ +.Description + + Snapshot Cleanup chore interval in milliseconds. + The cleanup thread keeps running at this interval + to find all snapshots that are expired based on TTL + and delete them. + ++ +.Default +`600000` diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 2f139ddd4b..78a21d1f28 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -2745,6 +2745,40 @@ A snapshot is only a representation of a table during a window of time. The amount of time the snapshot operation will take to reach each Region Server may vary from a few seconds to a minute, depending on the resource load and speed of the hardware or network, among other factors. There is also no way to know whether a given insert or update is in memory or has been flushed. + +.Take a Snapshot With TTL +Snapshots have a lifecycle that is independent from the table from which they are created. +Although data in a table may be stored with TTL the data files containing them become +frozen by the snapshot. Space consumed by expired cells will not be reclaimed by normal +table housekeeping like compaction. While this is expected it can be inconvenient at scale. +When many snapshots are under management and the data in various tables is expired by +TTL some notion of optional TTL (and optional default TTL) for snapshots could be useful. + +Default Snapshot TTL = 30 Days + +While creating a Snapshot, if TTL in seconds is not specified, the Default `TTL` will be applicable. + +---- +hbase> snapshot 'mytable', 'snapshot1234', {TTL => 86400} +---- + +The above command creates snapshot `snapshot1234` with TTL of 86400 sec(24 hours) +and hence, the snapshot is supposed to be cleaned up after 24 hours + +If snapshot is never supposed to get expired, negative TTL can be provided during +the snapshot creation + +---- +hbase> snapshot 'mytable', 'snapshot234', {TTL => -2} +---- + +At any point of time, if Snapshot cleanup is supposed to be stopped due to +some snapshot restore activity, it is advisable to disable Snapshot Cleaner with +hbase-site config: + +hbase.master.cleaner.snapshot.disable: true + + [[ops.snapshots.list]] === Listing Snapshots -- 2.17.2 (Apple Git-113) From ddab1e117268c657f45851b6ed82da988e5fadd2 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Sat, 13 Jul 2019 16:59:14 +0530 Subject: [PATCH 3/3] minor changes with review comments - updating frequency of chore run --- .../src/main/resources/hbase-default.xml | 2 +- .../master/cleaner/SnapshotCleanerChore.java | 12 ++++++++++-- .../cleaner/TestSnapshotCleanerChore.java | 17 +++++++++++------ src/main/asciidoc/_chapters/hbase-default.adoc | 2 +- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index f3c7b341be..527a40b8b7 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -1866,7 +1866,7 @@ possible configurations would overwhelm and obscure the important. hbase.master.cleaner.snapshot.interval - 600000 + 1800000 Snapshot Cleanup chore interval in milliseconds. The cleanup thread keeps running at this interval diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java index 6cee493788..b9bfd9d413 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java @@ -26,12 +26,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; -import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; /** * This chore, every time it runs, will try to delete snapshots that are expired based on TTL in @@ -43,7 +43,7 @@ public class SnapshotCleanerChore extends ScheduledChore { private static final Logger LOG = LoggerFactory.getLogger(SnapshotCleanerChore.class); private static final String SNAPSHOT_CLEANER_CHORE_NAME = "SnapshotCleaner"; private static final String SNAPSHOT_CLEANER_INTERVAL = "hbase.master.cleaner.snapshot.interval"; - private static final int SNAPSHOT_CLEANER_DEFAULT_INTERVAL = 600 * 1000; // Default 10 min + private static final int SNAPSHOT_CLEANER_DEFAULT_INTERVAL = 1800 * 1000; // Default 30 min private static final String SNAPSHOT_CLEANER_DISABLE = "hbase.master.cleaner.snapshot.disable"; private static final String DELETE_SNAPSHOT_EVENT = "Eligible Snapshot for cleanup due to expired TTL."; @@ -51,6 +51,13 @@ public class SnapshotCleanerChore extends ScheduledChore { private final SnapshotManager snapshotManager; private final Configuration configuration; + /** + * Construct Snapshot Cleaner Chore with parameterized constructor + * + * @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup + * @param configuration The configuration to set + * @param snapshotManager SnapshotManager instance to manage lifecycle of snapshot + */ public SnapshotCleanerChore(Stoppable stopper, Configuration configuration, SnapshotManager snapshotManager) { super(SNAPSHOT_CLEANER_CHORE_NAME, stopper, configuration.getInt(SNAPSHOT_CLEANER_INTERVAL, @@ -64,6 +71,7 @@ public class SnapshotCleanerChore extends ScheduledChore { final boolean isSnapshotChoreDisabled = this.configuration.getBoolean( SNAPSHOT_CLEANER_DISABLE, false); if (isSnapshotChoreDisabled) { + LOG.debug("Snapshot Cleaner Chore is disabled. Not performing any cleanup..."); return; } LOG.trace("Snapshot Cleaner Chore is starting up..."); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java index eec00e50ed..c72e3c246c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java @@ -18,12 +18,15 @@ package org.apache.hadoop.hbase.master.cleaner; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; -import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -34,10 +37,12 @@ import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + +/** + * Tests for SnapshotsCleanerChore + */ @Category({MasterTests.class, SmallTests.class}) public class TestSnapshotCleanerChore { @@ -131,8 +136,8 @@ public class TestSnapshotCleanerChore { new SnapshotCleanerChore(stopper, conf, snapshotManager); Mockito.when(snapshotManager.getCompletedSnapshots()).thenThrow(IOException.class); try { - LOG.info("While getting completed Snapshots, IOException would occur. Hence, No Snapshot" + - " should be deleted"); + LOG.info("While getting completed Snapshots, IOException would occur. Hence, No Snapshot" + + " should be deleted"); snapshotCleanerChore.chore(); } finally { stopper.stop("Stopping Test Stopper"); diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc index 14002e94c8..6b091022e9 100644 --- a/src/main/asciidoc/_chapters/hbase-default.adoc +++ b/src/main/asciidoc/_chapters/hbase-default.adoc @@ -2144,4 +2144,4 @@ The percent of region server RPC threads failed to abort RS. + .Default -`600000` +`1800000` -- 2.17.2 (Apple Git-113)