From 8d56b75b376ddf395bef6fb78b03860a69e647ff Mon Sep 17 00:00:00 2001 From: Toshihiro Suzuki Date: Tue, 13 Feb 2018 01:55:07 +0900 Subject: [PATCH] HBASE-19893 restore_snapshot is broken in master branch when region splits --- .../hbase/master/assignment/RegionStates.java | 5 +- .../master/procedure/RestoreSnapshotProcedure.java | 56 ++++++++++++++++++++-- .../hbase/snapshot/RestoreSnapshotHelper.java | 3 +- .../apache/hadoop/hbase/HBaseTestingUtility.java | 36 ++++++++++---- .../client/TestRestoreSnapshotFromClient.java | 7 ++- 5 files changed, 90 insertions(+), 17 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index adeba67..84253ec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -477,6 +477,10 @@ public class RegionStates { } } + public void deleteRegions(final List regionInfos) { + regionInfos.forEach(this::deleteRegion); + } + ArrayList getTableRegionStateNodes(final TableName tableName) { final ArrayList regions = new ArrayList(); for (RegionStateNode node: regionsMap.tailMap(tableName.getName()).values()) { @@ -618,7 +622,6 @@ public class RegionStates { } } - @VisibleForTesting public void updateRegionState(final RegionInfo regionInfo, final State state) { final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo); synchronized (regionNode) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java index 9aa5171..aaa240c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RestoreSnapshotProcedure.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -33,11 +34,15 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.errorhandling.ForeignException; import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MetricsSnapshot; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; @@ -387,7 +392,7 @@ public class RestoreSnapshotProcedure env.getMasterServices().getConfiguration(), fs, manifest, - modifiedTableDescriptor, + modifiedTableDescriptor, rootDir, monitorException, getMonitorStatus()); @@ -419,8 +424,8 @@ public class RestoreSnapshotProcedure // 1. Prepare to restore getMonitorStatus().setStatus("Preparing to restore each region"); - // 2. Applies changes to hbase:meta - // (2.1). Removes the current set of regions from META + // 2. Applies changes to hbase:meta and in-memory states + // (2.1). Removes the current set of regions from META and in-memory states // // By removing also the regions to restore (the ones present both in the snapshot // and in the current state) we ensure that no extra fields are present in META @@ -429,9 +434,16 @@ public class RestoreSnapshotProcedure // that are not correct after the restore. if (regionsToRemove != null) { MetaTableAccessor.deleteRegions(conn, regionsToRemove); + deleteRegionsFromInMemoryStates(regionsToRemove, env); } - // (2.2). Add the new set of regions to META + // We also need to remove region replicas from in-memory states + List regionReplicas = env.getAssignmentManager().getRegionStates() + .getRegionsOfTable(getTableName()).stream() + .filter(r -> !RegionReplicaUtil.isDefaultReplica(r)).collect(Collectors.toList()); + deleteRegionsFromInMemoryStates(regionReplicas, env); + + // (2.2). Add the new set of regions to META and in-memory states // // At this point the old regions are no longer present in META. // and the set of regions present in the snapshot will be written to META. @@ -442,6 +454,8 @@ public class RestoreSnapshotProcedure conn, regionsToAdd, modifiedTableDescriptor.getRegionReplication()); + + addRegionsToInMemoryStates(regionsToAdd, env); } if (regionsToRestore != null) { @@ -449,6 +463,9 @@ public class RestoreSnapshotProcedure conn, regionsToRestore, modifiedTableDescriptor.getRegionReplication()); + + deleteRegionsFromInMemoryStates(regionsToRestore, env); + addRegionsToInMemoryStates(regionsToRestore, env); } RestoreSnapshotHelper.RestoreMetaChanges metaChanges = @@ -475,6 +492,37 @@ public class RestoreSnapshotProcedure monitorStatus.getCompletionTimestamp() - monitorStatus.getStartTime()); } + /** + * Delete regions from in-memory states + * @param regionInfos regions to delete + * @param env MasterProcedureEnv + */ + private void deleteRegionsFromInMemoryStates(List regionInfos, + MasterProcedureEnv env) { + env.getAssignmentManager().getRegionStates().deleteRegions(regionInfos); + env.getMasterServices().getServerManager().removeRegions(regionInfos); + FavoredNodesManager fnm = env.getMasterServices().getFavoredNodesManager(); + if (fnm != null) { + fnm.deleteFavoredNodesForRegions(regionInfos); + } + } + + /** + * Add regions to in-memory states + * @param regionInfos regions to add + * @param env MasterProcedureEnv + */ + private void addRegionsToInMemoryStates(List regionInfos, MasterProcedureEnv env) { + AssignmentManager am = env.getAssignmentManager(); + for (RegionInfo regionInfo : regionInfos) { + if (regionInfo.isSplit()) { + am.getRegionStates().updateRegionState(regionInfo, RegionState.State.SPLIT); + } else { + am.getRegionStates().updateRegionState(regionInfo, RegionState.State.CLOSED); + } + } + } + private void restoreSnapshotAcl(final MasterProcedureEnv env) throws IOException { if (restoreAcl && snapshot.hasUsersAndPermissions() && snapshot.getUsersAndPermissions() != null && SnapshotDescriptionUtils diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java index 179dfe5..7edf734 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java @@ -229,7 +229,8 @@ public class RestoreSnapshotHelper { if (regionNames.contains(regionName)) { LOG.info("region to restore: " + regionName); regionNames.remove(regionName); - metaChanges.addRegionToRestore(regionInfo); + metaChanges.addRegionToRestore(ProtobufUtil.toRegionInfo(regionManifests.get(regionName) + .getRegionInfo())); } else { LOG.info("region to remove: " + regionName); metaChanges.addRegionToRemove(regionInfo); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index b48abc6..d9dfd82 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -2417,6 +2417,19 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility { return rows; } + /** + * Returns all regions of the specified table + * + * @param tableName the table name + * @return all regions of the specified table + * @throws IOException when getting the regions fails. + */ + public List getRegions(TableName tableName) throws IOException { + try (Admin admin = getConnection().getAdmin()) { + return admin.getRegions(tableName); + } + } + /* * Find any other region server which is different from the one identified by parameter * @param rs @@ -2435,9 +2448,6 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility { /** * Tool to get the reference to the region server object that holds the * region of the specified user table. - * It first searches for the meta rows that contain the region of the - * specified table, then gets the index of that RS, and finally retrieves - * the RS's reference. * @param tableName user table to lookup in hbase:meta * @return region server that holds it, null if the row doesn't exist * @throws IOException @@ -2445,21 +2455,27 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility { */ public HRegionServer getRSForFirstRegionInTable(TableName tableName) throws IOException, InterruptedException { - List metaRows = getMetaTableRows(tableName); - if (metaRows == null || metaRows.isEmpty()) { + List regions = getRegions(tableName); + if (regions == null || regions.isEmpty()) { return null; } - LOG.debug("Found " + metaRows.size() + " rows for table " + - tableName); - byte [] firstrow = metaRows.get(0); - LOG.debug("FirstRow=" + Bytes.toString(firstrow)); + LOG.debug("Found " + regions.size() + " regions for table " + + tableName); + + byte[] firstRegionName = regions.stream() + .filter(r -> !r.isOffline()) + .map(RegionInfo::getRegionName) + .findFirst() + .orElseThrow(() -> new IOException("online regions not found in table " + tableName)); + + LOG.debug("firstRegionName=" + Bytes.toString(firstRegionName)); long pause = getConfiguration().getLong(HConstants.HBASE_CLIENT_PAUSE, HConstants.DEFAULT_HBASE_CLIENT_PAUSE); int numRetries = getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER); RetryCounter retrier = new RetryCounter(numRetries+1, (int)pause, TimeUnit.MICROSECONDS); while(retrier.shouldRetry()) { - int index = getMiniHBaseCluster().getServerWith(firstrow); + int index = getMiniHBaseCluster().getServerWith(firstRegionName); if (index != -1) { return getMiniHBaseCluster().getRegionServerThreads().get(index).getRegionServer(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java index eb8b20e..f42aeaa 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Set; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -307,6 +306,12 @@ public class TestRestoreSnapshotFromClient { // Take a snapshot admin.snapshot(snapshotName1, tableName); + // Load more data + SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 500, FAMILY); + + // Split the second region + splitRegion(regionInfos.get(1)); + // Restore the snapshot admin.disableTable(tableName); admin.restoreSnapshot(snapshotName1); -- 2.10.1 (Apple Git-78)