From a643e6c3b4e2cda1db0f71e247f7c9056f3b9964 Mon Sep 17 00:00:00 2001 From: Umesh Agashe Date: Sat, 15 Jul 2017 22:51:05 -0700 Subject: [PATCH] HBASE-18366 Fixed flaky test hbase.master.procedure.TestServerCrashProcedure#testRecoveryAndDoubleExecutionOnRsWithMeta Changes include: * Modified AssignmentManager.processAssignQueue() method to consider only highest versioned region servers for system table regions when destination server is not specified for them. Destination server is retained, if specified. * Modified MoveRegionProcedure to allow null value for destination server i.e. moving a region from specific source server to non-specific/ unknown destination server (picked by load-balancer) is supported now. * Removed call to HMaster.checkIfShouldMoveSystemRegionAsync() from RegionServerTracker.refresh(). This effectively means: system table regions are considered for reassignment to highest versioned region servers when: 1. master restarts (all system table regions) 2. when region server carrying them shuts down/ crashes (only system table regions on crashed servers) 3. NOT when a region server is added It also gives preference to any specific destination, any code (or user) might have specified. * Enabled test hbase.master.procedure.TestServerCrashProcedure#testRecoveryAndDoubleExecutionOnRsWithMeta --- .../protobuf/generated/MasterProcedureProtos.java | 53 ++++++++++------------ .../src/main/protobuf/MasterProcedure.proto | 2 +- .../hbase/master/assignment/AssignmentManager.java | 42 +++++++++++++---- .../master/assignment/MoveRegionProcedure.java | 15 ++++-- .../hbase/zookeeper/RegionServerTracker.java | 3 -- .../master/procedure/TestServerCrashProcedure.java | 1 - 6 files changed, 70 insertions(+), 46 deletions(-) diff --git a/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProcedureProtos.java b/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProcedureProtos.java index 0ec9b22953fa6b7d1e2204a7d144f8b7936c20a0..f450015bf20cf2701827ec17c633e209c446de3c 100644 --- a/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProcedureProtos.java +++ b/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/generated/MasterProcedureProtos.java @@ -28127,15 +28127,15 @@ public final class MasterProcedureProtos { org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerNameOrBuilder getSourceServerOrBuilder(); /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ boolean hasDestinationServer(); /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName getDestinationServer(); /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerNameOrBuilder getDestinationServerOrBuilder(); } @@ -28290,19 +28290,19 @@ public final class MasterProcedureProtos { public static final int DESTINATION_SERVER_FIELD_NUMBER = 3; private org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName destinationServer_; /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public boolean hasDestinationServer() { return ((bitField0_ & 0x00000004) == 0x00000004); } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName getDestinationServer() { return destinationServer_ == null ? org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance() : destinationServer_; } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerNameOrBuilder getDestinationServerOrBuilder() { return destinationServer_ == null ? org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance() : destinationServer_; @@ -28318,10 +28318,6 @@ public final class MasterProcedureProtos { memoizedIsInitialized = 0; return false; } - if (!hasDestinationServer()) { - memoizedIsInitialized = 0; - return false; - } if (hasRegionInfo()) { if (!getRegionInfo().isInitialized()) { memoizedIsInitialized = 0; @@ -28332,9 +28328,11 @@ public final class MasterProcedureProtos { memoizedIsInitialized = 0; return false; } - if (!getDestinationServer().isInitialized()) { - memoizedIsInitialized = 0; - return false; + if (hasDestinationServer()) { + if (!getDestinationServer().isInitialized()) { + memoizedIsInitialized = 0; + return false; + } } memoizedIsInitialized = 1; return true; @@ -28673,9 +28671,6 @@ public final class MasterProcedureProtos { if (!hasSourceServer()) { return false; } - if (!hasDestinationServer()) { - return false; - } if (hasRegionInfo()) { if (!getRegionInfo().isInitialized()) { return false; @@ -28684,8 +28679,10 @@ public final class MasterProcedureProtos { if (!getSourceServer().isInitialized()) { return false; } - if (!getDestinationServer().isInitialized()) { - return false; + if (hasDestinationServer()) { + if (!getDestinationServer().isInitialized()) { + return false; + } } return true; } @@ -28949,13 +28946,13 @@ public final class MasterProcedureProtos { private org.apache.hadoop.hbase.shaded.com.google.protobuf.SingleFieldBuilderV3< org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName, org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.Builder, org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerNameOrBuilder> destinationServerBuilder_; /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public boolean hasDestinationServer() { return ((bitField0_ & 0x00000004) == 0x00000004); } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName getDestinationServer() { if (destinationServerBuilder_ == null) { @@ -28965,7 +28962,7 @@ public final class MasterProcedureProtos { } } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public Builder setDestinationServer(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName value) { if (destinationServerBuilder_ == null) { @@ -28981,7 +28978,7 @@ public final class MasterProcedureProtos { return this; } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public Builder setDestinationServer( org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.Builder builderForValue) { @@ -28995,7 +28992,7 @@ public final class MasterProcedureProtos { return this; } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public Builder mergeDestinationServer(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName value) { if (destinationServerBuilder_ == null) { @@ -29015,7 +29012,7 @@ public final class MasterProcedureProtos { return this; } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public Builder clearDestinationServer() { if (destinationServerBuilder_ == null) { @@ -29028,7 +29025,7 @@ public final class MasterProcedureProtos { return this; } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.Builder getDestinationServerBuilder() { bitField0_ |= 0x00000004; @@ -29036,7 +29033,7 @@ public final class MasterProcedureProtos { return getDestinationServerFieldBuilder().getBuilder(); } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ public org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerNameOrBuilder getDestinationServerOrBuilder() { if (destinationServerBuilder_ != null) { @@ -29047,7 +29044,7 @@ public final class MasterProcedureProtos { } } /** - * required .hbase.pb.ServerName destination_server = 3; + * optional .hbase.pb.ServerName destination_server = 3; */ private org.apache.hadoop.hbase.shaded.com.google.protobuf.SingleFieldBuilderV3< org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName, org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName.Builder, org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerNameOrBuilder> @@ -30959,7 +30956,7 @@ public final class MasterProcedureProtos { "\n\005force\030\004 \001(\010:\005false\"\237\001\n\023MoveRegionState" + "Data\022)\n\013region_info\030\001 \001(\0132\024.hbase.pb.Reg" + "ionInfo\022+\n\rsource_server\030\002 \002(\0132\024.hbase.p" + - "b.ServerName\0220\n\022destination_server\030\003 \002(\013", + "b.ServerName\0220\n\022destination_server\030\003 \001(\013", "2\024.hbase.pb.ServerName\">\n\021GCRegionStateD" + "ata\022)\n\013region_info\030\001 \002(\0132\024.hbase.pb.Regi" + "onInfo\"\226\001\n\030GCMergedRegionsStateData\022&\n\010p" + diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto index c7d6598b81cc4b07b191ba37f8904f67cedd6a1e..a7d0be3cfe9a9aeb1342ccb75711e97179dc72e8 100644 --- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto @@ -376,7 +376,7 @@ enum MoveRegionState { message MoveRegionStateData { optional RegionInfo region_info = 1; required ServerName source_server = 2; - required ServerName destination_server = 3; + optional ServerName destination_server = 3; } enum GCRegionState { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index cb2ecf10c232142c197744a07d4dce3d7a98af3e..c93f4ea5b01fafc9da8c1a277f2730184eb5111c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -488,8 +488,8 @@ public class AssignmentManager implements ServerListener { List regionsShouldMove = getCarryingSystemTables(server); if (!regionsShouldMove.isEmpty()) { for (HRegionInfo regionInfo : regionsShouldMove) { - RegionPlan plan = new RegionPlan(regionInfo, server, - getBalancer().randomAssignment(regionInfo, serverList)); + // null value for dest forces destination server to be selected by balancer + RegionPlan plan = new RegionPlan(regionInfo, server, null); if (regionInfo.isMetaRegion()) { // Must move meta region first. moveAsync(plan); @@ -1651,9 +1651,14 @@ public class AssignmentManager implements ServerListener { } // TODO: Optimize balancer. pass a RegionPlan? - final HashMap retainMap = new HashMap(); - final List rrList = new ArrayList(); - for (RegionStateNode regionNode: regions.values()) { + final HashMap retainMap = new HashMap<>(); + final List userRRList = new ArrayList<>(); + // regions for system tables requiring reassignment + final List sysRRList = new ArrayList<>(); + for (RegionStateNode regionNode : regions.values()) { + boolean sysTable = regionNode.isSystemTable(); + final List rrList = sysTable ? sysRRList : userRRList; + if (regionNode.getRegionLocation() != null) { retainMap.put(regionNode.getRegionInfo(), regionNode.getRegionLocation()); } else { @@ -1662,7 +1667,6 @@ public class AssignmentManager implements ServerListener { } // TODO: connect with the listener to invalidate the cache - final LoadBalancer balancer = getBalancer(); // TODO use events List servers = master.getServerManager().createDestinationServersList(); @@ -1682,13 +1686,35 @@ public class AssignmentManager implements ServerListener { servers = master.getServerManager().createDestinationServersList(); } - final boolean isTraceEnabled = LOG.isTraceEnabled(); + if (!sysRRList.isEmpty()) { + // system table regions requiring reassignment are present, get region servers + // not available for system table regions + final List excludeServers = getExcludedServersForSystemTable(); + List serversForSysTables = servers.stream() + .filter(s -> !excludeServers.contains(s)).collect(Collectors.toList()); + if (serversForSysTables.isEmpty()) { + LOG.warn("No servers available for system table regions, considering all servers!"); + } + LOG.info("processing assignment plans for system tables sysServersCount=" + + serversForSysTables.size() + ", allServersCount=" + servers.size()); + processAssignmentPlans(regions, null, sysRRList, + serversForSysTables.isEmpty() ? servers : serversForSysTables); + } + + processAssignmentPlans(regions, retainMap, userRRList, servers); + } + + private void processAssignmentPlans(final HashMap regions, + final HashMap retainMap, final List rrList, + final List servers) { + boolean isTraceEnabled = LOG.isTraceEnabled(); if (isTraceEnabled) { LOG.trace("available servers count=" + servers.size() + ": " + servers); } + final LoadBalancer balancer = getBalancer(); // ask the balancer where to place regions - if (!retainMap.isEmpty()) { + if (retainMap != null && !retainMap.isEmpty()) { if (isTraceEnabled) { LOG.trace("retain assign regions=" + retainMap); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java index d8c1b7de3b55aedbffe99b821de99b2870fb2d34..2f6081978810ead825395a6034897802e351a190 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java @@ -54,7 +54,6 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure