From f150a704d70f87af411efa19a0d415006067a44d Mon Sep 17 00:00:00 2001 From: Guanghao Zhang Date: Thu, 7 Jun 2018 18:37:48 +0800 Subject: [PATCH] HBASE-20698 Master don't record right server version until new started region server call regionServerReport method --- .../hadoop/hbase/master/MasterRpcServices.java | 8 +++--- .../hbase/master/assignment/AssignmentManager.java | 30 +++++++++++----------- .../hbase/master/assignment/RegionStates.java | 23 +++++++---------- .../master/assignment/MockMasterServices.java | 5 ++-- 4 files changed, 31 insertions(+), 35 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index d3202ce..0a1393b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -459,9 +459,8 @@ public class MasterRpcServices extends RSRpcServices ServerMetrics oldLoad = master.getServerManager().getLoad(serverName); ServerMetrics newLoad = ServerMetricsBuilder.toServerMetrics(serverName, sl); master.getServerManager().regionServerReport(serverName, newLoad); - int version = VersionInfoUtil.getCurrentClientVersionNumber(); - master.getAssignmentManager().reportOnlineRegions(serverName, - version, newLoad.getRegionMetrics().keySet()); + master.getAssignmentManager() + .reportOnlineRegions(serverName, newLoad.getRegionMetrics().keySet()); if (sl != null && master.metricsMaster != null) { // Up our metrics. master.metricsMaster.incrementRequests(sl.getTotalNumberOfRequests() @@ -484,7 +483,8 @@ public class MasterRpcServices extends RSRpcServices // if regionserver passed hostname to use, // then use it instead of doing a reverse DNS lookup ServerName rs = master.getServerManager().regionServerStartup(request, ia); - + int version = VersionInfoUtil.getCurrentClientVersionNumber(); + master.getAssignmentManager().recordServerNode(rs, version); // Send back some config info RegionServerStartupResponse.Builder resp = createConfigurationSubset(); NameStringPair.Builder entry = NameStringPair.newBuilder() diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 35e4aa4..630ec3b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -861,7 +861,7 @@ public class AssignmentManager implements ServerListener { serverName, regionNode, state)); } - final ServerStateNode serverNode = regionStates.getOrCreateServer(serverName); + final ServerStateNode serverNode = regionStates.getServerNode(serverName); if (!reportTransition(regionNode, serverNode, state, seqId)) { // Don't log if shutting down cluster; during shutdown. LOG.warn("No matching procedure found for {} transition to {}", regionNode, state); @@ -911,7 +911,7 @@ public class AssignmentManager implements ServerListener { master.getMasterProcedureExecutor().submitProcedure(createSplitProcedure(parent, splitKey)); // If the RS is < 2.0 throw an exception to abort the operation, we are handling the split - if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) { + if (regionStates.getServerNode(serverName).getVersionNumber() < 0x0200000) { throw new UnsupportedOperationException(String.format( "Split handled by the master: parent=%s hriA=%s hriB=%s", parent.getShortNameToLog(), hriA, hriB)); } @@ -934,25 +934,32 @@ public class AssignmentManager implements ServerListener { master.getMasterProcedureExecutor().submitProcedure(createMergeProcedure(hriA, hriB)); // If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge - if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) { + if (regionStates.getServerNode(serverName).getVersionNumber() < 0x0200000) { throw new UnsupportedOperationException(String.format( "Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state, merged, hriA, hriB)); } } + public void recordServerNode(ServerName serverName, int versionNumber) { + if (!isRunning()) { + return; + } + regionStates.createServerNode(serverName, versionNumber); + } + // ============================================================================================ // RS Status update (report online regions) helpers // ============================================================================================ /** * the master will call this method when the RS send the regionServerReport(). - * the report will contains the "hbase version" and the "online regions". + * the report will contains the "online regions". * this method will check the the online regions against the in-memory state of the AM, * if there is a mismatch we will try to fence out the RS with the assumption * that something went wrong on the RS side. */ - public void reportOnlineRegions(final ServerName serverName, - final int versionNumber, final Set regionNames) throws YouAreDeadException { + public void reportOnlineRegions(final ServerName serverName, final Set regionNames) + throws YouAreDeadException { if (!isRunning()) return; if (LOG.isTraceEnabled()) { LOG.trace("ReportOnlineRegions " + serverName + " regionCount=" + regionNames.size() + @@ -961,11 +968,8 @@ public class AssignmentManager implements ServerListener { collect(Collectors.toList())); } - final ServerStateNode serverNode = regionStates.getOrCreateServer(serverName); - - // update the server version number. This will be used for live upgrades. + final ServerStateNode serverNode = regionStates.getServerNode(serverName); synchronized (serverNode) { - serverNode.setVersionNumber(versionNumber); if (serverNode.isInState(ServerState.SPLITTING, ServerState.OFFLINE)) { LOG.warn("Got a report from a server result in state " + serverNode.getState()); return; @@ -1063,11 +1067,7 @@ public class AssignmentManager implements ServerListener { } protected boolean waitServerReportEvent(final ServerName serverName, final Procedure proc) { - final ServerStateNode serverNode = regionStates.getOrCreateServer(serverName); - if (serverNode == null) { - LOG.warn("serverName=null; {}", proc); - } - return serverNode.getReportEvent().suspendIfNotReady(proc); + return regionStates.getServerNode(serverName).getReportEvent().suspendIfNotReady(proc); } protected void wakeServerReportEvent(final ServerStateNode serverNode) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index e103f2c..396759a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -340,10 +340,11 @@ public class RegionStates { private volatile ServerState state = ServerState.ONLINE; private volatile int versionNumber = 0; - public ServerStateNode(final ServerName serverName) { + public ServerStateNode(final ServerName serverName, int versionNumber) { this.serverName = serverName; this.regions = ConcurrentHashMap.newKeySet(); this.reportEvent = new ServerReportEvent(serverName); + this.versionNumber = versionNumber; } public ServerName getServerName() { @@ -380,10 +381,6 @@ public class RegionStates { this.state = state; } - public void setVersionNumber(final int versionNumber) { - this.versionNumber = versionNumber; - } - public Set getRegions() { return regions; } @@ -629,7 +626,7 @@ public class RegionStates { * @see #logSplit(ServerName) */ public void logSplitting(final ServerName serverName) { - final ServerStateNode serverNode = getOrCreateServer(serverName); + final ServerStateNode serverNode = getServerNode(serverName); synchronized (serverNode) { serverNode.setState(ServerState.SPLITTING); } @@ -640,7 +637,7 @@ public class RegionStates { * @see #logSplitting(ServerName) */ public void logSplit(final ServerName serverName) { - final ServerStateNode serverNode = getOrCreateServer(serverName); + final ServerStateNode serverNode = getServerNode(serverName); synchronized (serverNode) { serverNode.setState(ServerState.OFFLINE); } @@ -971,14 +968,12 @@ public class RegionStates { * you could mess up online server accounting. TOOD: Review usage and convert * to {@link #getServerNode(ServerName)} where we can. */ - public ServerStateNode getOrCreateServer(final ServerName serverName) { + public void createServerNode(ServerName serverName, int versionNumber) { ServerStateNode node = serverMap.get(serverName); if (node == null) { - node = new ServerStateNode(serverName); - ServerStateNode oldNode = serverMap.putIfAbsent(serverName, node); - node = oldNode != null ? oldNode : node; + node = new ServerStateNode(serverName, versionNumber); + serverMap.putIfAbsent(serverName, node); } - return node; } public void removeServer(final ServerName serverName) { @@ -1000,14 +995,14 @@ public class RegionStates { } public ServerStateNode addRegionToServer(final RegionStateNode regionNode) { - ServerStateNode serverNode = getOrCreateServer(regionNode.getRegionLocation()); + ServerStateNode serverNode = getServerNode(regionNode.getRegionLocation()); serverNode.addRegion(regionNode); return serverNode; } public ServerStateNode removeRegionFromServer(final ServerName serverName, final RegionStateNode regionNode) { - ServerStateNode serverNode = getOrCreateServer(serverName); + ServerStateNode serverNode = getServerNode(serverName); serverNode.removeRegion(regionNode); return serverNode; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java index fb75001..e830e9f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java @@ -125,8 +125,9 @@ public class MockMasterServices extends MockNoopMasterServices { // Make a report with current state of the server 'serverName' before we call wait.. SortedSet regions = regionsToRegionServers.get(serverName); try { - getAssignmentManager().reportOnlineRegions(serverName, 0, - regions == null? new HashSet(): regions); + getAssignmentManager().recordServerNode(serverName, 0); + getAssignmentManager() + .reportOnlineRegions(serverName, regions == null ? new HashSet() : regions); } catch (YouAreDeadException e) { throw new RuntimeException(e); } -- 2.7.4