From 9941caec5251a89c6990913626b27af9c9fac098 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Fri, 3 Nov 2017 15:13:13 -0700 Subject: [PATCH] HBASE-19165 TODO Handle stuck in transition: rit=OPENING, location=ve0538.... --- .../hbase/master/assignment/AssignmentManager.java | 18 +++++++++++++++++- .../hbase/master/assignment/RegionStateStore.java | 15 ++++++++++----- .../master/procedure/TestRestoreSnapshotProcedure.java | 2 ++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 8bdf4d5dfd..0819137d5f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -951,7 +951,7 @@ public class AssignmentManager implements ServerListener { final RegionStateNode regionNode = regionStates.getOrCreateRegionNode(hri); LOG.info("META REPORTED: " + regionNode); if (!reportTransition(regionNode, serverNode, TransitionCode.OPENED, 0)) { - LOG.warn("META REPORTED but no procedure found"); + LOG.warn("META REPORTED but no procedure found (complete?)"); regionNode.setRegionLocation(serverNode.getServerName()); } else if (LOG.isTraceEnabled()) { LOG.trace("META REPORTED: " + regionNode); @@ -1183,6 +1183,22 @@ public class AssignmentManager implements ServerListener { public void visitRegionState(final RegionInfo regionInfo, final State state, final ServerName regionLocation, final ServerName lastHost, final long openSeqNum) { final RegionStateNode regionNode = regionStates.getOrCreateRegionNode(regionInfo); + if (state == null) { + // No state in hbase:meta table. Are I doing a rolling upgrade from hbase1 to + // hbase2? If so, there is no state in hbase:meta. It is in zk or, if region has + // been successfully deployed, there is no state in zk; need to ask regionserver + // if it is serving. TODO. + // Presume for now that this is a clean start of an hbase2 over a hbase1 dataset. + // Set state to be OFFLINE. + LOG.info("State was NULL for " + regionInfo.getEncodedName() + "; presuming " + + "OFFLINE! Assigning..."); + try { + assign(regionInfo); + } catch (java.io.IOException ioe) { + LOG.warn("Failed assign of " + regionInfo, ioe); + } + return; + } synchronized (regionNode) { if (!regionNode.isInTransition()) { regionNode.setState(state); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java index f9a1b438c8..85a0d60bc4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java @@ -90,10 +90,15 @@ public class RegionStateStore { @Override public boolean visit(final Result r) throws IOException { if (r != null && !r.isEmpty()) { - long st = System.currentTimeMillis(); + long st = 0; + if (LOG.isTraceEnabled()) { + st = System.currentTimeMillis(); + } visitMetaEntry(visitor, r); - long et = System.currentTimeMillis(); - LOG.info("[T] LOAD META PERF " + StringUtils.humanTimeDiff(et - st)); + if (LOG.isTraceEnabled()) { + long et = System.currentTimeMillis(); + LOG.trace("[T] LOAD META PERF " + StringUtils.humanTimeDiff(et - st)); + } } else if (isDebugEnabled) { LOG.debug("NULL result from meta - ignoring but this is strange."); } @@ -310,11 +315,11 @@ public class RegionStateStore { /** * Pull the region state from a catalog table {@link Result}. * @param r Result to pull the region state from - * @return the region state, or OPEN if there's no value written. + * @return the region state, or null if unknown. */ protected State getRegionState(final Result r, int replicaId) { Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(replicaId)); - if (cell == null || cell.getValueLength() == 0) return State.OPENING; + if (cell == null || cell.getValueLength() == 0) return null; return State.valueOf(Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java index 58042d8069..96f089929b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRestoreSnapshotProcedure.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -193,6 +194,7 @@ public class TestRestoreSnapshotProcedure extends TestTableDDLProcedureBase { } } + @Ignore // See HBASE-19193 @Test(timeout=60000) public void testRecoveryAndDoubleExecution() throws Exception { final ProcedureExecutor procExec = getMasterProcedureExecutor(); -- 2.11.0 (Apple Git-81)