From 95e42a6cc464504230185cf104ae514955972348 Mon Sep 17 00:00:00 2001 From: Chia-Ping Tsai Date: Tue, 1 May 2018 15:16:13 -0700 Subject: [PATCH] HBASE-20169 NPE when calling HBTU.shutdownMiniCluster (TestAssignmentManagerMetrics is flakey); AMENDMENT --- .../hadoop/hbase/procedure2/ProcedureExecutor.java | 17 +++++++---------- .../hbase/master/TestAssignmentManagerMetrics.java | 6 ++++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java index e5105a5045..bf86c1419d 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java @@ -264,9 +264,9 @@ public class ProcedureExecutor { private final CopyOnWriteArrayList listeners = new CopyOnWriteArrayList<>(); private Configuration conf; - private ThreadGroup threadGroup; + private final ThreadGroup threadGroup; private CopyOnWriteArrayList workerThreads; - private TimeoutExecutorThread timeoutExecutor; + private final TimeoutExecutorThread timeoutExecutor; private int corePoolSize; private int maxPoolSize; @@ -299,6 +299,11 @@ public class ProcedureExecutor { this.conf = conf; this.checkOwnerSet = conf.getBoolean(CHECK_OWNER_SET_CONF_KEY, DEFAULT_CHECK_OWNER_SET); refreshConfiguration(conf); + // Create the Thread Group for the executors + threadGroup = new ThreadGroup("PEWorkerGroup"); + + // Create the timeout executor + timeoutExecutor = new TimeoutExecutorThread(this, threadGroup); } private void load(final boolean abortOnCorruption) throws IOException { @@ -510,11 +515,6 @@ public class ProcedureExecutor { LOG.info("Starting {} core workers (bigger of cpus/4 or 16) with max (burst) worker count={}", corePoolSize, maxPoolSize); - // Create the Thread Group for the executors - threadGroup = new ThreadGroup("PEWorkerGroup"); - - // Create the timeout executor - timeoutExecutor = new TimeoutExecutorThread(this, threadGroup); // Create the workers workerId.set(0); @@ -576,7 +576,6 @@ public class ProcedureExecutor { // stop the timeout executor timeoutExecutor.awaitTermination(); - timeoutExecutor = null; // stop the worker threads for (WorkerThread worker: workerThreads) { @@ -590,8 +589,6 @@ public class ProcedureExecutor { } catch (IllegalThreadStateException e) { LOG.error("ThreadGroup " + threadGroup + " contains running threads; " + e.getMessage()); threadGroup.list(); - } finally { - threadGroup = null; } // reset the in-memory state for testing diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java index 87f6fa4e25..4c9d0e3c33 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerMetrics.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.test.MetricsAssertHelper; import org.apache.hadoop.hbase.testclassification.MasterTests; @@ -91,6 +92,8 @@ public class TestAssignmentManagerMetrics { // set a small interval for updating rit metrics conf.setInt(AssignmentManager.RIT_CHORE_INTERVAL_MSEC_CONF_KEY, MSG_INTERVAL); + // keep rs online so it can report the failed opens. + conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false); TEST_UTIL.startMiniCluster(1); CLUSTER = TEST_UTIL.getHBaseCluster(); MASTER = CLUSTER.getMaster(); @@ -148,6 +151,9 @@ public class TestAssignmentManagerMetrics { } // Sleep 3 seconds, wait for doMetrics chore catching up + // the rit count consists of rit and failed opens. see RegionInTransitionStat#update + // Waiting for the completion of rit makes the assert stable. + TEST_UTIL.waitUntilNoRegionsInTransition(); Thread.sleep(MSG_INTERVAL * 3); METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource); METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1, -- 2.16.3