From 9d13bc014d7214418f12afb19717a35de2713b72 Mon Sep 17 00:00:00 2001
From: Umesh Agashe
Date: Thu, 20 Apr 2017 10:22:38 -0700
Subject: [PATCH] HBASE-16549: Added new metrics for AMv2 procedures

The following AMv2 procedures are modified to override the onSubmit(), onFinish() hooks
provided by HBASE-17888 to update metrics when procedures are submitted and finished:

* AssignProcedure
* UnassignProcedure
* MergeTableRegionsProcedure
* SplitTableRegionProcedure
* ServerCrashProcedure

The following metrics are collected for each of the above procedures during the lifetime
of a process:

* Total number of instances submitted for a type of procedure
* Histogram of runtime in milliseconds for successfully completed procedures
* Total number of failed procedures

As we are moving away from Hadoop's metrics2, the hbase-metrics-api module is used for the
newly added metrics.

Modified existing tests to verify counts of procedures.
---
 .../master/MetricsAssignmentManagerSource.java | 31 +++++---
 .../hadoop/hbase/master/MetricsMasterSource.java | 9 ++-
 .../hadoop/hbase/metrics/OperationMetrics.java | 61 ++++++++++++++++
 .../master/MetricsAssignmentManagerSourceImpl.java | 42 ++++++++---
 .../hbase/master/MetricsMasterSourceImpl.java | 14 ++++
 hbase-procedure/pom.xml | 4 ++
 .../apache/hadoop/hbase/procedure2/Procedure.java | 60 +++++++++++++---
 .../hadoop/hbase/procedure2/ProcedureMetrics.java | 50 +++++++++++++
 .../org/apache/hadoop/hbase/master/HMaster.java | 3 +-
 .../apache/hadoop/hbase/master/MasterServices.java | 5 ++
 .../hbase/master/MetricsAssignmentManager.java | 42 ++++++++---
 .../apache/hadoop/hbase/master/MetricsMaster.java | 44 ++++++++++++
 .../hbase/master/assignment/AssignProcedure.java | 7 +-
 .../hbase/master/assignment/AssignmentManager.java | 8 ---
 .../assignment/MergeTableRegionsProcedure.java | 8 ++-
 .../assignment/SplitTableRegionProcedure.java | 7 ++
 .../hbase/master/assignment/UnassignProcedure.java | 7 ++
 .../master/procedure/ServerCrashProcedure.java | 7 ++
 .../hbase/master/MockNoopMasterServices.java | 5 ++
 .../master/assignment/TestAssignmentManager.java | 75 ++++++++++++++++++++
 .../assignment/TestMergeTableRegionsProcedure.java | 46 ++++++++++++
 .../assignment/TestSplitTableRegionProcedure.java | 82 +++++++++++++++++++++-
 .../master/procedure/TestServerCrashProcedure.java | 12 ++++
 23 files changed, 580 insertions(+), 49 deletions(-)
 create mode 100644 hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/metrics/OperationMetrics.java
 create mode 100644 hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureMetrics.java

diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java index 2ebf8c9625433249f1f656c76963527b4a5004be..4e4a9e0cd28ef62b7d024d880f002b1ac9893602 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master; import org.apache.hadoop.hbase.metrics.BaseSource; +import org.apache.hadoop.hbase.metrics.OperationMetrics; public interface MetricsAssignmentManagerSource extends BaseSource { @@ -42,13 +43,11 @@ public interface MetricsAssignmentManagerSource extends BaseSource { */ String METRICS_DESCRIPTION = "Metrics about HBase master assignment manager."; + // RIT metrics String RIT_COUNT_NAME = 
"ritCount"; String RIT_COUNT_OVER_THRESHOLD_NAME = "ritCountOverThreshold"; String RIT_OLDEST_AGE_NAME = "ritOldestAge"; String RIT_DURATION_NAME = "ritDuration"; - String ASSIGN_TIME_NAME = "assign"; - String UNASSIGN_TIME_NAME = "unassign"; - String BULK_ASSIGN_TIME_NAME = "bulkAssign"; String RIT_COUNT_DESC = "Current number of Regions In Transition (Gauge)."; String RIT_COUNT_OVER_THRESHOLD_DESC = @@ -57,6 +56,11 @@ public interface MetricsAssignmentManagerSource extends BaseSource { String RIT_DURATION_DESC = "Total durations in milliseconds for all Regions in Transition (Histogram)."; + String ASSIGN_METRIC_PREFIX = "assign"; + String UNASSIGN_METRIC_PREFIX = "unassign"; + String SPLIT_METRIC_PREFIX = "split"; + String MERGE_METRIC_PREFIX = "merge"; + String OPERATION_COUNT_NAME = "operationCount"; /** @@ -83,17 +87,28 @@ public interface MetricsAssignmentManagerSource extends BaseSource { void updateRitDuration(long duration); /** - * Increment the count of assignment operation (assign/unassign). + * TODO: Remove. This may not be needed now as assign and unassign counts are tracked separately + * Increment the count of operations (assign/unassign). */ void incrementOperationCounter(); /** - * Add the time took to perform the last assign operation + * @return {@link OperationMetrics} containing common metrics for assign operation + */ + OperationMetrics getAssignMetrics(); + + /** + * @return {@link OperationMetrics} containing common metrics for unassign operation + */ + OperationMetrics getUnassignMetrics(); + + /** + * @return {@link OperationMetrics} containing common metrics for split operation */ - void updateAssignTime(long time); + OperationMetrics getSplitMetrics(); /** - * Add the time took to perform the last unassign operation + * @return {@link OperationMetrics} containing common metrics for merge operation */ - void updateUnassignTime(long time); + OperationMetrics getMergeMetrics(); } diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java index 699519834f452b2099d8d3653985622af33c8a08..9163511143cbf9fb5e2952620f9dd6f370c69e7e 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master; import org.apache.hadoop.hbase.metrics.BaseSource; +import org.apache.hadoop.hbase.metrics.OperationMetrics; /** * Interface that classes that expose metrics about the master will implement. @@ -75,6 +76,7 @@ public interface MetricsMasterSource extends BaseSource { String SPLIT_PLAN_COUNT_DESC = "Number of Region Split Plans executed"; String MERGE_PLAN_COUNT_DESC = "Number of Region Merge Plans executed"; + String SERVER_CRASH_METRIC_PREFIX = "serverCrash"; /** * Increment the number of requests the cluster has seen. 
@@ -83,7 +85,8 @@ public interface MetricsMasterSource extends BaseSource { */ void incRequests(final long inc); - - - + /** + * @return {@link OperationMetrics} containing common metrics for server crash operation + */ + OperationMetrics getServerCrashMetrics(); } diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/metrics/OperationMetrics.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/metrics/OperationMetrics.java new file mode 100644 index 0000000000000000000000000000000000000000..510822c6837a64af1375c78897f99973a9b9079a --- /dev/null +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/metrics/OperationMetrics.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.metrics; + +import com.google.common.base.Preconditions; + +/** + * Container class for commonly collected metrics for most operations. Instantiate this class to + * collect submitted count, failed count and time histogram for an operation. + */ +public class OperationMetrics { + private static final String SUBMITTED_COUNT = "SubmittedCount"; + private static final String TIME = "Time"; + private static final String FAILED_COUNT = "FailedCount"; + + final private Counter submittedCounter; + final private Histogram timeHisto; + final private Counter failedCounter; + + public OperationMetrics(final MetricRegistry registry, final String metricNamePrefix) { + Preconditions.checkNotNull(registry); + Preconditions.checkNotNull(metricNamePrefix); + + /** + * TODO: As of now, Metrics description cannot be added/ registered with + * {@link MetricRegistry}. As metric names are unambiguous but concise, descriptions of + * metrics need to be made available someplace for users. 
+ */ + submittedCounter = registry.counter(metricNamePrefix + SUBMITTED_COUNT); + timeHisto = registry.histogram(metricNamePrefix + TIME); + failedCounter = registry.counter(metricNamePrefix + FAILED_COUNT); + } + + public Counter getSubmittedCounter() { + return submittedCounter; + } + + public Histogram getTimeHisto() { + return timeHisto; + } + + public Counter getFailedCounter() { + return failedCounter; + } +} diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java index 14b7e718dedcdb2364eff34054c45ae7a2384f01..b522bf95bff9585b9bc805e510f08f48376d57df 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.metrics.BaseSourceImpl; +import org.apache.hadoop.hbase.metrics.OperationMetrics; import org.apache.hadoop.metrics2.MetricHistogram; import org.apache.hadoop.metrics2.lib.MutableFastCounter; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; @@ -35,8 +36,11 @@ public class MetricsAssignmentManagerSourceImpl private MetricHistogram ritDurationHisto; private MutableFastCounter operationCounter; - private MetricHistogram assignTimeHisto; - private MetricHistogram unassignTimeHisto; + + private OperationMetrics assignMetrics; + private OperationMetrics unassignMetrics; + private OperationMetrics splitMetrics; + private OperationMetrics mergeMetrics; public MetricsAssignmentManagerSourceImpl() { this(METRICS_NAME, METRICS_DESCRIPTION, METRICS_CONTEXT, METRICS_JMX_CONTEXT); @@ -53,10 +57,18 @@ public class MetricsAssignmentManagerSourceImpl ritCountOverThresholdGauge = metricsRegistry.newGauge(RIT_COUNT_OVER_THRESHOLD_NAME, RIT_COUNT_OVER_THRESHOLD_DESC,0l); ritOldestAgeGauge = metricsRegistry.newGauge(RIT_OLDEST_AGE_NAME, RIT_OLDEST_AGE_DESC, 0l); - assignTimeHisto = metricsRegistry.newTimeHistogram(ASSIGN_TIME_NAME); - unassignTimeHisto = metricsRegistry.newTimeHistogram(UNASSIGN_TIME_NAME); ritDurationHisto = metricsRegistry.newTimeHistogram(RIT_DURATION_NAME, RIT_DURATION_DESC); operationCounter = metricsRegistry.getCounter(OPERATION_COUNT_NAME, 0l); + + /** + * NOTE: Please refer to HBASE-9774 and HBASE-14282. Based on these two issues, HBase is + * moving away from using Hadoop's metric2 to having independent HBase specific Metrics. Use + * {@link BaseSourceImpl#registry} to register the new metrics. 
+ */ + assignMetrics = new OperationMetrics(registry, ASSIGN_METRIC_PREFIX); + unassignMetrics = new OperationMetrics(registry, UNASSIGN_METRIC_PREFIX); + splitMetrics = new OperationMetrics(registry, SPLIT_METRIC_PREFIX); + mergeMetrics = new OperationMetrics(registry, MERGE_METRIC_PREFIX); } @Override @@ -80,17 +92,27 @@ public class MetricsAssignmentManagerSourceImpl } @Override - public void updateAssignTime(final long time) { - assignTimeHisto.add(time); + public void updateRitDuration(long duration) { + ritDurationHisto.add(duration); } @Override - public void updateUnassignTime(final long time) { - unassignTimeHisto.add(time); + public OperationMetrics getAssignMetrics() { + return assignMetrics; } @Override - public void updateRitDuration(long duration) { - ritDurationHisto.add(duration); + public OperationMetrics getUnassignMetrics() { + return unassignMetrics; + } + + @Override + public OperationMetrics getSplitMetrics() { + return splitMetrics; + } + + @Override + public OperationMetrics getMergeMetrics() { + return mergeMetrics; } } diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java index fc859e5f6adf4b9818814d0ce91b614309c7a1cb..f21e29e727fe852a4c132b6b5bd7bd1aaf559ea5 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.master; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.metrics.BaseSourceImpl; import org.apache.hadoop.hbase.metrics.Interns; +import org.apache.hadoop.hbase.metrics.OperationMetrics; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.lib.MutableFastCounter; @@ -37,6 +38,8 @@ public class MetricsMasterSourceImpl private final MetricsMasterWrapper masterWrapper; private MutableFastCounter clusterRequestsCounter; + private OperationMetrics serverCrashMetrics; + public MetricsMasterSourceImpl(MetricsMasterWrapper masterWrapper) { this(METRICS_NAME, METRICS_DESCRIPTION, @@ -59,6 +62,13 @@ public class MetricsMasterSourceImpl public void init() { super.init(); clusterRequestsCounter = metricsRegistry.newCounter(CLUSTER_REQUESTS_NAME, "", 0l); + + /** + * NOTE: Please refer to HBASE-9774 and HBASE-14282. Based on these two issues, HBase is + * moving away from using Hadoop's metric2 to having independent HBase specific Metrics. Use + * {@link BaseSourceImpl#registry} to register the new metrics. 
+ */ + serverCrashMetrics = new OperationMetrics(registry, SERVER_CRASH_METRIC_PREFIX); } @Override @@ -105,4 +115,8 @@ public class MetricsMasterSourceImpl metricsRegistry.snapshot(metricsRecordBuilder, all); } + @Override + public OperationMetrics getServerCrashMetrics() { + return serverCrashMetrics; + } } diff --git a/hbase-procedure/pom.xml b/hbase-procedure/pom.xml index f8e2c8aac2fdee88e95277a7ba7256e1f11769c2..eac6340fe2ac7135b85778717b1aef5b67476f73 100644 --- a/hbase-procedure/pom.xml +++ b/hbase-procedure/pom.xml @@ -86,6 +86,10 @@ commons-logging commons-logging + + org.apache.hbase + hbase-metrics-api + diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java index 380284d348b8418282c38d8531e4d6f492605fb8..b2f4f154117ad4d9510457b785a7b435c450fe3f 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java @@ -30,6 +30,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.exceptions.TimeoutIOException; +import org.apache.hadoop.hbase.metrics.Counter; +import org.apache.hadoop.hbase.metrics.Histogram; import org.apache.hadoop.hbase.procedure2.util.StringUtils; import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; import org.apache.hadoop.hbase.security.User; @@ -246,25 +248,67 @@ public abstract class Procedure implements Comparable { } /** - * This function will be called just when procedure is submitted for execution. Override this - * method to update the metrics at the beginning of the procedure + * Override this method to provide procedure specific counters for submitted count, failed + * count and time histogram. + * @param env The environment passed to the procedure executor + * @return Container object for procedure related metric */ - protected void updateMetricsOnSubmit(final TEnvironment env) {} + protected ProcedureMetrics getProcedureMetrics(final TEnvironment env) { + return null; + } + + /** + * This function will be called just when procedure is submitted for execution. Override this + * method to update the metrics at the beginning of the procedure. The default implementation + * updates submitted counter if {@link #getProcedureMetrics(Object)} returns non-null + * {@link ProcedureMetrics}. + */ + protected void updateMetricsOnSubmit(final TEnvironment env) { + ProcedureMetrics metrics = getProcedureMetrics(env); + if (metrics == null) { + return; + } + + Counter submittedCounter = metrics.getSubmittedCounter(); + if (submittedCounter != null) { + submittedCounter.increment(); + } + } /** * This function will be called just after procedure execution is finished. Override this method - * to update metrics at the end of the procedure + * to update metrics at the end of the procedure. If {@link #getProcedureMetrics(Object)} + * returns non-null {@link ProcedureMetrics}, the default implementation adds runtime of a + * procedure to a time histogram for successfully completed procedures. Increments failed + * counter for failed procedures. 
* * TODO: As any of the sub-procedures on failure rolls back all procedures in the stack, * including successfully finished siblings, this function may get called twice in certain * cases for certain procedures. Explore further if this can be called once. * - * @param env - * @param runtime - Runtime of the procedure in milliseconds - * @param success - true if procedure is completed successfully + * @param env The environment passed to the procedure executor + * @param runtime Runtime of the procedure in milliseconds + * @param success true if procedure is completed successfully */ protected void updateMetricsOnFinish(final TEnvironment env, final long runtime, - boolean success) {} + boolean success) { + ProcedureMetrics metrics = getProcedureMetrics(env); + if (metrics == null) { + return; + } + + if (success) { + Histogram timeHisto = metrics.getTimeHisto(); + if (timeHisto != null) { + timeHisto.update(runtime); + } + } else { + Counter failedCounter = metrics.getFailedCounter(); + if (failedCounter != null) { + failedCounter.increment(); + } + } + } @Override public String toString() { diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureMetrics.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureMetrics.java new file mode 100644 index 0000000000000000000000000000000000000000..af6a5f34f846c38388fcd01e1cd846b3529e80a2 --- /dev/null +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureMetrics.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.procedure2; + +import org.apache.hadoop.hbase.metrics.Counter; +import org.apache.hadoop.hbase.metrics.Histogram; + +/** + * With this interface, the procedure framework provides means to collect following set of metrics + * per procedure type for all procedures: + *
<ul>
+ *   <li>Count of submitted procedure instances</li>
+ *   <li>Time histogram for successfully completed procedure instances</li>
+ *   <li>Count of failed procedure instances</li>
+ * </ul>
+ * + * Please implement this interface to return appropriate metrics. + */ +public interface ProcedureMetrics { + /** + * @return Total number of instances submitted for a type of a procedure + */ + Counter getSubmittedCounter(); + + /** + * @return Histogram of runtimes for all successfully completed instances of a type of a procedure + */ + Histogram getTimeHisto(); + + /** + * @return Total number of instances failed for a type of a procedure + */ + Counter getFailedCounter(); +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 66791590c557b2fab0cc97decd4907cf489d1c28..0370a5c1eedac411d1f52353b25883149633fa70 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -635,7 +635,8 @@ public class HMaster extends HRegionServer implements MasterServices { return MasterDumpServlet.class; } - MetricsMaster getMasterMetrics() { + @Override + public MetricsMaster getMasterMetrics() { return metricsMaster; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index 5a45fcf6511db2e941e5aff7b5979ccf0afc2d51..c33c7d353e9c6930e9b7faf88d70f60aef002414 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -125,6 +125,11 @@ public interface MasterServices extends Server { ProcedureExecutor getMasterProcedureExecutor(); /** + * @return Master's instance of {@link MetricsMaster} + */ + MetricsMaster getMasterMetrics(); + + /** * Check table is modifiable; i.e. exists and is offline. * @param tableName Name of table to check. 
* @throws TableNotDisabledException diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java index c7ce9a9e0e9701aa53af8d1a2f5f1062f6a8d964..7d7dd8144591b0d4dbf8d6b0d04ed4b5b796027f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java @@ -19,13 +19,26 @@ package org.apache.hadoop.hbase.master; import org.apache.hadoop.hbase.CompatibilitySingletonFactory; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; + +import static org.apache.hadoop.hbase.master.MetricsMaster.convertToProcedureMetrics; public class MetricsAssignmentManager { private final MetricsAssignmentManagerSource assignmentManagerSource; + private final ProcedureMetrics assignProcMetrics; + private final ProcedureMetrics unassignProcMetrics; + private final ProcedureMetrics splitProcMetrics; + private final ProcedureMetrics mergeProcMetrics; + public MetricsAssignmentManager() { assignmentManagerSource = CompatibilitySingletonFactory.getInstance( MetricsAssignmentManagerSource.class); + + assignProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getAssignMetrics()); + unassignProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getUnassignMetrics()); + splitProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getSplitMetrics()); + mergeProcMetrics = convertToProcedureMetrics(assignmentManagerSource.getMergeMetrics()); } public MetricsAssignmentManagerSource getMetricsProcSource() { @@ -66,6 +79,7 @@ public class MetricsAssignmentManager { } /* + * TODO: Remove. This may not be required as assign and unassign operations are tracked separately * Increment the count of assignment operation (assign/unassign). 
*/ public void incrementOperationCounter() { @@ -73,18 +87,30 @@ public class MetricsAssignmentManager { } /** - * Add the time took to perform the last assign operation - * @param time + * @return Set of common metrics for assign procedure + */ + public ProcedureMetrics getAssignProcMetrics() { + return assignProcMetrics; + } + + /** + * @return Set of common metrics for unassign procedure + */ + public ProcedureMetrics getUnassignProcMetrics() { + return unassignProcMetrics; + } + + /** + * @return Set of common metrics for split procedure */ - public void updateAssignTime(final long time) { - assignmentManagerSource.updateAssignTime(time); + public ProcedureMetrics getSplitProcMetrics() { + return splitProcMetrics; } /** - * Add the time took to perform the last unassign operation - * @param time + * @return Set of common metrics for merge procedure */ - public void updateUnassignTime(final long time) { - assignmentManagerSource.updateUnassignTime(time); + public ProcedureMetrics getMergeProcMetrics() { + return mergeProcMetrics; } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java index d05585393d6098d3cdf2fa34b9d66df03955b4d2..069e48fa399a3131170b9f432dde2d3fd9742e9a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java @@ -23,6 +23,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.CompatibilitySingletonFactory; +import org.apache.hadoop.hbase.metrics.Counter; +import org.apache.hadoop.hbase.metrics.Histogram; +import org.apache.hadoop.hbase.metrics.OperationMetrics; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; /** * This class is for maintaining the various master statistics @@ -38,10 +42,14 @@ public class MetricsMaster { private MetricsMasterSource masterSource; private MetricsMasterProcSource masterProcSource; + private ProcedureMetrics serverCrashProcMetrics; + public MetricsMaster(MetricsMasterWrapper masterWrapper) { masterSource = CompatibilitySingletonFactory.getInstance(MetricsMasterSourceFactory.class).create(masterWrapper); masterProcSource = CompatibilitySingletonFactory.getInstance(MetricsMasterProcSourceFactory.class).create(masterWrapper); + + serverCrashProcMetrics = convertToProcedureMetrics(masterSource.getServerCrashMetrics()); } // for unit-test usage @@ -59,4 +67,40 @@ public class MetricsMaster { public void incrementRequests(final long inc) { masterSource.incRequests(inc); } + + /** + * @return Set of metrics for assign procedure + */ + public ProcedureMetrics getServerCrashProcMetrics() { + return serverCrashProcMetrics; + } + + /** + * This is utility function that converts {@link OperationMetrics} to {@link ProcedureMetrics}. + * + * NOTE: Procedure framework in hbase-procedure module accesses metrics common to most procedures + * through {@link ProcedureMetrics} interface. Metrics source classes in hbase-hadoop-compat + * module provides similar interface {@link OperationMetrics} that contains metrics common to + * most operations. As both hbase-procedure and hbase-hadoop-compat are lower level modules used + * by hbase-server (this) module and there is no dependency between them, this method does the + * required conversion. 
+ */ + public static ProcedureMetrics convertToProcedureMetrics(final OperationMetrics metrics) { + return new ProcedureMetrics() { + @Override + public Counter getSubmittedCounter() { + return metrics.getSubmittedCounter(); + } + + @Override + public Histogram getTimeHisto() { + return metrics.getTimeHisto(); + } + + @Override + public Counter getFailedCounter() { + return metrics.getFailedCounter(); + } + }; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index 1bd67e11ba498988104016367a6efec834c29a8d..87e8e82e7b09f848152ee12898428047de4695cf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -33,8 +33,8 @@ import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation; -import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionState.State; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; @@ -298,4 +298,9 @@ public class AssignProcedure extends RegionTransitionProcedure { public String toString() { return super.toString() + ", server=" + this.targetServer; } + + @Override + protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { + return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics(); + } } \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 378db639bc44e5fb812d070daf96447f62b4d095..8fcc2e256ba2adb0966a2d60635afae216633cd2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1453,10 +1453,6 @@ public class AssignmentManager implements ServerListener { regionStateStore.updateRegionLocation(regionNode.getRegionInfo(), state, regionNode.getRegionLocation(), regionNode.getLastHost(), regionNode.getOpenSeqNum()); sendRegionOpenedNotification(hri, regionNode.getRegionLocation()); - // update assignment metrics - if (regionNode.getProcedure() != null) { - metrics.updateAssignTime(regionNode.getProcedure().elapsedTime()); - } } } @@ -1488,10 +1484,6 @@ public class AssignmentManager implements ServerListener { regionNode.getRegionLocation()/*null*/, regionNode.getLastHost(), HConstants.NO_SEQNUM); sendRegionClosedNotification(hri); - // Update assignment metrics - if (regionNode.getProcedure() != null) { - metrics.updateUnassignTime(regionNode.getProcedure().elapsedTime()); - } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 
27a98f56135b2202ce01a225c2f0fea770f83870..1d68c650a3a4603b16933c70af8c6c098f851077 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -45,7 +45,7 @@ import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.exceptions.MergeRegionException; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.master.MetricsAssignmentManager; import org.apache.hadoop.hbase.master.CatalogJanitor; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; @@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFileInfo; @@ -410,6 +411,11 @@ public class MergeTableRegionsProcedure return TableOperationType.MERGE; } + @Override + protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { + return env.getAssignmentManager().getAssignmentManagerMetrics().getMergeProcMetrics(); + } + /** * Prepare merge and do some check * @param env MasterProcedureEnv diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 6ff1ccf3d96954b26e09e5f8f3962372eb8c1867..c085a7b91fc326b2ba38e15f4ed48c7116010834 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -52,11 +52,13 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MetricsAssignmentManager; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; @@ -344,6 +346,11 @@ public class SplitTableRegionProcedure return TableOperationType.SPLIT; } + @Override + protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { + return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); + } + private byte[] getSplitRow() { return 
daughter_2_HRI.getStartKey(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java index 004f3dcb25c257b0b98676712ef100b962dc8c34..92ef161e5c825beb0f5ca83056ffb114f357917b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java @@ -32,10 +32,12 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.master.MetricsAssignmentManager; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation; import org.apache.hadoop.hbase.master.RegionState.State; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState; @@ -216,4 +218,9 @@ public class UnassignProcedure extends RegionTransitionProcedure { public String toString() { return super.toString() + ", server=" + this.destinationServer; } + + @Override + protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { + return env.getAssignmentManager().getAssignmentManagerMetrics().getUnassignProcMetrics(); + } } \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 59cda0c93c8d155054eda73939414471d397d07e..1b97b6c4ac2ae56620e66a1bd164fd44dc249f84 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -33,7 +33,9 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterWalManager; +import org.apache.hadoop.hbase.master.MetricsMaster; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; @@ -407,4 +409,9 @@ implements ServerProcedureInterface { // the client does not know about this procedure. 
return false; } + + @Override + protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { + return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics(); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index e4909f5d9ec7255ac1c6666998a15932a9b9e212..756e947dae4c95d4ed8357f1f081ba04ca4ba93b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -133,6 +133,11 @@ public class MockNoopMasterServices implements MasterServices, Server { } @Override + public MetricsMaster getMasterMetrics() { + return null; + } + + @Override public ServerManager getServerManager() { return null; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java index 00d54bf2c89135df5302d058bba2dae4cd5179f8..de12d39eae6e12c2cf1c78915c18cdd4ac3a3af6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler; import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher; import org.apache.hadoop.hbase.procedure2.Procedure; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; import org.apache.hadoop.hbase.procedure2.util.StringUtils; @@ -105,6 +106,14 @@ public class TestAssignmentManager { private NavigableMap> regionsToRegionServers = new ConcurrentSkipListMap>(); + private ProcedureMetrics assignProcMetrics; + private ProcedureMetrics unassignProcMetrics; + + private long assignSubmittedCount = 0; + private long assignFailedCount = 0; + private long unassignSubmittedCount = 0; + private long unassignFailedCount = 0; + private void setupConfiguration(Configuration conf) throws Exception { FSUtils.setRootDir(conf, UTIL.getDataTestDir()); conf.setBoolean(WALProcedureStore.USE_HSYNC_CONF_KEY, false); @@ -122,6 +131,8 @@ public class TestAssignmentManager { rsDispatcher = new MockRSProcedureDispatcher(master); master.start(NSERVERS, rsDispatcher); am = master.getAssignmentManager(); + assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); + unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics(); setUpMeta(); } @@ -139,7 +150,14 @@ public class TestAssignmentManager { @Test public void testAssignWithGoodExec() throws Exception { + // collect AM metrics before test + collectAssignmentManagerMetrics(); + testAssign(new GoodRsExecutor()); + + assertEquals(assignSubmittedCount + NREGIONS, + assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); } @Test @@ -159,11 +177,19 @@ public class TestAssignmentManager { final TableName tableName = TableName.valueOf(this.name.getMethodName()); final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + 
collectAssignmentManagerMetrics(); + rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 3)); waitOnFuture(submitProcedure(am.createAssignProcedure(hri, false))); rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 3)); waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false))); + + assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); + assertEquals(unassignSubmittedCount + 1, unassignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); } @Test @@ -176,6 +202,9 @@ public class TestAssignmentManager { final MockRSExecutor executor) throws Exception { final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Test Assign operation failure rsDispatcher.setMockRsExecutor(executor); try { @@ -193,19 +222,39 @@ public class TestAssignmentManager { // Test Unassign operation failure rsDispatcher.setMockRsExecutor(executor); waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false))); + + assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount()); + assertEquals(unassignSubmittedCount + 1, unassignProcMetrics.getSubmittedCounter().getCount()); + + // TODO: We supposed to have 1 failed assign, 1 successful assign and a failed unassign + // operation. But ProcV2 framework marks aborted unassign operation as success. Fix it! + assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); } @Test public void testIOExceptionOnAssignment() throws Exception { + // collect AM metrics before test + collectAssignmentManagerMetrics(); + testFailedOpen(TableName.valueOf("testExceptionOnAssignment"), new FaultyRsExecutor(new IOException("test fault"))); + + assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount()); } @Test public void testDoNotRetryExceptionOnAssignment() throws Exception { + // collect AM metrics before test + collectAssignmentManagerMetrics(); + testFailedOpen(TableName.valueOf("testDoNotRetryExceptionOnAssignment"), new FaultyRsExecutor(new DoNotRetryIOException("test do not retry fault"))); + + assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount + 1, assignProcMetrics.getFailedCounter().getCount()); } private void testFailedOpen(final TableName tableName, @@ -253,6 +302,9 @@ public class TestAssignmentManager { final TableName tableName = TableName.valueOf("testAssignAnAssignedRegion"); final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + rsDispatcher.setMockRsExecutor(new GoodRsExecutor()); final Future futureA = submitProcedure(am.createAssignProcedure(hri, false)); @@ -267,6 +319,12 @@ public class TestAssignmentManager { waitOnFuture(futureB); am.getRegionStates().isRegionInState(hri, State.OPEN); // TODO: What else can we do to ensure just a noop. + + // TODO: Though second assign is noop, it's considered success, can noop be handled in a + // better way? 
+ assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); + } @Test @@ -274,6 +332,9 @@ public class TestAssignmentManager { final TableName tableName = TableName.valueOf("testUnassignAnUnassignedRegion"); final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + rsDispatcher.setMockRsExecutor(new GoodRsExecutor()); // assign the region first @@ -293,6 +354,13 @@ public class TestAssignmentManager { // Ensure we are still CLOSED. am.getRegionStates().isRegionInState(hri, State.CLOSED); // TODO: What else can we do to ensure just a noop. + + assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); + // TODO: Though second unassign is noop, it's considered success, can noop be handled in a + // better way? + assertEquals(unassignSubmittedCount + 2, unassignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); } private Future submitProcedure(final Procedure proc) { @@ -566,4 +634,11 @@ public class TestAssignmentManager { } } } + + private void collectAssignmentManagerMetrics() { + assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount(); + assignFailedCount = assignProcMetrics.getFailedCounter().getCount(); + unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount(); + unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount(); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java index 8be1be9a20d4ed2c890c8d180a0cfb09f257e7f3..6086db59ef9ef5a1f1933014d7d57ece38fdade9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; @@ -63,6 +64,17 @@ public class TestMergeTableRegionsProcedure { final static Configuration conf = UTIL.getConfiguration(); private static Admin admin; + private AssignmentManager am; + private ProcedureMetrics mergeProcMetrics; + private ProcedureMetrics assignProcMetrics; + private ProcedureMetrics unassignProcMetrics; + private long mergeSubmittedCount = 0; + private long mergeFailedCount = 0; + private long assignSubmittedCount = 0; + private long assignFailedCount = 0; + private long unassignSubmittedCount = 0; + private long unassignFailedCount = 0; + private static void setupConf(Configuration conf) { // Reduce the maximum attempts to speed up the test conf.setInt("hbase.assignment.maximum.attempts", 3); @@ -99,6 +111,10 @@ public class 
TestMergeTableRegionsProcedure { // Turn off the meta scanner so it don't remove parent on us. UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); resetProcExecutorTestingKillFlag(); + am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); + mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics(); + assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); + unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics(); } @After @@ -130,12 +146,22 @@ public class TestMergeTableRegionsProcedure { regionsToMerge[0] = tableRegions.get(0); regionsToMerge[1] = tableRegions.get(1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + long procId = procExec.submitProcedure(new MergeTableRegionsProcedure( procExec.getEnvironment(), regionsToMerge, true)); ProcedureTestingUtility.waitProcedure(procExec, procId); ProcedureTestingUtility.assertProcNotFailed(procExec, procId); assertRegionCount(tableName, initialRegionCount - 1); + + assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount()); + assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); + assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); + assertEquals(unassignSubmittedCount + 2, unassignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); } /** @@ -155,6 +181,9 @@ public class TestMergeTableRegionsProcedure { regionsToMerge2[0] = tableRegions.get(2); regionsToMerge2[1] = tableRegions.get(3); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + long procId1 = procExec.submitProcedure(new MergeTableRegionsProcedure( procExec.getEnvironment(), regionsToMerge1, true)); long procId2 = procExec.submitProcedure(new MergeTableRegionsProcedure( @@ -164,6 +193,13 @@ public class TestMergeTableRegionsProcedure { ProcedureTestingUtility.assertProcNotFailed(procExec, procId1); ProcedureTestingUtility.assertProcNotFailed(procExec, procId2); assertRegionCount(tableName, initialRegionCount - 2); + + assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount()); + assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); + assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); + assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); } @Test(timeout=60000) @@ -237,4 +273,14 @@ public class TestMergeTableRegionsProcedure { private ProcedureExecutor getMasterProcedureExecutor() { return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); } + + private void collectAssignmentManagerMetrics() { + mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount(); + mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount(); + + assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount(); + assignFailedCount = assignProcMetrics.getFailedCounter().getCount(); + unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount(); + unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount(); + } } \ No newline at end of file diff 
--git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java index 7af9d67369af8916ec71003d0918a68407a56584..0dafb6463250a74a1706ac5840ed5651c5a0e10d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.testclassification.MasterTests; @@ -66,6 +67,7 @@ import org.junit.rules.TestRule; @Category({MasterTests.class, MediumTests.class}) public class TestSplitTableRegionProcedure { private static final Log LOG = LogFactory.getLog(TestSplitTableRegionProcedure.class); + @Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()). withLookingForStuckThread(true).build(); @@ -77,6 +79,19 @@ public class TestSplitTableRegionProcedure { private static final int startRowNum = 11; private static final int rowCount = 60; + private AssignmentManager am; + + private ProcedureMetrics splitProcMetrics; + private ProcedureMetrics assignProcMetrics; + private ProcedureMetrics unassignProcMetrics; + + private long splitSubmittedCount = 0; + private long splitFailedCount = 0; + private long assignSubmittedCount = 0; + private long assignFailedCount = 0; + private long unassignSubmittedCount = 0; + private long unassignFailedCount = 0; + @Rule public TestName name = new TestName(); @@ -108,6 +123,10 @@ public class TestSplitTableRegionProcedure { UTIL.getAdmin().setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. 
UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); + am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); + splitProcMetrics = am.getAssignmentManagerMetrics().getSplitProcMetrics(); + assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); + unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics(); } @After @@ -132,6 +151,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -140,7 +162,14 @@ public class TestSplitTableRegionProcedure { ProcedureTestingUtility.assertProcNotFailed(procExec, procId); verify(tableName, splitRowNum); - } + + assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount()); + assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount()); + assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); + assertEquals(unassignSubmittedCount + 1, unassignProcMetrics.getSubmittedCounter().getCount()); + assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); +} @Test(timeout=60000) public void testSplitTableRegionNoStoreFile() throws Exception { @@ -155,6 +184,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -164,6 +196,9 @@ public class TestSplitTableRegionProcedure { assertTrue(UTIL.getMiniHBaseCluster().getRegions(tableName).size() == 2); assertTrue(UTIL.countRows(tableName) == 0); + + assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount()); + assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount()); } @Test(timeout=60000) @@ -181,6 +216,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -189,6 +227,9 @@ public class TestSplitTableRegionProcedure { ProcedureTestingUtility.assertProcNotFailed(procExec, procId); verify(tableName, splitRowNum); + + assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount()); + assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount()); } @Test(timeout=60000) @@ -206,6 +247,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = 
     long procId = procExec.submitProcedure(
       new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@@ -218,6 +262,9 @@
     assertTrue(daughters.size() == 2);
     assertTrue(UTIL.countRows(tableName) == rowCount);
     assertTrue(UTIL.countRows(daughters.get(0)) == 0 || UTIL.countRows(daughters.get(1)) == 0);
+
+    assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
   }
 
   @Test(timeout=60000)
@@ -236,6 +283,9 @@ public class TestSplitTableRegionProcedure {
     assertTrue("not able to find a splittable region", regions != null);
     assertTrue("not able to find a splittable region", regions.length == 1);
 
+    // collect AM metrics before test
+    collectAssignmentManagerMetrics();
+
     // Split region of the table
     long procId = procExec.submitProcedure(
       new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@@ -258,6 +308,9 @@
     final int currentRowCount = splitRowNum - startRowNum;
     assertTrue(UTIL.countRows(tableName) == currentRowCount);
     assertTrue(UTIL.countRows(daughters.get(0)) == 0 || UTIL.countRows(daughters.get(1)) == 0);
+
+    assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
   }
 
   @Test(timeout=60000)
@@ -272,6 +325,9 @@ public class TestSplitTableRegionProcedure {
     assertTrue("not able to find a splittable region", regions != null);
     assertTrue("not able to find a splittable region", regions.length == 1);
 
+    // collect AM metrics before test
+    collectAssignmentManagerMetrics();
+
     // Split region of the table with null split key
     try {
       long procId1 = procExec.submitProcedure(
@@ -281,6 +337,9 @@
     } catch (DoNotRetryIOException e) {
       LOG.debug("Expected Split procedure construction failure: " + e.getMessage());
     }
+
+    assertEquals(splitSubmittedCount, splitProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
   }
 
   @Test(timeout = 60000)
@@ -299,6 +358,9 @@
     ProcedureTestingUtility.waitNoProcedureRunning(procExec);
     ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
 
+    // collect AM metrics before test
+    collectAssignmentManagerMetrics();
+
     // Split region of the table
     long procId = procExec.submitProcedure(
       new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@@ -315,6 +377,9 @@
     assertEquals(1, daughters.size());
     verifyData(daughters.get(0), startRowNum, rowCount, Bytes.toBytes(ColumnFamilyName1),
       Bytes.toBytes(ColumnFamilyName2));
+
+    assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(splitFailedCount + 1, splitProcMetrics.getFailedCounter().getCount());
   }
 
   @Test(timeout=60000)
@@ -333,6 +398,9 @@
     ProcedureTestingUtility.waitNoProcedureRunning(procExec);
     ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
 
+    // collect AM metrics before test
+    collectAssignmentManagerMetrics();
+
     // Split region of the table
     long procId = procExec.submitProcedure(
       new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey));
@@ -342,6 +410,9 @@
     ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
     verify(tableName, splitRowNum);
+
+    assertEquals(splitSubmittedCount + 1, splitProcMetrics.getSubmittedCounter().getCount());
+    assertEquals(splitFailedCount, splitProcMetrics.getFailedCounter().getCount());
   }
 
   private void insertData(final TableName tableName) throws IOException, InterruptedException {
@@ -424,4 +495,13 @@
   private ProcedureExecutor getMasterProcedureExecutor() {
     return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
   }
+
+  private void collectAssignmentManagerMetrics() {
+    splitSubmittedCount = splitProcMetrics.getSubmittedCounter().getCount();
+    splitFailedCount = splitProcMetrics.getFailedCounter().getCount();
+    assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
+    assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
+    unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
+    unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java
index 8cee4d8bbd4736c88f56b0d79d0031614b4a5530..e34e3582ad86ec2181fd13c44f9f709411b8e201 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
@@ -48,6 +49,10 @@ public class TestServerCrashProcedure {
   private HBaseTestingUtility util;
 
+  private ProcedureMetrics serverCrashProcMetrics;
+  private long serverCrashSubmittedCount = 0;
+  private long serverCrashFailedCount = 0;
+
   private void setupConf(Configuration conf) {
     conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
     conf.set("hbase.balancer.tablesOnMaster", "none");
@@ -61,6 +66,8 @@
     this.util.startMiniCluster(3);
     ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(
       this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false);
+    serverCrashProcMetrics = this.util.getHBaseCluster().getMaster().getMasterMetrics()
+        .getServerCrashProcMetrics();
   }
 
   @After
@@ -141,4 +148,9 @@
       t.close();
     }
   }
+
+  private void collectMasterMetrics() {
+    serverCrashSubmittedCount = serverCrashProcMetrics.getSubmittedCounter().getCount();
+    serverCrashFailedCount = serverCrashProcMetrics.getFailedCounter().getCount();
+  }
 }
-- 
2.10.1 (Apple Git-78)