From fa3fe54ca4c2afa55e5708546cfba91109065295 Mon Sep 17 00:00:00 2001 From: Umesh Agashe Date: Thu, 20 Apr 2017 10:22:38 -0700 Subject: [PATCH] HBASE-16549: Added new metrics for AMv2 procedures Following AMv2 procedures are modified to override onSubmit(), onFinish() hooks provided by HBASE-17888 to do metrics calculations when procedures are submitted and finished: * AssignProcedure * UnassignProcedure * MergeTableRegionsProcedure * SplitTableRegionProcedure * ServerCrashProcedure Following metrics are collected for each of the above procedures during lifetime of a process: * Total number of requests submitted for a type of procedure * Histogram of runtime in milliseconds for successfully completed procedures * Total number of failed procedures Modified existing tests to verify count of procedures. Change-Id: I3d135900f72a085e302a7394834e431e0bcf6d7b --- .../master/MetricsAssignmentManagerSource.java | 96 +++++++++++++++++- .../hadoop/hbase/master/MetricsMasterSource.java | 23 ++++- .../master/MetricsAssignmentManagerSourceImpl.java | 92 +++++++++++++++++- .../hbase/master/MetricsMasterSourceImpl.java | 26 +++++ .../org/apache/hadoop/hbase/master/HMaster.java | 3 +- .../apache/hadoop/hbase/master/MasterServices.java | 5 + .../hbase/master/MetricsAssignmentManager.java | 73 ++++++++++++++ .../apache/hadoop/hbase/master/MetricsMaster.java | 21 ++++ .../hbase/master/assignment/AssignProcedure.java | 21 ++++ .../hbase/master/assignment/AssignmentManager.java | 8 -- .../assignment/MergeTableRegionsProcedure.java | 22 ++++- .../assignment/SplitTableRegionProcedure.java | 21 ++++ .../hbase/master/assignment/UnassignProcedure.java | 21 ++++ .../master/procedure/ServerCrashProcedure.java | 19 ++++ .../hbase/master/MockNoopMasterServices.java | 5 + .../master/assignment/TestAssignmentManager.java | 101 +++++++++++++++++++ .../assignment/TestMergeTableRegionsProcedure.java | 65 +++++++++++++ .../assignment/TestSplitTableRegionProcedure.java | 107 ++++++++++++++++++++- 
.../master/procedure/TestServerCrashProcedure.java | 18 ++++ 19 files changed, 729 insertions(+), 18 deletions(-) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java index 2ebf8c9..65cbee6 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSource.java @@ -42,13 +42,11 @@ public interface MetricsAssignmentManagerSource extends BaseSource { */ String METRICS_DESCRIPTION = "Metrics about HBase master assignment manager."; + // RIT metrics String RIT_COUNT_NAME = "ritCount"; String RIT_COUNT_OVER_THRESHOLD_NAME = "ritCountOverThreshold"; String RIT_OLDEST_AGE_NAME = "ritOldestAge"; String RIT_DURATION_NAME = "ritDuration"; - String ASSIGN_TIME_NAME = "assign"; - String UNASSIGN_TIME_NAME = "unassign"; - String BULK_ASSIGN_TIME_NAME = "bulkAssign"; String RIT_COUNT_DESC = "Current number of Regions In Transition (Gauge)."; String RIT_COUNT_OVER_THRESHOLD_DESC = @@ -57,6 +55,45 @@ public interface MetricsAssignmentManagerSource extends BaseSource { String RIT_DURATION_DESC = "Total durations in milliseconds for all Regions in Transition (Histogram)."; + // Region assign metrics + String ASSIGN_SUBMITTED_COUNT_NAME = "assignSubmittedCount"; + String ASSIGN_TIME_NAME = "assignTime"; + String ASSIGN_FAILED_COUNT_NAME = "assignFailedCount"; + + String ASSIGN_SUBMITTED_COUNT_DESC = + "Total number of assign region requests submitted (Counter)."; + String ASSIGN_TIME_DESC = + "Time in milliseconds of successfully completed region assignments (Histogram)"; + String ASSIGN_FAILED_COUNT_DESC = "Total number of failed region assignments (Counter)"; + + // Region unassign metrics + String UNASSIGN_SUBMITTED_COUNT_NAME = "unassignSubmittedCount"; + String 
UNASSIGN_TIME_NAME = "unassignTime"; + String UNASSIGN_FAILED_COUNT_NAME = "unassignFailedCount"; + + String UNASSIGN_SUBMITTED_COUNT_DESC = + "Total number of unassign region requests submitted (Counter)."; + String UNASSIGN_TIME_DESC = + "Time in milliseconds of successfully completed region unassignments (Histogram)"; + String UNASSIGN_FAILED_COUNT_DESC = "Total number of failed region unassignments (Counter)"; + + // split/ merge metrics + String SPLIT_SUBMITTED_COUNT_NAME = "splitSubmittedCount"; + String SPLIT_TIME_NAME = "splitTime"; + String SPLIT_FAILED_COUNT_NAME = "splitFailedCount"; + + String SPLIT_SUBMITTED_COUNT_DESC = "Total number of split region requests submitted (Counter)."; + String SPLIT_TIME_DESC = "Time in milliseconds of successful region splits (Histogram)."; + String SPLIT_FAILED_COUNT_DESC = "Total number of failed region splits (Counter)."; + + String MERGE_SUBMITTED_COUNT_NAME = "mergeSubmittedCount"; + String MERGE_TIME_NAME = "mergeTime"; + String MERGE_FAILED_COUNT_NAME = "mergeFailedCount"; + + String MERGE_SUBMITTED_COUNT_DESC = "Total number of merge region requests submitted (Counter)."; + String MERGE_TIME_DESC = "Time in milliseconds of successful region merges (Histogram)."; + String MERGE_FAILED_COUNT_DESC = "Total number of failed region merges (Counter)."; + String OPERATION_COUNT_NAME = "operationCount"; /** @@ -83,17 +120,68 @@ public interface MetricsAssignmentManagerSource extends BaseSource { void updateRitDuration(long duration); /** - * Increment the count of assignment operation (assign/unassign). + * TODO: Remove. This may not be needed now as assign and unassign counts are tracked separately + * Increment the count of operations (assign/unassign). 
*/ void incrementOperationCounter(); /** + * Increment the count of assign operations + */ + void incrementAssignSubmittedCounter(); + + /** * Add the time took to perform the last assign operation */ void updateAssignTime(long time); /** + * Increment the count of failed assign operations + */ + void incrementAssignFailedCounter(); + + /** + * Increment the count of unassign operations + */ + void incrementUnassignSubmittedCounter(); + + /** * Add the time took to perform the last unassign operation */ void updateUnassignTime(long time); + + /** + * Increment the count of failed unassign operations + */ + void incrementUnassignFailedCounter(); + + /** + * Increment the count of split operations + */ + void incrementSplitSubmittedCounter(); + + /** + * Add the time took to perform the last split operation + */ + void updateSplitTime(long time); + + /** + * Increment the count of failed split operations + */ + void incrementSplitFailedCounter(); + + /** + * Increment the count of merge operations + */ + void incrementMergeSubmittedCounter(); + + /** + * Add the time took to perform the last merge operation + */ + void updateMergeTime(long time); + + /** + * Increment the count of failed merge operations + */ + void incrementMergeFailedCounter(); } diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java index 6995198..5453b91 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java @@ -75,6 +75,16 @@ public interface MetricsMasterSource extends BaseSource { String SPLIT_PLAN_COUNT_DESC = "Number of Region Split Plans executed"; String MERGE_PLAN_COUNT_DESC = "Number of Region Merge Plans executed"; + String SERVER_CRASH_SUBMITTED_COUNT_NAME = "serverCrashSubmittedCount"; + String 
SERVER_CRASH_TIME_NAME = "serverCrashTime"; + String SERVER_CRASH_FAILED_COUNT_NAME = "serverCrashFailedCount"; + + String SERVER_CRASH_SUBMITTED_COUNT_DESC = + "Total number of server crash requests submitted (Counter)."; + String SERVER_CRASH_TIME_DESC = + "Time in milliseconds of successful server crash operations (Histogram)."; + String SERVER_CRASH_FAILED_COUNT_DESC = + "Total number of failed server crash operations (Counter)."; /** * Increment the number of requests the cluster has seen. @@ -83,7 +93,18 @@ public interface MetricsMasterSource extends BaseSource { */ void incRequests(final long inc); + /** + * Increment the count of server crash operations + */ + void incrementServerCrashSubmittedCounter(); + /** + * Add the time took to perform the last server crash operation + */ + void updateServerCrashTime(long time); - + /** + * Increment the count of failed server crash operations + */ + void incrementServerCrashFailedCounter(); } diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java index 14b7e71..1696898 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManagerSourceImpl.java @@ -35,8 +35,22 @@ public class MetricsAssignmentManagerSourceImpl private MetricHistogram ritDurationHisto; private MutableFastCounter operationCounter; + + private MutableFastCounter assignSubmittedCounter; private MetricHistogram assignTimeHisto; + private MutableFastCounter assignFailedCounter; + + private MutableFastCounter unassignSubmittedCounter; private MetricHistogram unassignTimeHisto; + private MutableFastCounter unassignFailedCounter; + + private MutableFastCounter splitSubmittedCounter; + private MetricHistogram splitTimeHisto; + private 
MutableFastCounter splitFailedCounter; + + private MutableFastCounter mergeSubmittedCounter; + private MetricHistogram mergeTimeHisto; + private MutableFastCounter mergeFailedCounter; public MetricsAssignmentManagerSourceImpl() { this(METRICS_NAME, METRICS_DESCRIPTION, METRICS_CONTEXT, METRICS_JMX_CONTEXT); @@ -53,10 +67,34 @@ public class MetricsAssignmentManagerSourceImpl ritCountOverThresholdGauge = metricsRegistry.newGauge(RIT_COUNT_OVER_THRESHOLD_NAME, RIT_COUNT_OVER_THRESHOLD_DESC,0l); ritOldestAgeGauge = metricsRegistry.newGauge(RIT_OLDEST_AGE_NAME, RIT_OLDEST_AGE_DESC, 0l); - assignTimeHisto = metricsRegistry.newTimeHistogram(ASSIGN_TIME_NAME); - unassignTimeHisto = metricsRegistry.newTimeHistogram(UNASSIGN_TIME_NAME); ritDurationHisto = metricsRegistry.newTimeHistogram(RIT_DURATION_NAME, RIT_DURATION_DESC); operationCounter = metricsRegistry.getCounter(OPERATION_COUNT_NAME, 0l); + + assignSubmittedCounter = metricsRegistry.newCounter(ASSIGN_SUBMITTED_COUNT_NAME, + ASSIGN_SUBMITTED_COUNT_DESC, 0l); + assignTimeHisto = metricsRegistry.newTimeHistogram(ASSIGN_TIME_NAME, ASSIGN_TIME_DESC); + assignFailedCounter = metricsRegistry.newCounter(ASSIGN_FAILED_COUNT_NAME, + ASSIGN_FAILED_COUNT_DESC, 0l); + + unassignSubmittedCounter = metricsRegistry.newCounter(UNASSIGN_SUBMITTED_COUNT_NAME, + UNASSIGN_SUBMITTED_COUNT_DESC, 0l); + unassignTimeHisto = metricsRegistry.newTimeHistogram(UNASSIGN_TIME_NAME, UNASSIGN_TIME_DESC); + unassignFailedCounter = metricsRegistry.newCounter(UNASSIGN_FAILED_COUNT_NAME, + UNASSIGN_FAILED_COUNT_DESC, 0l); + + splitSubmittedCounter = metricsRegistry.newCounter(SPLIT_SUBMITTED_COUNT_NAME, + SPLIT_SUBMITTED_COUNT_DESC, 0l); + splitTimeHisto = metricsRegistry.newTimeHistogram(SPLIT_TIME_NAME, SPLIT_TIME_DESC); + splitFailedCounter = metricsRegistry.newCounter(SPLIT_FAILED_COUNT_NAME, + SPLIT_FAILED_COUNT_DESC, 0l); + + mergeSubmittedCounter = metricsRegistry.newCounter(MERGE_SUBMITTED_COUNT_NAME, + MERGE_SUBMITTED_COUNT_DESC, 0l); + 
mergeTimeHisto = metricsRegistry.newTimeHistogram(MERGE_TIME_NAME, MERGE_TIME_DESC); + mergeFailedCounter = metricsRegistry.newCounter(MERGE_FAILED_COUNT_NAME, + MERGE_FAILED_COUNT_DESC, 0l); + + } @Override @@ -80,17 +118,67 @@ public class MetricsAssignmentManagerSourceImpl } @Override + public void incrementAssignSubmittedCounter() { + assignSubmittedCounter.incr(); + } + + @Override public void updateAssignTime(final long time) { assignTimeHisto.add(time); } @Override + public void incrementAssignFailedCounter() { + assignFailedCounter.incr(); + } + + @Override + public void incrementUnassignSubmittedCounter() { + unassignSubmittedCounter.incr(); + } + + @Override public void updateUnassignTime(final long time) { unassignTimeHisto.add(time); } @Override + public void incrementUnassignFailedCounter() { + unassignFailedCounter.incr(); + } + + @Override public void updateRitDuration(long duration) { ritDurationHisto.add(duration); } + + @Override + public void incrementSplitSubmittedCounter() { + splitSubmittedCounter.incr(); + } + + @Override + public void updateSplitTime(long time) { + splitTimeHisto.add(time); + } + + @Override + public void incrementSplitFailedCounter() { + splitFailedCounter.incr(); + } + + @Override + public void incrementMergeSubmittedCounter() { + mergeSubmittedCounter.incr(); + } + + @Override + public void updateMergeTime(long time) { + mergeTimeHisto.add(time); + } + + @Override + public void incrementMergeFailedCounter() { + mergeFailedCounter.incr(); + } } diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java index fc859e5..1454769 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java @@ -21,6 +21,7 @@ package 
org.apache.hadoop.hbase.master; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.metrics.BaseSourceImpl; import org.apache.hadoop.hbase.metrics.Interns; +import org.apache.hadoop.metrics2.MetricHistogram; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.lib.MutableFastCounter; @@ -37,6 +38,10 @@ public class MetricsMasterSourceImpl private final MetricsMasterWrapper masterWrapper; private MutableFastCounter clusterRequestsCounter; + private MutableFastCounter serverCrashSubmittedCounter; + private MetricHistogram serverCrashTimeHisto; + private MutableFastCounter serverCrashFailedCounter; + public MetricsMasterSourceImpl(MetricsMasterWrapper masterWrapper) { this(METRICS_NAME, METRICS_DESCRIPTION, @@ -59,6 +64,13 @@ public class MetricsMasterSourceImpl public void init() { super.init(); clusterRequestsCounter = metricsRegistry.newCounter(CLUSTER_REQUESTS_NAME, "", 0l); + serverCrashSubmittedCounter = metricsRegistry.newCounter(SERVER_CRASH_SUBMITTED_COUNT_NAME, + SERVER_CRASH_SUBMITTED_COUNT_DESC, 0l); + serverCrashTimeHisto = metricsRegistry.newTimeHistogram(SERVER_CRASH_TIME_NAME, + SERVER_CRASH_TIME_DESC); + serverCrashFailedCounter = metricsRegistry.newCounter(SERVER_CRASH_FAILED_COUNT_NAME, + SERVER_CRASH_FAILED_COUNT_DESC, 0l); + } @Override @@ -105,4 +117,18 @@ public class MetricsMasterSourceImpl metricsRegistry.snapshot(metricsRecordBuilder, all); } + @Override + public void incrementServerCrashSubmittedCounter() { + serverCrashSubmittedCounter.incr(); + } + + @Override + public void updateServerCrashTime(long time) { + serverCrashTimeHisto.add(time); + } + + @Override + public void incrementServerCrashFailedCounter() { + serverCrashFailedCounter.incr(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
index 6679159..0370a5c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -635,7 +635,8 @@ public class HMaster extends HRegionServer implements MasterServices { return MasterDumpServlet.class; } - MetricsMaster getMasterMetrics() { + @Override + public MetricsMaster getMasterMetrics() { return metricsMaster; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index 5a45fcf..1049c40 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -125,6 +125,11 @@ public interface MasterServices extends Server { ProcedureExecutor getMasterProcedureExecutor(); /** + * @return Master's instance of {@link MetricsMaster} + */ + MetricsMaster getMasterMetrics(); + + /** * Check table is modifiable; i.e. exists and is offline. * @param tableName Name of table to check. * @throws TableNotDisabledException diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java index c7ce9a9..0c387a1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsAssignmentManager.java @@ -66,6 +66,7 @@ public class MetricsAssignmentManager { } /* + * TODO: Remove. This may not be required as assign and unassign operations are tracked separately * Increment the count of assignment operation (assign/unassign). 
*/ public void incrementOperationCounter() { @@ -73,6 +74,13 @@ public class MetricsAssignmentManager { } /** + * Increment the count of assign operations + */ + public void incrementAssignSubmittedCounter() { + assignmentManagerSource.incrementAssignSubmittedCounter(); + } + + /** * Add the time took to perform the last assign operation * @param time */ @@ -81,10 +89,75 @@ public class MetricsAssignmentManager { } /** + * Increment the count of failed assign operations + */ + public void incrementAssignFailedCounter() { + assignmentManagerSource.incrementAssignFailedCounter(); + } + + /** + * Increment the count of unassign operations + */ + public void incrementUnassignSubmittedCounter() { + assignmentManagerSource.incrementUnassignSubmittedCounter(); + } + + /** * Add the time took to perform the last unassign operation * @param time */ public void updateUnassignTime(final long time) { assignmentManagerSource.updateUnassignTime(time); } + + /** + * Increment the count of failed unassign operations + */ + public void incrementUnassignFailedCounter() { + assignmentManagerSource.incrementUnassignFailedCounter(); + } + + + /** + * Increment the count of split operations + */ + public void incrementSplitSubmittedCounter() { + assignmentManagerSource.incrementSplitSubmittedCounter(); + } + + /** + * Add the time took to perform the last split operation + */ + public void updateSplitTime(long time) { + assignmentManagerSource.updateSplitTime(time); + } + + /** + * Increment the count of failed split operations + */ + public void incrementSplitFailedCounter() { + assignmentManagerSource.incrementSplitFailedCounter(); + } + + /** + * Increment the count of merge operations + */ + public void incrementMergeSubmittedCounter() { + assignmentManagerSource.incrementMergeSubmittedCounter(); + } + + /** + * Add the time took to perform the last merge operation + */ + public void updateMergeTime(long time) { + assignmentManagerSource.updateMergeTime(time); + } + + /** + * 
Increment the count of failed merge operations + */ + public void incrementMergeFailedCounter() { + assignmentManagerSource.incrementMergeFailedCounter(); + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java index d055853..2a4e8a9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java @@ -59,4 +59,25 @@ public class MetricsMaster { public void incrementRequests(final long inc) { masterSource.incRequests(inc); } + + /** + * Increment the count of server crash operations + */ + public void incrementServerCrashSubmittedCounter() { + masterSource.incrementServerCrashSubmittedCounter(); + } + + /** + * Add the time took to perform the last server crash operation + */ + public void updateServerCrashTime(long time) { + masterSource.updateServerCrashTime(time); + } + + /** + * Increment the count of failed server crash operations + */ + public void incrementServerCrashFailedCounter() { + masterSource.incrementServerCrashFailedCounter(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index 1bd67e1..f23e580 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; +import org.apache.hadoop.hbase.master.MetricsAssignmentManager; import 
org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation; @@ -298,4 +299,24 @@ public class AssignProcedure extends RegionTransitionProcedure { public String toString() { return super.toString() + ", server=" + this.targetServer; } + + @Override + protected void updateMetricsOnSubmit(MasterProcedureEnv env) { + super.updateMetricsOnSubmit(env); + MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + metricsAssignmentManager.incrementAssignSubmittedCounter(); + } + + @Override + protected void updateMetricsOnFinish(MasterProcedureEnv env, long runtime, boolean success) { + super.updateMetricsOnFinish(env, runtime, success); + MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + if (success) { + metricsAssignmentManager.updateAssignTime(runtime); + } else { + metricsAssignmentManager.incrementAssignFailedCounter(); + } + } } \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 378db63..8fcc2e2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1453,10 +1453,6 @@ public class AssignmentManager implements ServerListener { regionStateStore.updateRegionLocation(regionNode.getRegionInfo(), state, regionNode.getRegionLocation(), regionNode.getLastHost(), regionNode.getOpenSeqNum()); sendRegionOpenedNotification(hri, regionNode.getRegionLocation()); - // update assignment metrics - if (regionNode.getProcedure() != null) { - 
metrics.updateAssignTime(regionNode.getProcedure().elapsedTime()); - } } } @@ -1488,10 +1484,6 @@ public class AssignmentManager implements ServerListener { regionNode.getRegionLocation()/*null*/, regionNode.getLastHost(), HConstants.NO_SEQNUM); sendRegionClosedNotification(hri); - // Update assignment metrics - if (regionNode.getProcedure() != null) { - metrics.updateUnassignTime(regionNode.getProcedure().elapsedTime()); - } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 27a98f5..8a62ffc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -45,7 +45,7 @@ import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.exceptions.MergeRegionException; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.master.MetricsAssignmentManager; import org.apache.hadoop.hbase.master.CatalogJanitor; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; @@ -410,6 +410,26 @@ public class MergeTableRegionsProcedure return TableOperationType.MERGE; } + @Override + protected void updateMetricsOnSubmit(MasterProcedureEnv env) { + super.updateMetricsOnSubmit(env); + MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + metricsAssignmentManager.incrementMergeSubmittedCounter(); + } + + @Override + protected void updateMetricsOnFinish(MasterProcedureEnv env, long runtime, boolean success) { + super.updateMetricsOnFinish(env, runtime, success); + 
MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + if (success) { + metricsAssignmentManager.updateMergeTime(runtime); + } else { + metricsAssignmentManager.incrementMergeFailedCounter(); + } + } + /** * Prepare merge and do some check * @param env MasterProcedureEnv diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 6ff1ccf..d12ed64 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MetricsAssignmentManager; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; @@ -344,6 +345,26 @@ public class SplitTableRegionProcedure return TableOperationType.SPLIT; } + @Override + protected void updateMetricsOnSubmit(MasterProcedureEnv env) { + super.updateMetricsOnSubmit(env); + MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + metricsAssignmentManager.incrementSplitSubmittedCounter(); + } + + @Override + protected void updateMetricsOnFinish(MasterProcedureEnv env, long runtime, boolean success) { + super.updateMetricsOnFinish(env, runtime, success); + MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + if (success) { 
+ metricsAssignmentManager.updateSplitTime(runtime); + } else { + metricsAssignmentManager.incrementSplitFailedCounter(); + } + } + private byte[] getSplitRow() { return daughter_2_HRI.getStartKey(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java index 004f3dc..033d617 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.master.MetricsAssignmentManager; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation; @@ -216,4 +217,24 @@ public class UnassignProcedure extends RegionTransitionProcedure { public String toString() { return super.toString() + ", server=" + this.destinationServer; } + + @Override + protected void updateMetricsOnSubmit(MasterProcedureEnv env) { + super.updateMetricsOnSubmit(env); + MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + metricsAssignmentManager.incrementUnassignSubmittedCounter(); + } + + @Override + protected void updateMetricsOnFinish(MasterProcedureEnv env, long runtime, boolean success) { + super.updateMetricsOnFinish(env, runtime, success); + MetricsAssignmentManager metricsAssignmentManager = + env.getAssignmentManager().getAssignmentManagerMetrics(); + if (success) { + 
metricsAssignmentManager.updateUnassignTime(runtime); + } else { + metricsAssignmentManager.incrementUnassignFailedCounter(); + } + } } \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 59cda0c..e42c786 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterWalManager; +import org.apache.hadoop.hbase.master.MetricsMaster; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; @@ -407,4 +408,22 @@ implements ServerProcedureInterface { // the client does not know about this procedure. 
return false; } + + @Override + protected void updateMetricsOnSubmit(MasterProcedureEnv env) { + super.updateMetricsOnSubmit(env); + MetricsMaster metricsMaster = env.getMasterServices().getMasterMetrics(); + metricsMaster.incrementServerCrashSubmittedCounter(); + } + + @Override + protected void updateMetricsOnFinish(MasterProcedureEnv env, long runtime, boolean success) { + super.updateMetricsOnFinish(env, runtime, success); + MetricsMaster metricsMaster = env.getMasterServices().getMasterMetrics(); + if (success) { + metricsMaster.updateServerCrashTime(runtime); + } else { + metricsMaster.incrementServerCrashFailedCounter(); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index e4909f5..756e947 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -133,6 +133,11 @@ public class MockNoopMasterServices implements MasterServices, Server { } @Override + public MetricsMaster getMasterMetrics() { + return null; + } + + @Override public ServerManager getServerManager() { return null; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java index 00d54bf..b9e1f8a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java @@ -38,6 +38,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CategoryBasedTimeout; +import org.apache.hadoop.hbase.CompatibilityFactory; import 
org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HRegionInfo; @@ -46,6 +47,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsAssignmentManagerSource; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler; @@ -68,6 +70,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; +import org.apache.hadoop.hbase.test.MetricsAssertHelper; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; @@ -86,6 +89,9 @@ import org.junit.rules.TestRule; @Category({MasterTests.class, MediumTests.class}) public class TestAssignmentManager { private static final Log LOG = LogFactory.getLog(TestAssignmentManager.class); + private static final MetricsAssertHelper metricsHelper = CompatibilityFactory + .getInstance(MetricsAssertHelper.class); + static { Logger.getLogger(MasterProcedureScheduler.class).setLevel(Level.TRACE); } @@ -105,6 +111,12 @@ public class TestAssignmentManager { private NavigableMap> regionsToRegionServers = new ConcurrentSkipListMap>(); + private MetricsAssignmentManagerSource source; + private long assignSubmittedCount = 0; + private long 
assignFailedCount = 0; + private long unassignSubmittedCount = 0; + private long unassignFailedCount = 0; + private void setupConfiguration(Configuration conf) throws Exception { FSUtils.setRootDir(conf, UTIL.getDataTestDir()); conf.setBoolean(WALProcedureStore.USE_HSYNC_CONF_KEY, false); @@ -122,6 +134,7 @@ public class TestAssignmentManager { rsDispatcher = new MockRSProcedureDispatcher(master); master.start(NSERVERS, rsDispatcher); am = master.getAssignmentManager(); + source = am.getAssignmentManagerMetrics().getMetricsProcSource(); setUpMeta(); } @@ -139,7 +152,15 @@ public class TestAssignmentManager { @Test public void testAssignWithGoodExec() throws Exception { + // collect AM metrics before test + collectAssignmentManagerMetrics(); + testAssign(new GoodRsExecutor()); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + NREGIONS, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount, source); } @Test @@ -159,11 +180,23 @@ public class TestAssignmentManager { final TableName tableName = TableName.valueOf(this.name.getMethodName()); final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 3)); waitOnFuture(submitProcedure(am.createAssignProcedure(hri, false))); rsDispatcher.setMockRsExecutor(new SocketTimeoutRsExecutor(20, 3)); waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false))); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, + unassignSubmittedCount + 1, source); + 
metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, + unassignFailedCount, source); } @Test @@ -176,6 +209,9 @@ public class TestAssignmentManager { final MockRSExecutor executor) throws Exception { final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Test Assign operation failure rsDispatcher.setMockRsExecutor(executor); try { @@ -193,19 +229,47 @@ public class TestAssignmentManager { // Test Unassign operation failure rsDispatcher.setMockRsExecutor(executor); waitOnFuture(submitProcedure(am.createUnassignProcedure(hri, null, false))); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 2, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, + unassignSubmittedCount + 1, source); + + // TODO: We are supposed to have 1 failed assign, 1 successful assign and a failed unassign + // operation. But ProcV2 framework marks aborted unassign operation as success. Fix it! 
+ metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, + unassignFailedCount, source); } @Test public void testIOExceptionOnAssignment() throws Exception { + // collect AM metrics before test + collectAssignmentManagerMetrics(); + testFailedOpen(TableName.valueOf("testExceptionOnAssignment"), new FaultyRsExecutor(new IOException("test fault"))); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount + 1, source); } @Test public void testDoNotRetryExceptionOnAssignment() throws Exception { + // collect AM metrics before test + collectAssignmentManagerMetrics(); + testFailedOpen(TableName.valueOf("testDoNotRetryExceptionOnAssignment"), new FaultyRsExecutor(new DoNotRetryIOException("test do not retry fault"))); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount + 1, source); } private void testFailedOpen(final TableName tableName, @@ -253,6 +317,9 @@ public class TestAssignmentManager { final TableName tableName = TableName.valueOf("testAssignAnAssignedRegion"); final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + rsDispatcher.setMockRsExecutor(new GoodRsExecutor()); final Future futureA = submitProcedure(am.createAssignProcedure(hri, false)); @@ -267,6 +334,14 @@ public class TestAssignmentManager { waitOnFuture(futureB); am.getRegionStates().isRegionInState(hri, State.OPEN); // TODO: What else can we do to ensure just a noop. + + // TODO: Though second assign is noop, it's considered success, can noop be handled in a + // better way? 
+ metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 2, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount, source); + } @Test @@ -274,6 +349,9 @@ public class TestAssignmentManager { final TableName tableName = TableName.valueOf("testUnassignAnUnassignedRegion"); final HRegionInfo hri = createRegionInfo(tableName, 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + rsDispatcher.setMockRsExecutor(new GoodRsExecutor()); // assign the region first @@ -293,6 +371,17 @@ public class TestAssignmentManager { // Ensure we are still CLOSED. am.getRegionStates().isRegionInState(hri, State.CLOSED); // TODO: What else can we do to ensure just a noop. + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount, source); + // TODO: Though second unassign is noop, it's considered success, can noop be handled in a + // better way? 
+ metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, + unassignSubmittedCount + 2, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, + unassignFailedCount, source); } private Future submitProcedure(final Procedure proc) { @@ -566,4 +655,16 @@ public class TestAssignmentManager { } } } + + private void collectAssignmentManagerMetrics() { + assignSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, source); + assignFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, source); + + unassignSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, source); + unassignFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, source); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java index 8be1be9..846f859 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestMergeTableRegionsProcedure.java @@ -26,6 +26,7 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CompatibilityFactory; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; @@ -33,11 +34,13 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; +import 
org.apache.hadoop.hbase.master.MetricsAssignmentManagerSource; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.test.MetricsAssertHelper; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; @@ -53,6 +56,8 @@ import org.junit.experimental.categories.Category; @Ignore // Fix for AMv2. public class TestMergeTableRegionsProcedure { private static final Log LOG = LogFactory.getLog(TestMergeTableRegionsProcedure.class); + private static final MetricsAssertHelper metricsHelper = CompatibilityFactory + .getInstance(MetricsAssertHelper.class); protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); private static long nonceGroup = HConstants.NO_NONCE; @@ -63,6 +68,15 @@ public class TestMergeTableRegionsProcedure { final static Configuration conf = UTIL.getConfiguration(); private static Admin admin; + private AssignmentManager am; + private MetricsAssignmentManagerSource source; + private long mergeSubmittedCount = 0; + private long mergeFailedCount = 0; + private long assignSubmittedCount = 0; + private long assignFailedCount = 0; + private long unassignSubmittedCount = 0; + private long unassignFailedCount = 0; + private static void setupConf(Configuration conf) { // Reduce the maximum attempts to speed up the test conf.setInt("hbase.assignment.maximum.attempts", 3); @@ -99,6 +113,8 @@ public class TestMergeTableRegionsProcedure { // Turn off the meta scanner so it don't remove parent on us. 
UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); resetProcExecutorTestingKillFlag(); + am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); + source = am.getAssignmentManagerMetrics().getMetricsProcSource(); } @After @@ -130,12 +146,28 @@ public class TestMergeTableRegionsProcedure { regionsToMerge[0] = tableRegions.get(0); regionsToMerge[1] = tableRegions.get(1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + long procId = procExec.submitProcedure(new MergeTableRegionsProcedure( procExec.getEnvironment(), regionsToMerge, true)); ProcedureTestingUtility.waitProcedure(procExec, procId); ProcedureTestingUtility.assertProcNotFailed(procExec, procId); assertRegionCount(tableName, initialRegionCount - 1); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.MERGE_SUBMITTED_COUNT_NAME, + mergeSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.MERGE_FAILED_COUNT_NAME, + mergeFailedCount, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, + unassignSubmittedCount + 2, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, + unassignFailedCount, source); } /** @@ -155,6 +187,9 @@ public class TestMergeTableRegionsProcedure { regionsToMerge2[0] = tableRegions.get(2); regionsToMerge2[1] = tableRegions.get(3); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + long procId1 = procExec.submitProcedure(new MergeTableRegionsProcedure( procExec.getEnvironment(), regionsToMerge1, true)); long procId2 = procExec.submitProcedure(new MergeTableRegionsProcedure( @@ -164,6 +199,19 @@ public class TestMergeTableRegionsProcedure { 
ProcedureTestingUtility.assertProcNotFailed(procExec, procId1); ProcedureTestingUtility.assertProcNotFailed(procExec, procId2); assertRegionCount(tableName, initialRegionCount - 2); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.MERGE_SUBMITTED_COUNT_NAME, + mergeSubmittedCount + 2, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.MERGE_FAILED_COUNT_NAME, + mergeFailedCount, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 2, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, + unassignSubmittedCount + 4, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, + unassignFailedCount, source); } @Test(timeout=60000) @@ -237,4 +285,21 @@ public class TestMergeTableRegionsProcedure { private ProcedureExecutor getMasterProcedureExecutor() { return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); } + + private void collectAssignmentManagerMetrics() { + mergeSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.MERGE_SUBMITTED_COUNT_NAME, source); + mergeFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.MERGE_FAILED_COUNT_NAME, source); + + assignSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, source); + assignFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, source); + unassignSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, source); + unassignFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, source); + + } } \ No newline at end of file diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java index 7af9d67..4f2d0a8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java @@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CategoryBasedTimeout; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.CompatibilityFactory; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; @@ -44,12 +45,14 @@ import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.master.MetricsAssignmentManagerSource; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.test.MetricsAssertHelper; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; @@ -66,6 +69,9 @@ import org.junit.rules.TestRule; @Category({MasterTests.class, MediumTests.class}) public class TestSplitTableRegionProcedure { private static final Log LOG = LogFactory.getLog(TestSplitTableRegionProcedure.class); + private 
static final MetricsAssertHelper metricsHelper = CompatibilityFactory + .getInstance(MetricsAssertHelper.class); + @Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()). withLookingForStuckThread(true).build(); @@ -77,6 +83,15 @@ public class TestSplitTableRegionProcedure { private static final int startRowNum = 11; private static final int rowCount = 60; + private AssignmentManager am; + private MetricsAssignmentManagerSource source; + private long splitSubmittedCount = 0; + private long splitFailedCount = 0; + private long assignSubmittedCount = 0; + private long assignFailedCount = 0; + private long unassignSubmittedCount = 0; + private long unassignFailedCount = 0; + @Rule public TestName name = new TestName(); @@ -108,6 +123,8 @@ public class TestSplitTableRegionProcedure { UTIL.getAdmin().setBalancerRunning(false, true); // Turn off the meta scanner so it don't remove parent on us. UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); + am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); + source = am.getAssignmentManagerMetrics().getMetricsProcSource(); } @After @@ -132,6 +149,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -140,7 +160,20 @@ public class TestSplitTableRegionProcedure { ProcedureTestingUtility.assertProcNotFailed(procExec, procId); verify(tableName, splitRowNum); - } + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount, source); + 
metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, + assignSubmittedCount + 2, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, + assignFailedCount, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, + unassignSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, + unassignFailedCount, source); +} @Test(timeout=60000) public void testSplitTableRegionNoStoreFile() throws Exception { @@ -155,6 +188,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -164,6 +200,11 @@ public class TestSplitTableRegionProcedure { assertTrue(UTIL.getMiniHBaseCluster().getRegions(tableName).size() == 2); assertTrue(UTIL.countRows(tableName) == 0); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount, source); } @Test(timeout=60000) @@ -181,6 +222,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -189,6 +233,11 @@ public class TestSplitTableRegionProcedure { 
ProcedureTestingUtility.assertProcNotFailed(procExec, procId); verify(tableName, splitRowNum); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount, source); } @Test(timeout=60000) @@ -206,6 +255,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -218,6 +270,11 @@ public class TestSplitTableRegionProcedure { assertTrue(daughters.size() == 2); assertTrue(UTIL.countRows(tableName) == rowCount); assertTrue(UTIL.countRows(daughters.get(0)) == 0 || UTIL.countRows(daughters.get(1)) == 0); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount, source); } @Test(timeout=60000) @@ -236,6 +293,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -258,6 +318,11 @@ public class TestSplitTableRegionProcedure { final int currentRowCount = splitRowNum - startRowNum; assertTrue(UTIL.countRows(tableName) == currentRowCount); assertTrue(UTIL.countRows(daughters.get(0)) == 0 || UTIL.countRows(daughters.get(1)) == 
0); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount, source); } @Test(timeout=60000) @@ -272,6 +337,9 @@ public class TestSplitTableRegionProcedure { assertTrue("not able to find a splittable region", regions != null); assertTrue("not able to find a splittable region", regions.length == 1); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table with null split key try { long procId1 = procExec.submitProcedure( @@ -281,6 +349,11 @@ public class TestSplitTableRegionProcedure { } catch (DoNotRetryIOException e) { LOG.debug("Expected Split procedure construction failure: " + e.getMessage()); } + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount, source); } @Test(timeout = 60000) @@ -299,6 +372,9 @@ public class TestSplitTableRegionProcedure { ProcedureTestingUtility.waitNoProcedureRunning(procExec); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -315,6 +391,11 @@ public class TestSplitTableRegionProcedure { assertEquals(1, daughters.size()); verifyData(daughters.get(0), startRowNum, rowCount, Bytes.toBytes(ColumnFamilyName1), Bytes.toBytes(ColumnFamilyName2)); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount + 1, source); } 
@Test(timeout=60000) @@ -333,6 +414,9 @@ public class TestSplitTableRegionProcedure { ProcedureTestingUtility.waitNoProcedureRunning(procExec); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); + // collect AM metrics before test + collectAssignmentManagerMetrics(); + // Split region of the table long procId = procExec.submitProcedure( new SplitTableRegionProcedure(procExec.getEnvironment(), regions[0], splitKey)); @@ -342,6 +426,11 @@ public class TestSplitTableRegionProcedure { ProcedureTestingUtility.assertProcNotFailed(procExec, procId); verify(tableName, splitRowNum); + + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, + splitSubmittedCount + 1, source); + metricsHelper.assertCounter(MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, + splitFailedCount, source); } private void insertData(final TableName tableName) throws IOException, InterruptedException { @@ -424,4 +513,20 @@ public class TestSplitTableRegionProcedure { private ProcedureExecutor getMasterProcedureExecutor() { return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); } + + private void collectAssignmentManagerMetrics() { + splitSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.SPLIT_SUBMITTED_COUNT_NAME, source); + splitFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.SPLIT_FAILED_COUNT_NAME, source); + assignSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.ASSIGN_SUBMITTED_COUNT_NAME, source); + assignFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.ASSIGN_FAILED_COUNT_NAME, source); + unassignSubmittedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.UNASSIGN_SUBMITTED_COUNT_NAME, source); + unassignFailedCount = metricsHelper.getCounter( + MetricsAssignmentManagerSource.UNASSIGN_FAILED_COUNT_NAME, source); + + } } diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java index 8cee4d8..a9e98fa 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java @@ -24,6 +24,7 @@ import static org.junit.Assert.assertTrue; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CompatibilityFactory; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.MiniHBaseCluster; @@ -31,9 +32,11 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.MetricsMasterSource; import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.test.MetricsAssertHelper; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.junit.After; @@ -46,8 +49,15 @@ import org.junit.experimental.categories.Category; public class TestServerCrashProcedure { private static final Log LOG = LogFactory.getLog(TestServerCrashProcedure.class); + private static final MetricsAssertHelper metricsHelper = CompatibilityFactory + .getInstance(MetricsAssertHelper.class); + private HBaseTestingUtility util; + private MetricsMasterSource source; + private long serverCrashSubmittedCount = 0; + private long serverCrashFailedCount = 0; + private void 
setupConf(Configuration conf) { conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); conf.set("hbase.balancer.tablesOnMaster", "none"); @@ -61,6 +71,7 @@ public class TestServerCrashProcedure { this.util.startMiniCluster(3); ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false); + source = this.util.getHBaseCluster().getMaster().getMasterMetrics().getMetricsSource(); } @After @@ -141,4 +152,11 @@ public class TestServerCrashProcedure { t.close(); } } + + private void collectMasterMetrics() { + serverCrashSubmittedCount = metricsHelper.getCounter( + MetricsMasterSource.SERVER_CRASH_SUBMITTED_COUNT_NAME, source); + serverCrashFailedCount = metricsHelper.getCounter( + MetricsMasterSource.SERVER_CRASH_FAILED_COUNT_NAME, source); + } } -- 2.10.1 (Apple Git-78)