From 28912ebaad824d02da95cdd731f97e7b572fc49e Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Wed, 15 Apr 2015 13:49:55 -0700 Subject: [PATCH] HBASE-13477 Create metrics on failed requests Summary: Add metrics on how many requests are exceptions and what type. Test Plan: behold unit tests. Differential Revision: https://reviews.facebook.net/D37167 --- .../hadoop/hbase/ipc/MetricsHBaseServerSource.java | 22 +++++++++ .../hbase/ipc/MetricsHBaseServerSourceImpl.java | 52 ++++++++++++++++++++++ .../hadoop/hbase/ipc/MetricsHBaseServer.java | 36 +++++++++++++++ .../org/apache/hadoop/hbase/ipc/RpcServer.java | 10 ++++- .../apache/hadoop/hbase/ipc/TestRpcMetrics.java | 18 ++++++++ 5 files changed, 137 insertions(+), 1 deletion(-) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java index 1f4c950..482fdba 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java @@ -58,6 +58,16 @@ public interface MetricsHBaseServerSource extends BaseSource { String NUM_ACTIVE_HANDLER_NAME = "numActiveHandler"; String NUM_ACTIVE_HANDLER_DESC = "Number of active rpc handlers."; + String EXCEPTIONS_NAME="exceptions"; + String EXCEPTIONS_DESC="Exceptions caused by requests"; + String EXCEPTIONS_TYPE_DESC="Number of requests that resulted in the specified type of Exception"; + String EXCEPTIONS_OOO_NAME="exceptions.OutOfOrderScannerNextException"; + String EXCEPTIONS_BUSY_NAME="exceptions.RegionTooBusyException"; + String EXCEPTIONS_UNKNOWN_NAME="exceptions.UnknownScannerException"; + String EXCEPTIONS_SANITY_NAME="exceptions.FailedSanityCheckException"; + String EXCEPTIONS_MOVED_NAME="exceptions.RegionMovedException"; + String EXCEPTIONS_NSRE_NAME="exceptions.NotServingRegionException"; + void 
authorizationSuccess(); void authorizationFailure(); @@ -66,6 +76,18 @@ public interface MetricsHBaseServerSource extends BaseSource { void authenticationFailure(); + void exception(); + + /** + * Counters for specific, commonly seen exception types; each call records one occurrence of that type. + */ + void outOfOrderException(); + void failedSanityException(); + void movedRegionException(); + void notServingRegionException(); + void unknownScannerException(); + void tooBusyException(); + void sentBytes(long count); void receivedBytes(int count); diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java index 8eefb08..7fa27bf 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java @@ -38,6 +38,16 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl private final MutableCounterLong authenticationFailures; private final MutableCounterLong sentBytes; private final MutableCounterLong receivedBytes; + + private final MutableCounterLong exceptions; + private final MutableCounterLong exceptionsOOO; + private final MutableCounterLong exceptionsBusy; + private final MutableCounterLong exceptionsUnknown; + private final MutableCounterLong exceptionsSanity; + private final MutableCounterLong exceptionsNSRE; + private final MutableCounterLong exceptionsMoved; + + private MutableHistogram queueCallTime; private MutableHistogram processCallTime; private MutableHistogram totalCallTime; @@ -54,6 +64,13 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl AUTHORIZATION_SUCCESSES_DESC, 0l); this.authorizationFailures = this.getMetricsRegistry().newCounter(AUTHORIZATION_FAILURES_NAME, AUTHORIZATION_FAILURES_DESC, 0l); + this.exceptions = this.getMetricsRegistry().newCounter(EXCEPTIONS_NAME,EXCEPTIONS_DESC, 0l); + 
this.exceptionsOOO = this.getMetricsRegistry().newCounter(EXCEPTIONS_OOO_NAME,EXCEPTIONS_TYPE_DESC, 0l); + this.exceptionsBusy = this.getMetricsRegistry().newCounter(EXCEPTIONS_BUSY_NAME,EXCEPTIONS_TYPE_DESC, 0l); + this.exceptionsUnknown = this.getMetricsRegistry().newCounter(EXCEPTIONS_UNKNOWN_NAME,EXCEPTIONS_TYPE_DESC, 0l); + this.exceptionsSanity = this.getMetricsRegistry().newCounter(EXCEPTIONS_SANITY_NAME,EXCEPTIONS_TYPE_DESC, 0l); + this.exceptionsMoved = this.getMetricsRegistry().newCounter(EXCEPTIONS_MOVED_NAME,EXCEPTIONS_TYPE_DESC, 0l); + this.exceptionsNSRE = this.getMetricsRegistry().newCounter(EXCEPTIONS_NSRE_NAME,EXCEPTIONS_TYPE_DESC, 0l); this.authenticationSuccesses = this.getMetricsRegistry().newCounter( AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0l); @@ -87,6 +104,41 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl } @Override + public void exception() { + exceptions.incr(); + } + + @Override + public void outOfOrderException() { + exceptionsOOO.incr(); + } + + @Override + public void failedSanityException() { + exceptionsSanity.incr(); + } + + @Override + public void movedRegionException() { + exceptionsMoved.incr(); + } + + @Override + public void notServingRegionException() { + exceptionsNSRE.incr(); + } + + @Override + public void unknownScannerException() { + exceptionsUnknown.incr(); + } + + @Override + public void tooBusyException() { + exceptionsBusy.incr(); + } + + @Override public void authenticationSuccess() { authenticationSuccesses.incr(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java index 825e688..b5c9d1a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java @@ -19,8 +19,16 @@ package org.apache.hadoop.hbase.ipc; +import 
org.apache.hadoop.hbase.DoNotRetryIOException; +import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.RegionTooBusyException; +import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.CompatibilitySingletonFactory; +import org.apache.hadoop.hbase.client.DoNotRetryRegionException; +import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException; +import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; +import org.apache.hadoop.hbase.exceptions.RegionMovedException; @InterfaceAudience.Private public class MetricsHBaseServer { @@ -67,6 +75,34 @@ public class MetricsHBaseServer { source.queuedAndProcessedCall(totalTime); } + void exception(Throwable throwable) { + source.exception(); + + /** + * Keep some metrics for commonly seen exceptions; the total above is counted even when throwable is null. + * + * Try to put the most common types first. + * Place child types before the parent type that they extend, because only the first matching instanceof branch is counted. 
+ * + * If this chain gets much larger we might have to go to a hashmap + */ + if (throwable != null) { + if (throwable instanceof OutOfOrderScannerNextException) { + source.outOfOrderException(); + } else if (throwable instanceof RegionTooBusyException) { + source.tooBusyException(); + } else if (throwable instanceof UnknownScannerException) { + source.unknownScannerException(); + } else if (throwable instanceof RegionMovedException) { + source.movedRegionException(); + } else if (throwable instanceof NotServingRegionException ) { + source.notServingRegionException(); + } else if (throwable instanceof FailedSanityCheckException) { + source.failedSanityException(); + } + } + } + public MetricsHBaseServerSource getMetricsSource() { return source; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java index c69a187..209db79 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java @@ -158,6 +158,8 @@ import com.google.protobuf.TextFormat; @InterfaceStability.Evolving public class RpcServer implements RpcServerInterface { public static final Log LOG = LogFactory.getLog(RpcServer.class); + private static final CallQueueTooBigException CALL_QUEUE_TOO_BIG_EXCEPTION + = new CallQueueTooBigException(); private final boolean authorize; private boolean isSecurityEnabled; @@ -1787,7 +1789,8 @@ public class RpcServer implements RpcServerInterface { new Call(id, this.service, null, null, null, null, this, responder, totalRequestSize, null, null); ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream(); - setupResponse(responseBuffer, callTooBig, new CallQueueTooBigException(), + metrics.exception(CALL_QUEUE_TOO_BIG_EXCEPTION); + setupResponse(responseBuffer, callTooBig, CALL_QUEUE_TOO_BIG_EXCEPTION, "Call queue is full on " + getListenerAddress() + ", is 
hbase.ipc.server.max.callqueue.size too small?"); responder.doRespond(callTooBig); @@ -1819,6 +1822,8 @@ public class RpcServer implements RpcServerInterface { getHostAddress(); LOG.warn(msg, t); + metrics.exception(t); + // probably the hbase hadoop version does not match the running hadoop version if (t instanceof LinkageError) { t = new DoNotRetryIOException(t); @@ -2136,6 +2141,9 @@ public class RpcServer implements RpcServerInterface { } return new Pair(result, controller.cellScanner()); } catch (Throwable e) { + // Count this failed request in the exception metrics. NOTE(review): e may still be the SE wrapper here (it is stripped below), so per-type counters may not match; confirm. + metrics.exception(e); + // The above callBlockingMethod will always return a SE. Strip the SE wrapper before // putting it on the wire. Its needed to adhere to the pb Service Interface but we don't // need to pass it over the wire. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java index c2b0344..6bb97fd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java @@ -20,6 +20,11 @@ package org.apache.hadoop.hbase.ipc; import org.apache.hadoop.hbase.CompatibilityFactory; +import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.RegionTooBusyException; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; +import org.apache.hadoop.hbase.exceptions.RegionMovedException; import org.apache.hadoop.hbase.testclassification.RPCTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.test.MetricsAssertHelper; @@ -113,6 +118,19 @@ public class TestRpcMetrics { HELPER.assertCounter("sentBytes", 309, serverSource); HELPER.assertCounter("receivedBytes", 208, serverSource); + + mrpc.exception(null); + HELPER.assertCounter("exceptions", 1, 
serverSource); + + mrpc.exception(new RegionMovedException(ServerName.parseServerName("localhost:60020"), 100)); + mrpc.exception(new RegionTooBusyException()); + mrpc.exception(new OutOfOrderScannerNextException()); + mrpc.exception(new NotServingRegionException()); + HELPER.assertCounter("exceptions.RegionMovedException", 1, serverSource); + HELPER.assertCounter("exceptions.RegionTooBusyException", 1, serverSource); + HELPER.assertCounter("exceptions.OutOfOrderScannerNextException", 1, serverSource); + HELPER.assertCounter("exceptions.NotServingRegionException", 1, serverSource); + HELPER.assertCounter("exceptions", 5, serverSource); } } -- 2.3.0