From 961b93ce9042d23639d4e3feb86a38311b045238 Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Wed, 15 Apr 2015 13:49:55 -0700 Subject: [PATCH] HBASE-13477 Create metrics on failed requests Summary: Add metrics on how many requests are exceptions and what type. Test Plan: behold unit tests. Differential Revision: https://reviews.facebook.net/D37167 --- .../hadoop/hbase/ipc/MetricsHBaseServerSource.java | 22 +++++++ .../ipc/MetricsHBaseServerSourceFactoryImpl.java | 6 +- .../hbase/ipc/MetricsHBaseServerSourceImpl.java | 71 ++++++++++++++++++++-- .../hadoop/hbase/ipc/MetricsHBaseServer.java | 34 +++++++++++ .../org/apache/hadoop/hbase/ipc/RpcServer.java | 18 ++++-- .../hadoop/hbase/regionserver/RSRpcServices.java | 4 ++ .../apache/hadoop/hbase/ipc/TestRpcMetrics.java | 18 ++++++ 7 files changed, 160 insertions(+), 13 deletions(-) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java index 1f4c950..482fdba 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSource.java @@ -58,6 +58,16 @@ public interface MetricsHBaseServerSource extends BaseSource { String NUM_ACTIVE_HANDLER_NAME = "numActiveHandler"; String NUM_ACTIVE_HANDLER_DESC = "Number of active rpc handlers."; + String EXCEPTIONS_NAME="exceptions"; + String EXCEPTIONS_DESC="Exceptions caused by requests"; + String EXCEPTIONS_TYPE_DESC="Number of requests that resulted in the specified type of Exception"; + String EXCEPTIONS_OOO_NAME="exceptions.OutOfOrderScannerNextException"; + String EXCEPTIONS_BUSY_NAME="exceptions.RegionTooBusyException"; + String EXCEPTIONS_UNKNOWN_NAME="exceptions.UnknownScannerException"; + String EXCEPTIONS_SANITY_NAME="exceptions.FailedSanityCheckException"; + String EXCEPTIONS_MOVED_NAME="exceptions.RegionMovedException"; + String EXCEPTIONS_NSRE_NAME="exceptions.NotServingRegionException"; + void authorizationSuccess(); void authorizationFailure(); @@ -66,6 +76,18 @@ public interface MetricsHBaseServerSource extends BaseSource { void authenticationFailure(); + void exception(); + + /** + * Different types of exceptions + */ + void outOfOrderException(); + void failedSanityException(); + void movedRegionException(); + void notServingRegionException(); + void unknownScannerException(); + void tooBusyException(); + void sentBytes(long count); void receivedBytes(int count); diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceFactoryImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceFactoryImpl.java index cca53e0..4098e26 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceFactoryImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceFactoryImpl.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; @InterfaceAudience.Private public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourceFactory { - private static enum SourceStorage { + private enum SourceStorage { INSTANCE; HashMap sources = @@ -39,7 +39,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc } private static synchronized MetricsHBaseServerSource getSource(String serverName, - MetricsHBaseServerWrapper wrapper) { + MetricsHBaseServerWrapper wrap) { String context = createContextName(serverName); MetricsHBaseServerSource source = SourceStorage.INSTANCE.sources.get(context); @@ -49,7 +49,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc context, METRICS_DESCRIPTION, context.toLowerCase(), - context + METRICS_JMX_CONTEXT_SUFFIX, wrapper); + context + METRICS_JMX_CONTEXT_SUFFIX, wrap); //Store back in storage SourceStorage.INSTANCE.sources.put(context, source); diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java index 8eefb08..dadf9de 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServerSourceImpl.java @@ -38,6 +38,16 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl private final MutableCounterLong authenticationFailures; private final MutableCounterLong sentBytes; private final MutableCounterLong receivedBytes; + + private final MutableCounterLong exceptions; + private final MutableCounterLong exceptionsOOO; + private final MutableCounterLong exceptionsBusy; + private final MutableCounterLong exceptionsUnknown; + private final MutableCounterLong exceptionsSanity; + private final MutableCounterLong exceptionsNSRE; + private final MutableCounterLong exceptionsMoved; + + private MutableHistogram queueCallTime; private MutableHistogram processCallTime; private MutableHistogram totalCallTime; @@ -51,18 +61,32 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl this.wrapper = wrapper; this.authorizationSuccesses = this.getMetricsRegistry().newCounter(AUTHORIZATION_SUCCESSES_NAME, - AUTHORIZATION_SUCCESSES_DESC, 0l); + AUTHORIZATION_SUCCESSES_DESC, 0L); this.authorizationFailures = this.getMetricsRegistry().newCounter(AUTHORIZATION_FAILURES_NAME, - AUTHORIZATION_FAILURES_DESC, 0l); + AUTHORIZATION_FAILURES_DESC, 0L); + + this.exceptions = this.getMetricsRegistry().newCounter(EXCEPTIONS_NAME, EXCEPTIONS_DESC, 0L); + this.exceptionsOOO = this.getMetricsRegistry() + .newCounter(EXCEPTIONS_OOO_NAME, EXCEPTIONS_TYPE_DESC, 0L); + this.exceptionsBusy = this.getMetricsRegistry() + .newCounter(EXCEPTIONS_BUSY_NAME, EXCEPTIONS_TYPE_DESC, 0L); + this.exceptionsUnknown = this.getMetricsRegistry() + .newCounter(EXCEPTIONS_UNKNOWN_NAME, EXCEPTIONS_TYPE_DESC, 0L); + this.exceptionsSanity = this.getMetricsRegistry() + .newCounter(EXCEPTIONS_SANITY_NAME, EXCEPTIONS_TYPE_DESC, 0L); + this.exceptionsMoved = this.getMetricsRegistry() + .newCounter(EXCEPTIONS_MOVED_NAME, EXCEPTIONS_TYPE_DESC, 0L); + this.exceptionsNSRE = this.getMetricsRegistry() + .newCounter(EXCEPTIONS_NSRE_NAME, EXCEPTIONS_TYPE_DESC, 0L); this.authenticationSuccesses = this.getMetricsRegistry().newCounter( - AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0l); + AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0L); this.authenticationFailures = this.getMetricsRegistry().newCounter(AUTHENTICATION_FAILURES_NAME, - AUTHENTICATION_FAILURES_DESC, 0l); + AUTHENTICATION_FAILURES_DESC, 0L); this.sentBytes = this.getMetricsRegistry().newCounter(SENT_BYTES_NAME, - SENT_BYTES_DESC, 0l); + SENT_BYTES_DESC, 0L); this.receivedBytes = this.getMetricsRegistry().newCounter(RECEIVED_BYTES_NAME, - RECEIVED_BYTES_DESC, 0l); + RECEIVED_BYTES_DESC, 0L); this.queueCallTime = this.getMetricsRegistry().newHistogram(QUEUE_CALL_TIME_NAME, QUEUE_CALL_TIME_DESC); this.processCallTime = this.getMetricsRegistry().newHistogram(PROCESS_CALL_TIME_NAME, @@ -87,6 +111,41 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl } @Override + public void exception() { + exceptions.incr(); + } + + @Override + public void outOfOrderException() { + exceptionsOOO.incr(); + } + + @Override + public void failedSanityException() { + exceptionsSanity.incr(); + } + + @Override + public void movedRegionException() { + exceptionsMoved.incr(); + } + + @Override + public void notServingRegionException() { + exceptionsNSRE.incr(); + } + + @Override + public void unknownScannerException() { + exceptionsUnknown.incr(); + } + + @Override + public void tooBusyException() { + exceptionsBusy.incr(); + } + + @Override public void authenticationSuccess() { authenticationSuccesses.incr(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java index 825e688..3ca50ad 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/MetricsHBaseServer.java @@ -19,8 +19,14 @@ package org.apache.hadoop.hbase.ipc; +import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.RegionTooBusyException; +import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.CompatibilitySingletonFactory; +import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException; +import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; +import org.apache.hadoop.hbase.exceptions.RegionMovedException; @InterfaceAudience.Private public class MetricsHBaseServer { @@ -67,6 +73,34 @@ public class MetricsHBaseServer { source.queuedAndProcessedCall(totalTime); } + public void exception(Throwable throwable) { + source.exception(); + + /** + * Keep some metrics for commonly seen exceptions + * + * Try and put the most common types first. + * Place child types before the parent type that they extend. + * + * If this gets much larger we might have to go to a hashmap + */ + if (throwable != null) { + if (throwable instanceof OutOfOrderScannerNextException) { + source.outOfOrderException(); + } else if (throwable instanceof RegionTooBusyException) { + source.tooBusyException(); + } else if (throwable instanceof UnknownScannerException) { + source.unknownScannerException(); + } else if (throwable instanceof RegionMovedException) { + source.movedRegionException(); + } else if (throwable instanceof NotServingRegionException) { + source.notServingRegionException(); + } else if (throwable instanceof FailedSanityCheckException) { + source.failedSanityException(); + } + } + } + public MetricsHBaseServerSource getMetricsSource() { return source; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java index c69a187..95b0a2c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java @@ -158,6 +158,8 @@ import com.google.protobuf.TextFormat; @InterfaceStability.Evolving public class RpcServer implements RpcServerInterface { public static final Log LOG = LogFactory.getLog(RpcServer.class); + private static final CallQueueTooBigException CALL_QUEUE_TOO_BIG_EXCEPTION + = new CallQueueTooBigException(); private final boolean authorize; private boolean isSecurityEnabled; @@ -1465,6 +1467,7 @@ public class RpcServer implements RpcServerInterface { saslServer.dispose(); saslServer = null; } catch (SaslException ignored) { + // Ignored. This is being disposed of anyway. } } } @@ -1542,7 +1545,7 @@ public class RpcServer implements RpcServerInterface { // Else it will be length of the data to read (or -1 if a ping). We catch the integer // length into the 4-byte this.dataLengthBuffer. int count = read4Bytes(); - if (count < 0 || dataLengthBuffer.remaining() > 0 ) { + if (count < 0 || dataLengthBuffer.remaining() > 0) { return count; } @@ -1787,9 +1790,10 @@ public class RpcServer implements RpcServerInterface { new Call(id, this.service, null, null, null, null, this, responder, totalRequestSize, null, null); ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream(); - setupResponse(responseBuffer, callTooBig, new CallQueueTooBigException(), - "Call queue is full on " + getListenerAddress() + - ", is hbase.ipc.server.max.callqueue.size too small?"); + metrics.exception(CALL_QUEUE_TOO_BIG_EXCEPTION); + setupResponse(responseBuffer, callTooBig, CALL_QUEUE_TOO_BIG_EXCEPTION, + "Call queue is full on " + getListenerAddress() + + ", is hbase.ipc.server.max.callqueue.size too small?"); responder.doRespond(callTooBig); return; } @@ -1819,6 +1823,8 @@ public class RpcServer implements RpcServerInterface { getHostAddress(); LOG.warn(msg, t); + metrics.exception(t); + // probably the hbase hadoop version does not match the running hadoop version if (t instanceof LinkageError) { t = new DoNotRetryIOException(t); @@ -2140,6 +2146,10 @@ public class RpcServer implements RpcServerInterface { // putting it on the wire. Its needed to adhere to the pb Service Interface but we don't // need to pass it over the wire. if (e instanceof ServiceException) e = e.getCause(); + + // increment the number of requests that were exceptions. + metrics.exception(e); + if (e instanceof LinkageError) throw new DoNotRetryIOException(e); if (e instanceof IOException) throw (IOException)e; LOG.error("Unexpected throwable object ", e); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index f9b8d61..050b108 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -552,6 +552,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, .setName(result.getClass().getName()) .setValue(result.toByteString()))); } catch (IOException ioe) { + rpcServer.getMetrics().exception(ioe); resultOrExceptionBuilder.setException(ResponseConverter.buildException(ioe)); } } else if (action.hasMutation()) { @@ -601,6 +602,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, // case the corresponding ResultOrException instance for the Put or Delete will be added // down in the doBatchOp method call rather than up here. } catch (IOException ie) { + rpcServer.getMetrics().exception(ie); resultOrExceptionBuilder = ResultOrException.newBuilder(). setException(ResponseConverter.buildException(ie)); } @@ -1897,6 +1899,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, region = getRegion(regionAction.getRegion()); quota = getQuotaManager().checkQuota(region, regionAction.getActionList()); } catch (IOException e) { + rpcServer.getMetrics().exception(e); regionActionResultBuilder.setException(ResponseConverter.buildException(e)); responseBuilder.addRegionActionResult(regionActionResultBuilder.build()); continue; // For this region it's a failure. @@ -1927,6 +1930,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, processed = Boolean.TRUE; } } catch (IOException e) { + rpcServer.getMetrics().exception(e); // As it's atomic, we may expect it's a global failure. regionActionResultBuilder.setException(ResponseConverter.buildException(e)); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java index c2b0344..6bb97fd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcMetrics.java @@ -20,6 +20,11 @@ package org.apache.hadoop.hbase.ipc; import org.apache.hadoop.hbase.CompatibilityFactory; +import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.RegionTooBusyException; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; +import org.apache.hadoop.hbase.exceptions.RegionMovedException; import org.apache.hadoop.hbase.testclassification.RPCTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.test.MetricsAssertHelper; @@ -113,6 +118,19 @@ public class TestRpcMetrics { HELPER.assertCounter("sentBytes", 309, serverSource); HELPER.assertCounter("receivedBytes", 208, serverSource); + + mrpc.exception(null); + HELPER.assertCounter("exceptions", 1, serverSource); + + mrpc.exception(new RegionMovedException(ServerName.parseServerName("localhost:60020"), 100)); + mrpc.exception(new RegionTooBusyException()); + mrpc.exception(new OutOfOrderScannerNextException()); + mrpc.exception(new NotServingRegionException()); + HELPER.assertCounter("exceptions.RegionMovedException", 1, serverSource); + HELPER.assertCounter("exceptions.RegionTooBusyException", 1, serverSource); + HELPER.assertCounter("exceptions.OutOfOrderScannerNextException", 1, serverSource); + HELPER.assertCounter("exceptions.NotServingRegionException", 1, serverSource); + HELPER.assertCounter("exceptions", 5, serverSource); } } -- 2.3.0