From f28184c1874bb642f7e6f51aef4b43cb77f232dd Mon Sep 17 00:00:00 2001 From: mattmccline-microsoft <84488164+mattmccline-microsoft@users.noreply.github.com> Date: Thu, 5 Aug 2021 22:02:17 -0700 Subject: [PATCH] Patch #04 Improve Exception Handling --- .../hive/service/cli/HiveSQLException.java | 33 +++ .../service/cli/thrift/ThriftCLIService.java | 88 ++++--- .../service/cli/thrift/ThriftHttpServlet.java | 31 ++- .../hadoop/hive/metastore/ExceptUtils.java | 242 ++++++++++++++++++ .../metastore/RetryingMetaStoreClient.java | 207 ++++++++++----- .../hive/metastore/RetryingHMSHandler.java | 112 ++++---- 6 files changed, 563 insertions(+), 150 deletions(-) create mode 100644 standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ExceptUtils.java diff --git a/service/src/java/org/apache/hive/service/cli/HiveSQLException.java b/service/src/java/org/apache/hive/service/cli/HiveSQLException.java index f180830609..dee46f4eb5 100644 --- a/service/src/java/org/apache/hive/service/cli/HiveSQLException.java +++ b/service/src/java/org/apache/hive/service/cli/HiveSQLException.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; +import org.apache.hadoop.hive.metastore.ExceptUtils; import org.apache.hive.service.rpc.thrift.TStatus; import org.apache.hive.service.rpc.thrift.TStatusCode; @@ -118,6 +119,38 @@ public HiveSQLException(TStatus status) { super(status.getErrorMessage(), status.getSqlState(), status.getErrorCode()); } + + /** + * Wrap an Exception caught by ThriftCLIService operation method such as GetTables, etc. + * + * We even wrap a HiveSQLException with itself because we want to show where in the code an + * Exception was caught, stuffed into the Thrift Response, and not rethrown. + * Otherwise, a stack trace can be difficult to interpret if it isn't clear how far it went up + * the call chain it went. Was it an uncaught Exception that killed the thread? + * + * @param operationName the name of the request. E.g. GetInfo. + * @param cause the Exception that was caught and failed the request. + * + * @return a {@link HiveSQLException} object + */ + public static HiveSQLException wrapForResponse(String operationName, Exception cause) { + Throwable rootCause = cause; + while (true) { + Throwable nextCause = rootCause.getCause(); + if (nextCause == null) { + break; + } + rootCause = nextCause; + } + String rootMsg = rootCause.getMessage(); + + String msg = operationName + " error: " + rootCause.getClass().getName() + (rootMsg.isEmpty() ? "" : " " + rootMsg); + HiveSQLException hse = new HiveSQLException(msg, cause); + // Get rid of the call to wrapForResponse. + ExceptUtils.removeFirstStackTraceEle(hse); + return hse; + } + /** * Converts current object to a {@link TStatus} object. * diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 1eabe161b5..7b6e191c0f 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -419,7 +419,8 @@ public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException { } catch (Exception e) { // Do not log request as it contains password information LOG.error("Login attempt failed for user : {}", userName, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("OpenSession", e); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -614,8 +615,9 @@ public TCloseSessionResp CloseSession(TCloseSessionReq req) throws TException { context.clearSessionHandle(); } } catch (Exception e) { - LOG.error("Failed to close the session", e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("CloseSession", e); + LOG.error("Failed to close the session", hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -630,8 +632,9 @@ public TGetInfoResp GetInfo(TGetInfoReq req) throws TException { resp.setInfoValue(getInfoValue.toTGetInfoValue()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get info", e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetInfo", e); + LOG.error("Failed to get info", hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -670,8 +673,9 @@ public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws T // Note: it's rather important that this (and other methods) catch Exception, not Throwable; // in combination with HiveSessionProxy.invoke code, perhaps unintentionally, it used // to also catch all errors; and now it allows OOMs only to propagate. - LOG.error("Failed to execute statement [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("ExecuteStatement", e); + LOG.error("Failed to execute statement [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -684,8 +688,9 @@ public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws TException { resp.setOperationHandle(operationHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get type info [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetTypeInfo", e); + LOG.error("Failed to get type info [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -698,8 +703,9 @@ public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws TException { resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed getting catalogs [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetCatalogs", e); + LOG.error("Failed getting catalogs [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -713,8 +719,9 @@ public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws TException { resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get schemas [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetSchemas", e); + LOG.error("Failed to get schemas [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -729,8 +736,9 @@ public TGetTablesResp GetTables(TGetTablesReq req) throws TException { resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get tables [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetTables", e); + LOG.error("Failed to get tables [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -743,8 +751,9 @@ public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws TException resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get table types [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetTableTypes", e); + LOG.error("Failed to get table types [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -762,8 +771,9 @@ public TGetColumnsResp GetColumns(TGetColumnsReq req) throws TException { resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get column types [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetColumns", e); + LOG.error("Failed to get column types [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -778,8 +788,9 @@ public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws TException { resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get function: [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetFunctions", e); + LOG.error("Failed to get function: [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -837,8 +848,9 @@ public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) th } resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get operation status [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetOperationStatus", e); + LOG.error("Failed to get operation status [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -850,8 +862,9 @@ public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws TExc cliService.cancelOperation(new OperationHandle(req.getOperationHandle())); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get cancel operation [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("CancelOperation", e); + LOG.error("Failed to get cancel operation [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -863,8 +876,9 @@ public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws TExcept cliService.closeOperation(new OperationHandle(req.getOperationHandle())); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to close operation [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("CloseOperation", e); + LOG.error("Failed to close operation [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -878,8 +892,9 @@ public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq r resp.setSchema(schema.toTTableSchema()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get result set metadata [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetResultSetMetadata", e); + LOG.error("Failed to get result set metadata [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -905,8 +920,9 @@ public TFetchResultsResp FetchResults(TFetchResultsReq req) throws TException { resp.setHasMoreRows(false); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed fetch results [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("FetchResults", e); + LOG.error("Failed fetch results [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -922,8 +938,9 @@ public TGetPrimaryKeysResp GetPrimaryKeys(TGetPrimaryKeysReq req) resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get primary keys [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetPrimaryKeys", e); + LOG.error("Failed to get primary keys [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } @@ -940,8 +957,9 @@ public TGetCrossReferenceResp GetCrossReference(TGetCrossReferenceReq req) resp.setOperationHandle(opHandle.toTOperationHandle()); resp.setStatus(OK_STATUS); } catch (Exception e) { - LOG.error("Failed to get cross reference [request: {}]", req, e); - resp.setStatus(HiveSQLException.toTStatus(e)); + HiveSQLException hse = HiveSQLException.wrapForResponse("GetCrossReference", e); + LOG.error("Failed to get cross reference [request: {}]", req, hse); + resp.setStatus(HiveSQLException.toTStatus(hse)); } return resp; } diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java index f734c40814..f9eecacb36 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.ExceptUtils; import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.Utils; @@ -282,8 +283,34 @@ protected void doPost(HttpServletRequest request, HttpServletResponse response) } } response.getWriter().println("Authentication Error: " + e.getMessage()); - } - finally { + } catch (ServletException e) { + /* + * This error may indicate the Response was NOT successfully sent back the client due to a connection error. + * + * We wrap the exception with another ServletException to make visible that this method is what issued the + * doPost call. And, to log an ERROR message (Jetty only issues a warning which often missed as significant). + */ + ServletException se = new ServletException("POST request error: ", e); + LOG.error("Error processing POST request: ", se); + throw se; + } catch (IOException e) { + // (ditto comments for ServletException). + IOException ioe = new IOException("POST request error: ", e); + LOG.error("Error processing POST request: ", ioe); + throw ioe; + } catch (Exception e) { + /* + * Unchecked exceptions caught here like NullPointerException, etc. whose super class is RuntimeException. + * We wrap them with a ServletException so they will not be program fatal. + * + * A java.lang.Error is distinct from Exception and caught in the next catch clause. + */ + ServletException se = new ServletException("POST request failed: ", e); + LOG.error("Error processing POST request: ", se); + throw se; + } catch (Error error) { + ExceptUtils.handleFatalError("POST request", error); + } finally { // Clear the thread locals SessionManager.clearUserName(); SessionManager.clearIpAddress(); diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ExceptUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ExceptUtils.java new file mode 100644 index 0000000000..b5aad97a3e --- /dev/null +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ExceptUtils.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.List; + +public class ExceptUtils { + private static final Logger LOG = LoggerFactory.getLogger(ExceptUtils.class.getName()); + + // We even wrap a MetaException with itself to indicate where the exception was processed by the client. + public static MetaException wrapMetastoreClientException(String methodName, + Throwable cause) { + Throwable rootCause = cause; + while (true) { + Throwable nextCause = rootCause.getCause(); + if (nextCause == null) { + break; + } + rootCause = nextCause; + } + String rootMsg = rootCause.getMessage(); + String msg = methodName + " error: " + rootCause.getClass().getName() + (rootMsg.isEmpty() ? "" : " " + rootMsg); + MetaException me = new MetaException(msg); + // Get rid of the call to wrapMetastoreClientException. + ExceptUtils.removeFirstStackTraceEle(me); + return me; + } + + /* + * Technically, you are not suppose to catch java.lang.Error (OOM, etc.) but it is unavoidable with wrap + * exceptions like UndeclaredThrowableException. + * + * Attempt to create wrapper Error to show we intercepted it here, log, and throw. Otherwise, rethrow. + */ + public static void handleFatalError(String operationName, Error error) { + boolean isRethrow = true; + Error wrappedError = null; + try { + wrappedError = new Error(operationName + " fatal error: ", error); + // Get rid of the call to handleFatalError. + ExceptUtils.removeFirstStackTraceEle(wrappedError); + LOG.error("Fatal error for " + operationName + ": ", wrappedError); + isRethrow = false; + } catch (Throwable t) { + // Suppress.. + } + if (!isRethrow) { + throw wrappedError; + } else { + throw error; + } + } + + public static class MethodInvokeResult { + private final ResultKind resultKind; + private final Exception exception; + private final Error error; + + public enum ResultKind { + BASE_DECLARED_METHOD_EXCEPTION, + SUBCLASS_DECLARED_METHOD_EXCEPTION, + OTHER_DECLARED_METHOD_EXCEPTION, + UNDECLARED_METHOD_EXCEPTION, + INVOCATION_EXCEPTION, + UNEXPECTED_EXCEPTION, + ERROR; + } + + private MethodInvokeResult(ResultKind resultKind, Exception e) { + this.resultKind = resultKind; + exception = e; + error = null; + } + + private MethodInvokeResult(Error error) { + resultKind = ResultKind.ERROR; + exception = null; + this.error = error; + } + + public ResultKind getResultKind() { + return resultKind; + } + + public Exception getException() { + return exception; + } + + public Error getError() { + return error; + } + + public static MethodInvokeResult createBaseDeclaredMethodException(Exception e) { + return new MethodInvokeResult(ResultKind.BASE_DECLARED_METHOD_EXCEPTION, e); + } + + public static MethodInvokeResult createSubClassDeclaredMethodException(Exception e) { + return new MethodInvokeResult(ResultKind.SUBCLASS_DECLARED_METHOD_EXCEPTION, e); + } + + public static MethodInvokeResult createOtherDeclaredMethodException(Exception e) { + return new MethodInvokeResult(ResultKind.OTHER_DECLARED_METHOD_EXCEPTION, e); + } + + public static MethodInvokeResult createUndeclaredMethodException(Exception e) { + return new MethodInvokeResult(ResultKind.UNDECLARED_METHOD_EXCEPTION, e); + } + + public static MethodInvokeResult createInvocationException(Exception e) { + return new MethodInvokeResult(ResultKind.INVOCATION_EXCEPTION, e); + } + + public static MethodInvokeResult createUnexpectedException(Exception e) { + return new MethodInvokeResult(ResultKind.UNEXPECTED_EXCEPTION, e); + } + + public static MethodInvokeResult createError(Error error) { + return new MethodInvokeResult(error); + } + } + + public static MethodInvokeResult evaluateMethodInvokeThrowable(Method method, Throwable throwable, Class... baseExcClasses) { + if (throwable instanceof InvocationTargetException) { + Throwable cause = throwable.getCause(); + if (cause instanceof Error) { + return MethodInvokeResult.createError((Error) cause); + } + // A checked or unchecked Exception. + Exception e = (Exception) cause; + Class excClass = e.getClass(); + + /* + * Determine if it is an Exception in the throws clause of the method (i.e. declared). + * Be careful because a base and subclass can both be in the throws class. + */ + Class[] declareExceptionTypes = method.getExceptionTypes(); + + // Segregate base classes from others. + List> declBaseExcTypes = new ArrayList>(); + List> declSubClassExcTypes = new ArrayList>(); + List> declOtherExcTypes = new ArrayList>(); + for (Class declExcType : declareExceptionTypes) { + boolean isConsumed = false; + for (Class baseClass : baseExcClasses) { + if (declExcType.equals(baseClass)) { + declBaseExcTypes.add(declExcType); + isConsumed = true; + break; + } else if (baseClass.isAssignableFrom(declExcType)) { + declSubClassExcTypes.add(declExcType); + isConsumed = true; + break; + } + } + if (!isConsumed) { + declOtherExcTypes.add(declExcType); + } + } + // Now look for matching explicit other classes. Might be a subclass of a base class, might not. + for (Class subClassDeclExcType : declSubClassExcTypes) { + if (excClass.equals(subClassDeclExcType)) { + return MethodInvokeResult.createSubClassDeclaredMethodException(e); + } + } + // Now look for matching base classes. + for (Class baseDeclExcType : declBaseExcTypes) { + if (excClass.equals(baseDeclExcType)) { + return MethodInvokeResult.createBaseDeclaredMethodException(e); + } + } + // Now look for matching other classes. + for (Class otherDeclExcType : declOtherExcTypes) { + if (excClass.equals(otherDeclExcType)) { + return MethodInvokeResult.createOtherDeclaredMethodException(e); + } + } + + // We leave it to the caller to classify the undeclared Exception. + return MethodInvokeResult.createUndeclaredMethodException(e); + } else if (throwable instanceof IllegalAccessException | throwable instanceof IllegalArgumentException) { + return MethodInvokeResult.createInvocationException((Exception) throwable); + } else { + // Unexpected. Perhaps an NPE from a null parameter. + if (throwable instanceof Error) { + return MethodInvokeResult.createError((Error) throwable); + } + return MethodInvokeResult.createUnexpectedException((Exception) throwable); + } + } + + public static T wrapCaughtException(T caughtExc) { + Throwable caughtThrowable = (Throwable) caughtExc; + Throwable wrapThrowable; + try { + Class excClass = caughtExc.getClass(); + Constructor constructor = excClass.getConstructor(String.class); + String msg = caughtThrowable.getMessage(); + + wrapThrowable = (Throwable) constructor.newInstance(msg); + wrapThrowable.initCause(caughtThrowable); + ExceptUtils.removeFirstStackTraceEle(wrapThrowable); + } catch (Exception e) { + return caughtExc; + } + return (T) wrapThrowable; + } + + public static void removeFirstStackTraceEle(Throwable t) { + StackTraceElement[] stackTrace = t.getStackTrace(); + final int len = stackTrace.length; + StackTraceElement[] newStackTrace = new StackTraceElement[len - 1]; + for (int i = 1; i < len; i++) { + newStackTrace[i - 1] = stackTrace[i]; + } + t.setStackTrace(newStackTrace); + } +} diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java index 6eadf19462..0eb85d1d17 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/RetryingMetaStoreClient.java @@ -158,8 +158,18 @@ public static IMetaStoreClient getProxy(Configuration hiveConf, Class[] const RetryingMetaStoreClient.class.getClassLoader(), baseClass.getInterfaces(), handler); } + /* + * When we look at the (generated) IMetaStoreClient interface the method's throws clause has exceptions whose + * superclass is Thrift's TException. + * + * Very commonly MetaException is thrown. But also ones more specific to a particular method such as: + * + * AlreadyExistsException, InvalidObjectException, NoSuchObjectException, UnknownDBException, and more. + * + * And generic ones like TProtocolException, TTransportException, etc. + */ @Override - public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { + public Object invoke(Object proxy, Method method, Object[] args) throws TException { Object ret; int retriesMade = 0; TException caughtException; @@ -176,40 +186,20 @@ public Object invoke(Object proxy, Method method, Object[] args) throws Throwabl } while (true) { - try { - SecurityUtils.reloginExpiringKeytabUser(); - - if (allowReconnect) { - if (retriesMade > 0 || hasConnectionLifeTimeReached(method)) { - if (this.ugi != null) { - // Perform reconnect with the proper user context - try { - LOG.info("RetryingMetaStoreClient trying reconnect as " + this.ugi); - - this.ugi.doAs( - new PrivilegedExceptionAction () { - @Override - public Object run() throws MetaException { - base.reconnect(); - return null; - } - }); - } catch (UndeclaredThrowableException e) { - Throwable te = e.getCause(); - if (te instanceof PrivilegedActionException) { - throw te.getCause(); - } else { - throw te; - } - } - lastConnectionTime = System.currentTimeMillis(); - } else { - LOG.warn("RetryingMetaStoreClient unable to reconnect. No UGI information."); - throw new MetaException("UGI information unavailable. Will not attempt a reconnect."); - } + SecurityUtils.reloginExpiringKeytabUser(); + + if (allowReconnect) { + if (retriesMade > 0 || hasConnectionLifeTimeReached(method)) { + if (this.ugi != null) { + reconnect(); + } else { + LOG.warn("RetryingMetaStoreClient unable to reconnect. No UGI information."); + throw new MetaException("UGI information unavailable. Will not attempt a reconnect."); } } + } + try { if (metaCallTimeMap == null) { ret = method.invoke(base, args); } else { @@ -219,51 +209,138 @@ public Object run() throws MetaException { long timeTaken = System.currentTimeMillis() - startTime; addMethodTime(method, timeTaken); } + // Successful. break; - } catch (UndeclaredThrowableException e) { - throw e.getCause(); - } catch (InvocationTargetException e) { - Throwable t = e.getCause(); - if (t instanceof TApplicationException) { - TApplicationException tae = (TApplicationException)t; - switch (tae.getType()) { - case TApplicationException.UNSUPPORTED_CLIENT_TYPE: - case TApplicationException.UNKNOWN_METHOD: - case TApplicationException.WRONG_METHOD_NAME: - case TApplicationException.INVALID_PROTOCOL: - throw t; - default: - // TODO: most other options are probably unrecoverable... throw? - caughtException = tae; - } - } else if ((t instanceof TProtocolException) || (t instanceof TTransportException)) { - // TODO: most protocol exceptions are probably unrecoverable... throw? - caughtException = (TException)t; - } else if ((t instanceof MetaException) && isRecoverableMetaException((MetaException) t)) { - caughtException = (MetaException)t; - } else { - throw t; - } - } catch (MetaException e) { - if (isRecoverableMetaException(e)) { - caughtException = e; - } else { - throw e; - } + } catch (Throwable t) { + ExceptUtils.MethodInvokeResult methodInvokeResult = + ExceptUtils.evaluateMethodInvokeThrowable(method, t, TException.class); + // If the Throwable is not retryable, an TException or Error is thrown. + caughtException = getRetryableExceptionFromInvoke(method.getName(), methodInvokeResult); } - + // Fall through to here with a retryable TException. if (retriesMade >= retryLimit || base.isLocalMetaStore() || !allowRetry) { - throw caughtException; + throw ExceptUtils.wrapCaughtException(caughtException); } retriesMade++; LOG.warn("MetaStoreClient lost connection. Attempting to reconnect (" + retriesMade + " of " + retryLimit + ") after " + retryDelaySeconds + "s. " + method.getName(), caughtException); - Thread.sleep(retryDelaySeconds * 1000); + try { + Thread.sleep(retryDelaySeconds * 1000); + } catch (InterruptedException e) { + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), e); + LOG.error("Error happened calling method " + method.getName() + ": ", me); + throw me; + } } return ret; } + // UNDONE: Retryable? + private void reconnect() throws MetaException { + // Perform reconnect with the proper user context + try { + LOG.info("RetryingMetaStoreClient trying reconnect as " + this.ugi); + + this.ugi.doAs( + new PrivilegedExceptionAction() { + @Override + public Object run() throws MetaException { + base.reconnect(); + return null; + } + }); + } catch (UndeclaredThrowableException e) { + Throwable cause = e.getCause(); + if (cause instanceof Error) { + ExceptUtils.handleFatalError("reconnect", (Error) cause); + } + if (cause instanceof PrivilegedActionException) { + throw ExceptUtils.wrapMetastoreClientException("reconnect", cause.getCause()); + } else { + throw ExceptUtils.wrapMetastoreClientException("reconnect", cause); + } + } catch (InterruptedException | IOException e) { + throw ExceptUtils.wrapMetastoreClientException("reconnect", e); + } + lastConnectionTime = System.currentTimeMillis(); + } + + private TException getRetryableExceptionFromInvoke(String methodName, + ExceptUtils.MethodInvokeResult methodInvokeResult) + throws TException { + switch (methodInvokeResult.getResultKind()) { + case BASE_DECLARED_METHOD_EXCEPTION: + { + // // UNDONE: If not Retryable, throw. + return (TException) methodInvokeResult.getException(); + } + case SUBCLASS_DECLARED_METHOD_EXCEPTION: + // Client wants this kind of Exception from method. + throw ExceptUtils.wrapCaughtException((TException) methodInvokeResult.getException()); + case OTHER_DECLARED_METHOD_EXCEPTION: + { + // No match to method's throws clause Exception list on client side. + Exception e = methodInvokeResult.getException(); + if (e instanceof TException) { + // Is is, however, a subclass of a base class. + TException tException = (TException) e; + if (!isRecoverableTException(tException)) { + throw ExceptUtils.wrapCaughtException(tException); + } + } + // Otherwise, it is unknown to this code -- assume it is not retryable.. + MetaException me = new MetaException(methodName + " error: "); + me.initCause(e); + throw me; + } + case UNDECLARED_METHOD_EXCEPTION: + { + Exception e = methodInvokeResult.getException(); + if (e instanceof TException) { + TException tException = (TException) e; + if (!isRecoverableTException(tException)) { + throw ExceptUtils.wrapCaughtException(tException); + } + return tException; + } else { + // Assume not retryable. + MetaException me = new MetaException(methodName + " error: "); + me.initCause(methodInvokeResult.getException()); + throw me; + } + } + case INVOCATION_EXCEPTION: + case UNEXPECTED_EXCEPTION: + { + // Assume not retryable. + MetaException me = new MetaException(methodName + " error: "); + me.initCause(methodInvokeResult.getException()); + throw me; + } + case ERROR: + { + ExceptUtils.handleFatalError(methodName + " error: ", methodInvokeResult.getError()); + return null; // Unreachable. + } + default: + throw new RuntimeException("Unexpected method invoke result: " + methodInvokeResult.getResultKind()); + } + } + + private MetaException throwUnexpectedBaseClass(Exception e) throws MetaException { + MetaException me = new MetaException("Internal error"); + me.initCause(new RuntimeException("Only TException is currently a base class", e)); + return me; + } + + private static boolean isRecoverableTException(TException e) { + if (e instanceof MetaException) { + return isRecoverableMetaException((MetaException) e); + } + return (e instanceof TProtocolException) || (e instanceof TTransportException); + } + private static boolean isRecoverableMetaException(MetaException e) { String m = e.getMessage(); if (m == null) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java index 169ac02e14..cddb4452d4 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RetryingHMSHandler.java @@ -25,7 +25,6 @@ import java.lang.reflect.UndeclaredThrowableException; import java.util.concurrent.TimeUnit; -import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; @@ -79,10 +78,15 @@ private RetryingHMSHandler(Configuration origConf, IHMSHandler baseHandler, bool //in case of transient failures in init method invoke(baseHandler, baseHandler.getClass().getDeclaredMethod("init", (Class[]) null), null); - } catch (Throwable e) { - LOG.error("HMSHandler Fatal error: " + ExceptionUtils.getStackTrace(e)); - MetaException me = new MetaException(e.getMessage()); - me.initCause(e); + } catch (MetaException e) { + // Our RetryingHMSHandler.invoke method exception. + MetaException me = ExceptUtils.wrapMetastoreClientException("init", e); + LOG.error("HMSHandler fatal init error: ", me); + throw me; + } catch (NoSuchMethodException | SecurityException e) { + // Class.getDeclaredMethod exceptions. + MetaException me = ExceptUtils.wrapMetastoreClientException("init", e); + LOG.error("HMSHandler fatal error: ", me); throw me; } } @@ -98,7 +102,7 @@ public static IHMSHandler getProxy(Configuration conf, IHMSHandler baseHandler, } @Override - public Object invoke(final Object proxy, final Method method, final Object[] args) throws Throwable { + public Object invoke(final Object proxy, final Method method, final Object[] args) throws MetaException { int retryCount = -1; int threadId = baseHandler.getThreadId(); boolean error = true; @@ -117,7 +121,7 @@ public Object invoke(final Object proxy, final Method method, final Object[] arg } } - public Result invokeInternal(final Object proxy, final Method method, final Object[] args) throws Throwable { + public Result invokeInternal(final Object proxy, final Method method, final Object[] args) throws MetaException { boolean gotNewConnectUrl = false; boolean reloadConf = MetastoreConf.getBoolVar(origConf, ConfVars.HMS_HANDLER_FORCE_RELOAD_CONF); @@ -151,63 +155,69 @@ public Result invokeInternal(final Object proxy, final Method method, final Obje } } return new Result(object, retryCount); - } catch (UndeclaredThrowableException e) { - if (e.getCause() != null) { - if (e.getCause() instanceof javax.jdo.JDOException) { - // Due to reflection, the jdo exception is wrapped in - // invocationTargetException - caughtException = e.getCause(); - } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null - && e.getCause().getCause() instanceof javax.jdo.JDOException) { - // The JDOException may be wrapped further in a MetaException - caughtException = e.getCause().getCause(); - } else { - LOG.error(ExceptionUtils.getStackTrace(e.getCause())); - throw e.getCause(); - } + // Exception not declared in method's exception throws clause... + Throwable cause = e.getCause(); + if (cause instanceof Error) { + ExceptUtils.handleFatalError(method.getName(), (Error) cause); + } + if (cause instanceof javax.jdo.JDOException) { + // Due to reflection, the jdo exception is wrapped in + // invocationTargetException + caughtException = e.getCause(); + } else if (cause instanceof MetaException && cause.getCause() != null + && cause.getCause() instanceof javax.jdo.JDOException) { + // The JDOException may be wrapped further in a MetaException + caughtException = cause.getCause(); } else { - LOG.error(ExceptionUtils.getStackTrace(e)); - throw e; + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), cause); + LOG.error("Error happened calling method " + method.getName() + ": ", me); + throw me; } } catch (InvocationTargetException e) { - if (e.getCause() instanceof javax.jdo.JDOException) { + Throwable cause = e.getCause(); + if (cause instanceof javax.jdo.JDOException) { // Due to reflection, the jdo exception is wrapped in // invocationTargetException - caughtException = e.getCause(); - } else if (e.getCause() instanceof NoSuchObjectException || e.getTargetException().getCause() instanceof NoSuchObjectException) { + caughtException = cause; + } else if (cause instanceof NoSuchObjectException || e.getTargetException().getCause() instanceof NoSuchObjectException) { + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), cause); String methodName = method.getName(); if (!methodName.startsWith("get_database") && !methodName.startsWith("get_table") - && !methodName.startsWith("get_partition") && !methodName.startsWith("get_function") - && !methodName.startsWith("get_stored_procedure") && !methodName.startsWith("find_package")) { - LOG.error(ExceptionUtils.getStackTrace(e.getCause())); + && !methodName.startsWith("get_partition") && !methodName.startsWith("get_function")) { + LOG.error("Error happened calling method " + method.getName() + ": ", me); } - throw e.getCause(); - } else if (e.getCause() instanceof MetaException && e.getCause().getCause() != null) { - if (e.getCause().getCause() instanceof javax.jdo.JDOException || - e.getCause().getCause() instanceof NucleusException) { + throw me; + } else if (cause instanceof MetaException && cause.getCause() != null) { + Throwable actualCause = cause.getCause(); + if (actualCause instanceof javax.jdo.JDOException || + actualCause instanceof NucleusException) { // The JDOException or the Nucleus Exception may be wrapped further in a MetaException - caughtException = e.getCause().getCause(); - } else if (e.getCause().getCause() instanceof DeadlineException) { + caughtException = actualCause; + } else if (actualCause instanceof DeadlineException) { // The Deadline Exception needs no retry and be thrown immediately. Deadline.clear(); - LOG.error("Error happens in method " + method.getName() + ": " + - ExceptionUtils.getStackTrace(e.getCause())); - throw e.getCause(); + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), actualCause); + LOG.error("Error happened calling method " + method.getName() + ": ", me); + throw me; } else { - LOG.error(ExceptionUtils.getStackTrace(e.getCause())); - throw e.getCause(); + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), cause); + LOG.error("Error happened calling method " + method.getName() + ": ", me); + throw me; } } else { - LOG.error(ExceptionUtils.getStackTrace(e.getCause())); - throw e.getCause(); + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), cause); + LOG.error("Error happened calling method " + method.getName() + ": ", me); + throw me; } + } catch (IllegalAccessException e) { + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), e); + LOG.error("Error happened calling method " + method.getName() + ": ", me); + throw me; } - + MetaException me = ExceptUtils.wrapMetastoreClientException(method.getName(), caughtException); if (retryCount >= retryLimit) { - LOG.error("HMSHandler Fatal error: " + ExceptionUtils.getStackTrace(caughtException)); - MetaException me = new MetaException(caughtException.toString()); - me.initCause(caughtException); + LOG.error("HMSHandler fatal error calling method " + method.getName() + ": ", me); throw me; } @@ -216,9 +226,15 @@ public Result invokeInternal(final Object proxy, final Method method, final Obje LOG.error( String.format( "Retrying HMSHandler after %d ms (attempt %d of %d)", retryInterval, retryCount, retryLimit) + - " with error: " + ExceptionUtils.getStackTrace(caughtException)); + " with error calling method " + method.getName() + ": ", me); - Thread.sleep(retryInterval); + try { + Thread.sleep(retryInterval); + } catch (InterruptedException e) { + MetaException interruptMe = ExceptUtils.wrapMetastoreClientException(method.getName(), e); + LOG.error("Error happened calling method " + method.getName() + ": ", interruptMe); + throw interruptMe; + } // If we have a connection error, the JDO connection URL hook might // provide us with a new URL to access the datastore. String lastUrl = MetaStoreInit.getConnectionURL(getActiveConf()); -- 2.28.0.windows.1