diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b8870f2..5a9c98a 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -214,6 +214,7 @@ public String decode(String value) {
       HiveConf.ConfVars.HIVE_TXN_TIMEOUT,
       HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE,
       HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH,
+      HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX,
       HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION,
       HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED,
       HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE,
@@ -1677,6 +1678,11 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         "transactions that Hive has to track at any given time, which may negatively affect\n" +
         "read performance."),
 
+    HIVE_TXN_RETRYABLE_SQLEX_REGEX("hive.txn.retryable.sqlex.regex", "", "Comma separated list\n" +
+        "of regular expression patterns for retryable SQLExceptions raised by the metastore DB.\n" +
+        "Each pattern is searched for in: <message> (SQLState=<state>,ErrorCode=<code>)\n" +
+        "For example: Can't serialize.*,SQLState=40001,^Deadlock,.*ORA-08176.*\n"),
+
     HIVE_COMPACTOR_INITIATOR_ON("hive.compactor.initiator.on", false,
         "Whether to run the initiator and cleaner threads on this metastore instance or not.\n" +
         "Set this to true on one instance of the Thrift metastore service as part of turning\n" +
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index 21faff4..4d6955f 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -49,6 +49,7 @@
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.regex.Pattern;
 
 /**
  * A handler to answer transaction related calls that come into the metastore
@@ -1507,7 +1508,7 @@ protected void checkRetryable(Connection conn,
         } else {
           LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");
         }
-      } else if (isRetryable(e)) {
+      } else if (isRetryable(conf, e)) {
         //in MSSQL this means Communication Link Failure
         if (retryNum++ < retryLimit) {
           LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval +
@@ -2592,7 +2593,7 @@ private static synchronized void buildJumpTable() {
   /**
    * Returns true if {@code ex} should be retried
    */
-  private static boolean isRetryable(Exception ex) {
+  static boolean isRetryable(HiveConf conf, Exception ex) {
     if(ex instanceof SQLException) {
       SQLException sqlException = (SQLException)ex;
       if("08S01".equalsIgnoreCase(sqlException.getSQLState())) {
@@ -2603,6 +2604,19 @@ private static boolean isRetryable(Exception ex) {
           sqlException.getMessage().contains("consistent read failure; rollback data not available")) {
         return true;
       }
+
+      String regex = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX);
+      if (regex != null && !regex.isEmpty()) {
+        String[] patterns = regex.split(",(?=\\S)");
+        String message = getMessage(sqlException);
+        // Config patterns are applied with find() rather than matches(): the text always ends in
+        // "(SQLState=..,ErrorCode=..)", so whole-string matching would make ^Deadlock unmatchable.
+        for (String p : patterns) {
+          if (Pattern.compile(p).matcher(message).find()) {
+            return true;
+          }
+        }
+      }
       //see also https://issues.apache.org/jira/browse/HIVE-9938
     }
     return false;
@@ -2642,7 +2656,7 @@ private boolean isDuplicateKeyError(SQLException ex) {
     return false;
   }
   private static String getMessage(SQLException ex) {
-    return ex.getMessage() + "(SQLState=" + ex.getSQLState() + ",ErrorCode=" + ex.getErrorCode() + ")";
+    return ex.getMessage() + " (SQLState=" + ex.getSQLState() + ",ErrorCode=" + ex.getErrorCode() + ")";
   }
   /**
    * Given a {@code selectStatement}, decorated it with FOR UPDATE or semantically equivalent
diff --git metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
index 26a660a..42d2d85 100644
--- metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
+++ metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
@@ -1346,6 +1346,16 @@ public void uncaughtException(Thread t, Throwable e) {
       error = e;
     }
   }
+
+  @Test
+  public void testRetryableRegex() throws Exception {
+    SQLException sqlException = new SQLException("ORA-08177: can't serialize access for this transaction", "72000");
+    // Note that we have 3 regex'es below
+    conf.setVar(HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, "^Deadlock detected, roll back,.*08177.*,.*08178.*");
+    boolean result = TxnHandler.isRetryable(conf, sqlException);
+    Assert.assertTrue("regex should be retryable", result);
+  }
+
   private void updateTxns(Connection conn) throws SQLException {
     Statement stmt = conn.createStatement();
     stmt.executeUpdate("update TXNS set txn_last_heartbeat = txn_last_heartbeat + 1");