diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b213609f39..81eb06bdb1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4094,6 +4094,12 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "By default, the clients are required to provide tokens to access HDFS/etc."), LLAP_ZKSM_ZK_CONNECTION_STRING("hive.llap.zk.sm.connectionString", "", "ZooKeeper connection string for ZooKeeper SecretManager."), + LLAP_ZKSM_ZK_CONNECTION_RETRY_COUNT("hive.llap.zk.sm.connection.retry.count", 10, + "Specify the number of times a ZooKeeper client within LLAP SecretManager should retry " + + "connecting to ZooKeeper delegation token manager."), + LLAP_ZKSM_ZK_CONNECTION_RETRY_WAIT("hive.llap.zk.sm.connection.retry.wait", "10ms", new TimeValidator( + TimeUnit.MILLISECONDS), "Specify the time to wait before a ZooKeeper client in LLAP SecretManager " + + "attempts to retry connecting to ZooKeeper delegation token manager."), LLAP_ZKSM_ZK_SESSION_TIMEOUT("hive.llap.zk.sm.session.timeout", "40s", new TimeValidator( TimeUnit.MILLISECONDS), "ZooKeeper session timeout for ZK SecretManager."), LLAP_ZK_REGISTRY_USER("hive.llap.zk.registry.user", "", diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/security/SecretManager.java b/llap-common/src/java/org/apache/hadoop/hive/llap/security/SecretManager.java index 372360897f..48fe61129e 100644 --- a/llap-common/src/java/org/apache/hadoop/hive/llap/security/SecretManager.java +++ b/llap-common/src/java/org/apache/hadoop/hive/llap/security/SecretManager.java @@ -27,6 +27,7 @@ import org.apache.curator.ensemble.fixed.FixedEnsembleProvider; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.retry.RetryNTimes; import org.apache.curator.retry.RetryOneTime; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -264,9 +265,12 @@ public void close() { private static void checkRootAcls(Configuration conf, String path, String user) { int stime = conf.getInt(ZK_DTSM_ZK_SESSION_TIMEOUT, ZK_DTSM_ZK_SESSION_TIMEOUT_DEFAULT), ctime = conf.getInt(ZK_DTSM_ZK_CONNECTION_TIMEOUT, ZK_DTSM_ZK_CONNECTION_TIMEOUT_DEFAULT); + int numberOfRetries = HiveConf.getIntVar(conf, ConfVars.LLAP_ZKSM_ZK_CONNECTION_RETRY_COUNT); + int sleepBetweenRetryMs = + (int) HiveConf.getTimeVar(conf, ConfVars.LLAP_ZKSM_ZK_CONNECTION_RETRY_WAIT, TimeUnit.MILLISECONDS); CuratorFramework zkClient = CuratorFrameworkFactory.builder().namespace(null) - .retryPolicy(new RetryOneTime(10)).sessionTimeoutMs(stime).connectionTimeoutMs(ctime) - .ensembleProvider(new FixedEnsembleProvider(conf.get(ZK_DTSM_ZK_CONNECTION_STRING))) + .retryPolicy(new RetryNTimes(numberOfRetries, sleepBetweenRetryMs)).sessionTimeoutMs(stime) + .connectionTimeoutMs(ctime).ensembleProvider(new FixedEnsembleProvider(conf.get(ZK_DTSM_ZK_CONNECTION_STRING))) .build(); // Hardcoded from a private field in ZKDelegationTokenSecretManager. // We need to check the path under what it sets for namespace, since the namespace is