diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 66f436b..9a10aea 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1334,7 +1334,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "The port of ZooKeeper servers to talk to.\n" + "If the list of Zookeeper servers specified in hive.zookeeper.quorum\n" + "does not contain port numbers, this value is used."), - HIVE_ZOOKEEPER_SESSION_TIMEOUT("hive.zookeeper.session.timeout", "600000ms", + HIVE_ZOOKEEPER_SESSION_TIMEOUT("hive.zookeeper.session.timeout", "1200000ms", new TimeValidator(TimeUnit.MILLISECONDS), "ZooKeeper client's session timeout (in milliseconds). The client is disconnected, and as a result, all locks released, \n" + "if a heartbeat is not sent in the timeout."), diff --git a/pom.xml b/pom.xml index c147d45..ed82a03 100644 --- a/pom.xml +++ b/pom.xml @@ -489,6 +489,11 @@ ${curator.version} + org.apache.curator + curator-recipes + ${curator.version} + + org.codehaus.groovy groovy-all ${groovy.version} diff --git a/service/pom.xml b/service/pom.xml index b9d3a40..94ffad5 100644 --- a/service/pom.xml +++ b/service/pom.xml @@ -91,6 +91,11 @@ curator-framework ${curator.version} + + org.apache.curator + curator-recipes + ${curator.version} + org.apache.hive diff --git a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index b6e851a..629bea1 100644 --- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -92,10 +92,10 @@ public void run() { // TCP Server server = new TThreadPoolServer(sargs); - server.serve(); - String msg = "Started " + ThriftBinaryCLIService.class.getSimpleName() + " on port " + String msg = "Starting " + ThriftBinaryCLIService.class.getSimpleName() + " on port " + portNum + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads"; LOG.info(msg); + server.serve(); } catch (Throwable t) { LOG.fatal( "Error starting HiveServer2: could not start " diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 21025a2..f5306d0 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Properties; +import java.util.concurrent.TimeUnit; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; @@ -35,6 +36,10 @@ import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.api.ACLProvider; +import org.apache.curator.framework.api.BackgroundCallback; +import org.apache.curator.framework.api.CuratorEvent; +import org.apache.curator.framework.api.CuratorEventType; +import org.apache.curator.framework.recipes.nodes.PersistentEphemeralNode; import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.hadoop.hive.common.LogUtils; import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; @@ -43,7 +48,6 @@ import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl; import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; import org.apache.hadoop.hive.ql.util.ZooKeeperHiveHelper; -import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.common.util.HiveStringUtils; @@ -67,9 +71,11 @@ */ public class HiveServer2 extends CompositeService { private static final Log LOG = LogFactory.getLog(HiveServer2.class); + private static volatile boolean zooKeeperLastDeleteSuccessful = false; private CLIService cliService; private ThriftCLIService thriftCLIService; + private PersistentEphemeralNode znode; private String znodePath; private CuratorFramework zooKeeperClient; private boolean registeredWithZooKeeper = false; @@ -151,12 +157,15 @@ private void addServerInstanceToZooKeeper(HiveConf hiveConf) throws Exception { String instanceURI = getServerInstanceURI(hiveConf); byte[] znodeDataUTF8 = instanceURI.getBytes(Charset.forName("UTF-8")); setUpZooKeeperAuth(hiveConf); + int sessionTimeout = + (int) hiveConf.getTimeVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, + TimeUnit.MILLISECONDS); // Create a CuratorFramework instance to be used as the ZooKeeper client // Use the zooKeeperAclProvider to create appropriate ACLs zooKeeperClient = CuratorFrameworkFactory.builder().connectString(zooKeeperEnsemble) - .aclProvider(zooKeeperAclProvider).retryPolicy(new ExponentialBackoffRetry(1000, 3)) - .build(); + .sessionTimeoutMs(sessionTimeout).aclProvider(zooKeeperAclProvider) + .retryPolicy(new ExponentialBackoffRetry(1000, 3)).build(); zooKeeperClient.start(); // Create the parent znodes recursively; ignore if the parent already exists. try { @@ -176,18 +185,33 @@ private void addServerInstanceToZooKeeper(HiveConf hiveConf) throws Exception { ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + "serverUri=" + instanceURI + ";" + "version=" + HiveVersionInfo.getVersion() + ";" + "sequence="; - znodePath = - zooKeeperClient.create().creatingParentsIfNeeded() - .withMode(CreateMode.EPHEMERAL_SEQUENTIAL).forPath(pathPrefix, znodeDataUTF8); + znode = + new PersistentEphemeralNode(zooKeeperClient, + PersistentEphemeralNode.Mode.EPHEMERAL_SEQUENTIAL, pathPrefix, znodeDataUTF8); + znode.start(); + // We'll make 6 attempts, with each attempt waiting for 20 seconds for node creation + long znodeCreationTimeout = 20; + int maxNodeCreationAttempts = 6; + int attempts = 0; + while (!znode.waitForInitialCreate(znodeCreationTimeout, TimeUnit.SECONDS)) { + if (++attempts >= maxNodeCreationAttempts) { + throw new Exception("Max znode creation attempts " + maxNodeCreationAttempts + + " exhausted"); + } + } setRegisteredWithZooKeeper(true); + znodePath = znode.getActualPath(); // Set a watch on the znode if (zooKeeperClient.checkExists().usingWatcher(new DeRegisterWatcher()).forPath(znodePath) == null) { // No node exists, throw exception throw new Exception("Unable to create znode for this HiveServer2 instance on ZooKeeper."); } LOG.info("Created a znode on ZooKeeper for HiveServer2 uri: " + instanceURI); - } catch (KeeperException e) { + } catch (Exception e) { LOG.fatal("Unable to create a znode for this server instance", e); + if (znode != null) { + znode.close(); + } throw (e); } } @@ -223,22 +247,33 @@ private void setUpZooKeeperAuth(HiveConf hiveConf) throws Exception { @Override public void process(WatchedEvent event) { if (event.getType().equals(Watcher.Event.EventType.NodeDeleted)) { - HiveServer2.this.setRegisteredWithZooKeeper(false); - // If there are no more active client sessions, stop the server - if (cliService.getSessionManager().getOpenSessionCount() == 0) { - LOG.warn("This instance of HiveServer2 has been removed from the list of server " - + "instances available for dynamic service discovery. " - + "The last client session has ended - will shutdown now."); - HiveServer2.this.stop(); + if (znode != null) { + try { + znode.close(); + LOG.warn("This HiveServer2 instance is now de-registered from ZooKeeper. " + + "The server will be shut down after the last client sesssion completes."); + } catch (IOException e) { + LOG.error("Failed to close the persistent ephemeral znode", e); + } finally { + HiveServer2.this.setRegisteredWithZooKeeper(false); + // If there are no more active client sessions, stop the server + if (cliService.getSessionManager().getOpenSessionCount() == 0) { + LOG.warn("This instance of HiveServer2 has been removed from the list of server " + + "instances available for dynamic service discovery. " + + "The last client session has ended - will shutdown now."); + HiveServer2.this.stop(); + } + } } - LOG.warn("This HiveServer2 instance is now de-registered from ZooKeeper. " - + "The server will be shut down after the last client sesssion completes."); } } } private void removeServerInstanceFromZooKeeper() throws Exception { setRegisteredWithZooKeeper(false); + if (znode != null) { + znode.close(); + } zooKeeperClient.close(); LOG.info("Server instance removed from ZooKeeper."); } @@ -365,18 +400,60 @@ static void deleteServerInstancesFromZooKeeper(String versionNumber) throws Exce List znodePaths = zooKeeperClient.getChildren().forPath( ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace); + List znodePathsUpdated; + int maxDeleteAttempts = 5; + int deleteAttempts; // Now for each path that is for the given versionNumber, delete the znode from ZooKeeper - for (String znodePath : znodePaths) { + for (int i = 0; i < znodePaths.size(); i++) { + String znodePath = znodePaths.get(i); if (znodePath.contains("version=" + versionNumber + ";")) { - LOG.info("Removing the znode: " + znodePath + " from ZooKeeper"); - zooKeeperClient.delete().forPath( + String fullZnodePath = ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace - + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + znodePath); + + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + znodePath; + LOG.warn("Will attempt to remove the znode: " + fullZnodePath + " from ZooKeeper"); + // Reset the last deleted flag + zooKeeperLastDeleteSuccessful = false; + zooKeeperClient.delete().guaranteed().inBackground(new DeleteCallBack()) + .forPath(fullZnodePath); + // Wait for the delete to complete, sleep for 15s before checking the status + // Reset delete attempts for this node + deleteAttempts = 0; + while (!zooKeeperLastDeleteSuccessful) { + // Sleep for 15 seconds + try { + Thread.sleep(15L * 1000L); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + // Check if we've maxed out delete attempts + if (++deleteAttempts > maxDeleteAttempts) { + LOG.error("Unable to remove the znode: " + fullZnodePath + " from ZooKeeper"); + } + } + // Get the updated path list + znodePathsUpdated = + zooKeeperClient.getChildren().forPath( + ZooKeeperHiveHelper.ZOOKEEPER_PATH_SEPARATOR + rootNamespace); + // Gives a list of any new paths that may have been created to maintain the persistent ephemeral node + // (Curator's PersistentEphemeralNode renames the node while trying to keep it in ZK) + znodePathsUpdated.removeAll(znodePaths); + // Add the new paths to the znodes list. We'll try for their removal as well. + znodePaths.addAll(znodePathsUpdated); } } zooKeeperClient.close(); } + private static class DeleteCallBack implements BackgroundCallback { + @Override + public void processResult(CuratorFramework zooKeeperClient, CuratorEvent event) + throws Exception { + if (event.getType() == CuratorEventType.DELETE) { + HiveServer2.zooKeeperLastDeleteSuccessful = true; + } + } + } + public static void main(String[] args) { HiveConf.setLoadHiveServer2Config(true); try {