diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a479deb..8c7b49b 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3049,8 +3049,12 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal LLAP_DAEMON_QUEUE_NAME("hive.llap.daemon.queue.name", null, "Queue name within which the llap slider application will run." + " Used in LlapServiceDriver and package.py"), + // TODO Move the following 2 properties out of Configuration to a constant. LLAP_DAEMON_CONTAINER_ID("hive.llap.daemon.container.id", null, "ContainerId of a running LlapDaemon. Used to publish to the registry"), + LLAP_DAEMON_NM_ADDRESS("hive.llap.daemon.nm.address", null, + "NM Address host:rpcPort for the NodeManager on which the instance of the daemon is running.\n" + + "Published to the llap registry. Should never be set by users"), LLAP_DAEMON_SHUFFLE_DIR_WATCHER_ENABLED("hive.llap.daemon.shuffle.dir.watcher.enabled", false, "TODO doc", "llap.daemon.shuffle.dir-watcher.enabled"), LLAP_DAEMON_AM_LIVENESS_HEARTBEAT_INTERVAL_MS( diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java index 6a5adc9..95bc675 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java @@ -472,6 +472,7 @@ public static void main(String[] args) throws Exception { LlapDaemonConfiguration daemonConf = new LlapDaemonConfiguration(); String containerIdStr = System.getenv(ApplicationConstants.Environment.CONTAINER_ID.name()); + String appName = null; if (containerIdStr != null && !containerIdStr.isEmpty()) { daemonConf.set(ConfVars.LLAP_DAEMON_CONTAINER_ID.varname, containerIdStr); @@ -486,6 +487,19 @@ public static void main(String[] args) throws Exception { appName = null; } + String nmHost = System.getenv(ApplicationConstants.Environment.NM_HOST.name()); + String nmPort = System.getenv(ApplicationConstants.Environment.NM_PORT.name()); + if (!org.apache.commons.lang3.StringUtils.isBlank(nmHost) && !org.apache.commons.lang3.StringUtils.isBlank(nmPort)) { + String nmAddress = nmHost + ":" + nmPort; + daemonConf.set(ConfVars.LLAP_DAEMON_NM_ADDRESS.varname, nmAddress); + } else { + daemonConf.unset(ConfVars.LLAP_DAEMON_NM_ADDRESS.varname); + // Unlikely, but log the actual values in case one of the two was empty/null + LOG.warn( + "NodeManager host/port not found in environment. Values retrieved: host={}, port={}", + nmHost, nmPort); + } + int numExecutors = HiveConf.getIntVar(daemonConf, ConfVars.LLAP_DAEMON_NUM_EXECUTORS); String workDirsString = System.getenv(ApplicationConstants.Environment.LOCAL_DIRS.name()); diff --git llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java index 8bb6cab..18ce03c 100644 --- llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java +++ llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskCommunicator.java @@ -14,6 +14,7 @@ package org.apache.hadoop.hive.llap.tezplugins; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.llap.registry.ServiceInstance; import org.apache.hadoop.io.Writable; @@ -120,7 +121,6 @@ private final String user; private String amHost; private String timelineServerUri; - private int nmPort; // These two structures track the list of known nodes, and the list of nodes which are sending in keep-alive heartbeats. // Primarily for debugging purposes a.t.m, since there's some unexplained TASK_TIMEOUTS which are currently being observed. @@ -194,7 +194,6 @@ public void initialize() throws Exception { String scheme = WebAppUtils.getHttpSchemePrefix(conf); String ahsUrl = WebAppUtils.getAHSWebAppURLWithoutScheme(conf); this.timelineServerUri = WebAppUtils.getURLWithScheme(scheme, ahsUrl); - this.nmPort = Integer.valueOf(WebAppUtils.getNMWebAppURLWithoutScheme(conf).split(":")[1]); } @Override @@ -554,6 +553,7 @@ private String constructLogUrl(final TezTaskAttemptID attemptID, final NodeId co containerNodeId, e.getMessage()); return null; } + // Once NodeId includes fragmentId - this becomes a lot more reliable. if (instanceSet != null) { ServiceInstance matchedInstance = null; for (ServiceInstance instance : instanceSet) { @@ -565,8 +565,9 @@ private String constructLogUrl(final TezTaskAttemptID attemptID, final NodeId co if (matchedInstance != null) { String containerIdString = matchedInstance.getProperties() .get(HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname); - if (containerIdString != null) { - return constructLlapLogUrl(attemptID, containerIdString, isDone, containerNodeId.getHost()); + String nmNodeAddress = matchedInstance.getProperties().get(ConfVars.LLAP_DAEMON_NM_ADDRESS.varname); + if (!StringUtils.isBlank(containerIdString) && !StringUtils.isBlank(nmNodeAddress)) { + return constructLlapLogUrl(attemptID, containerIdString, isDone, nmNodeAddress); } } } @@ -574,10 +575,10 @@ private String constructLogUrl(final TezTaskAttemptID attemptID, final NodeId co } private String constructLlapLogUrl(final TezTaskAttemptID attemptID, final String containerIdString, - final boolean isDone, final String nmHost) { + final boolean isDone, final String nmAddress) { String dagId = attemptID.getTaskID().getVertexID().getDAGId().toString(); String filename = JOINER.join(currentHiveQueryId, "-", dagId, ".log", (isDone ? ".done" : ""), - "?nm.id=", nmHost, ":", nmPort); + "?nm.id=", nmAddress); String url = PATH_JOINER.join(timelineServerUri, "ws", "v1", "applicationhistory", "containers", containerIdString, "logs", filename); return url;