diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java index eac1c42..f4d041f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapDaemon.java @@ -14,18 +14,20 @@ package org.apache.hadoop.hive.llap.daemon.impl; -import org.apache.hadoop.hive.llap.LlapOutputFormatService; +import java.io.File; import java.io.IOException; import java.lang.management.ManagementFactory; import java.lang.management.MemoryPoolMXBean; import java.lang.management.MemoryType; import java.net.InetSocketAddress; import java.net.URL; +import java.nio.file.Files; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.List; +import java.util.Scanner; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -34,12 +36,14 @@ import javax.net.SocketFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.JvmPauseMonitor; import org.apache.hadoop.hive.common.LogUtils; import org.apache.hadoop.hive.common.UgiFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.DaemonId; +import org.apache.hadoop.hive.llap.LlapOutputFormatService; import org.apache.hadoop.hive.llap.LlapUtil; import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration; import org.apache.hadoop.hive.llap.daemon.ContainerRunner; @@ -88,7 +92,10 @@ public class LlapDaemon extends CompositeService implements ContainerRunner, LlapDaemonMXBean { private static final Logger LOG = LoggerFactory.getLogger(LlapDaemon.class); - + private static final String PID_DIR_ENV = "LLAP_DAEMON_PID_DIR"; + private static final String PID_FILE = "llap-daemon.pid"; + private static final String PROC_SELF = Path.SEPARATOR + "proc" + Path.SEPARATOR + "self"; + private static int llapDaemonPid = getPid(); private final Configuration shuffleHandlerConf; private final SecretManager secretManager; private final LlapProtocolServerImpl server; @@ -241,7 +248,7 @@ public LlapDaemon(Configuration daemonConf, int numExecutors, long executorMemor pauseMonitor.start(); String displayNameJvm = "LlapDaemonJvmMetrics-" + hostName; String sessionId = MetricsUtils.getUUID(); - LlapDaemonJvmMetrics.create(displayNameJvm, sessionId); + LlapDaemonJvmMetrics.create(displayNameJvm, sessionId, daemonConf); String displayName = "LlapDaemonExecutorMetrics-" + hostName; daemonConf.set("llap.daemon.metrics.sessionid", sessionId); String[] strIntervals = HiveConf.getTrimmedStringsVar(daemonConf, @@ -373,6 +380,62 @@ public static long getTotalHeapSize() { return total; } + private static int getPid() { + // try getting PID from llap-daemon.pid file + final String pidDir = System.getenv(PID_DIR_ENV); + if (pidDir != null) { + File pidFile = new File(pidDir + File.separator + PID_FILE); + if (pidFile.exists() && pidFile.isFile()) { + try (Scanner reader = new Scanner(pidFile)) { + llapDaemonPid = reader.nextInt(); + return llapDaemonPid; + } catch (IOException e) { + // file not found, not a problem since we have already make exists check. log other exceptions + LOG.warn("Unable to read PID file {}", pidFile, e); + } + } + } else { + LOG.warn("{} not set. Trying to get PID by reading link of {}..", PID_DIR_ENV, PROC_SELF); + } + + // if this PID file does not exist, try expanding symlink of /proc/self + String pidStr = ""; + try { + pidStr = Files.readSymbolicLink(new File(PROC_SELF).toPath()).getFileName().toString(); + llapDaemonPid = Integer.parseInt(pidStr); + return llapDaemonPid; + } catch (IOException e) { + LOG.warn("Unable to readlink {}", PROC_SELF, e); + } catch (NumberFormatException e) { + LOG.warn("Unable to parse PID ({}) from {} readlink. {}", pidStr, PROC_SELF, e); + } + + // if /proc/self also fails, try to get from RuntimeMXBean (NOTE: this is implementation specific, last resort) + // default getName() returns PID@hostname + String jvmName = ManagementFactory.getRuntimeMXBean().getName(); + if (jvmName.contains("@")) { + try { + llapDaemonPid = Integer.parseInt(jvmName.split("@")[0]); + return llapDaemonPid; + } catch (NumberFormatException e) { + LOG.warn("Unable to parse PID RuntimeMXBean.getName(). {}", jvmName, e); + } + } + + LOG.error("Unable to determine PID of LLAP daemon. JVM name: {}", jvmName); + // unable to find PID by all above means + llapDaemonPid = -1; + return llapDaemonPid; + } + + /** + * Returns PID of running LLAP daemon. + * @return - PID of llap daemon process or -1 if PID cannot be determined + */ + public static int getLlapDaemonPid() { + return llapDaemonPid; + } + private void printAsciiArt() { final String asciiArt = "" + "$$\\ $$\\ $$$$$$\\ $$$$$$$\\\n" + @@ -534,7 +597,7 @@ public static void main(String[] args) throws Exception { llapDaemon.init(daemonConf); llapDaemon.start(); - LOG.info("Started LlapDaemon"); + LOG.info("Started LlapDaemon with PID: {}", getLlapDaemonPid()); // Relying on the RPC threads to keep the service alive. } catch (Throwable t) { // TODO Replace this with a ExceptionHandler / ShutdownHook diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmInfo.java b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmInfo.java index efbddaa..11ac5a4 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmInfo.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmInfo.java @@ -38,6 +38,8 @@ LlapDaemonMappedBufferMemoryUsed("Estimate of memory that JVM is using for mapped byte buffers in bytes"), LlapDaemonOpenFileDescriptorCount("Number of open file descriptors"), LlapDaemonMaxFileDescriptorCount("Maximum number of file descriptors"), + LlapDaemonResidentSetSize("Resident memory (RSS) used by llap daemon process in bytes"), + LlapDaemonVirtualMemorySize("Virtual memory (VMEM) used by llap daemon process in bytes") ; private final String desc; diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmMetrics.java b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmMetrics.java index cfb8729..0967c51 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmMetrics.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/metrics/LlapDaemonJvmMetrics.java @@ -26,6 +26,8 @@ import static org.apache.hadoop.hive.llap.metrics.LlapDaemonJvmInfo.LlapDaemonMappedBufferTotalCapacity; import static org.apache.hadoop.hive.llap.metrics.LlapDaemonJvmInfo.LlapDaemonMaxFileDescriptorCount; import static org.apache.hadoop.hive.llap.metrics.LlapDaemonJvmInfo.LlapDaemonOpenFileDescriptorCount; +import static org.apache.hadoop.hive.llap.metrics.LlapDaemonJvmInfo.LlapDaemonResidentSetSize; +import static org.apache.hadoop.hive.llap.metrics.LlapDaemonJvmInfo.LlapDaemonVirtualMemorySize; import static org.apache.hadoop.metrics2.impl.MsInfo.ProcessName; import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId; @@ -34,12 +36,16 @@ import java.lang.management.OperatingSystemMXBean; import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon; import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; import com.sun.management.UnixOperatingSystemMXBean; @@ -51,24 +57,34 @@ private final String name; private final String sessionId; private final MetricsRegistry registry; + private final ResourceCalculatorProcessTree processTree; + private final int daemonPid = LlapDaemon.getLlapDaemonPid(); - private LlapDaemonJvmMetrics(String displayName, String sessionId) { + private LlapDaemonJvmMetrics(String displayName, String sessionId, final Configuration conf) { this.name = displayName; this.sessionId = sessionId; + Class clazz = conf.getClass(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE, + null, ResourceCalculatorProcessTree.class); + this.processTree = ResourceCalculatorProcessTree.getResourceCalculatorProcessTree("" + daemonPid, clazz, conf); + if (processTree != null) { + this.processTree.setConf(conf); + } this.registry = new MetricsRegistry("LlapDaemonJvmRegistry"); this.registry.tag(ProcessName, MetricsUtils.METRICS_PROCESS_NAME).tag(SessionId, sessionId); } - public static LlapDaemonJvmMetrics create(String displayName, String sessionId) { + public static LlapDaemonJvmMetrics create(String displayName, String sessionId, + final Configuration conf) { MetricsSystem ms = LlapMetricsSystem.instance(); - return ms.register(displayName, "LlapDaemon JVM Metrics", new LlapDaemonJvmMetrics(displayName, sessionId)); + return ms.register(displayName, "LlapDaemon JVM Metrics", + new LlapDaemonJvmMetrics(displayName, sessionId, conf)); } @Override public void getMetrics(MetricsCollector collector, boolean b) { MetricsRecordBuilder rb = collector.addRecord(LlapDaemonJVMMetrics) .setContext("jvm") - .tag(ProcessName, MetricsUtils.METRICS_PROCESS_NAME) + .tag(ProcessName, MetricsUtils.METRICS_PROCESS_NAME + "(PID: " + daemonPid + ")") .tag(SessionId, sessionId); getJvmMetrics(rb); } @@ -100,6 +116,12 @@ private void getJvmMetrics(final MetricsRecordBuilder rb) { openFileHandles = ((UnixOperatingSystemMXBean) os).getOpenFileDescriptorCount(); maxFileHandles = ((UnixOperatingSystemMXBean) os).getMaxFileDescriptorCount(); } + long rss = 0; + long vmem = 0; + if (processTree != null) { + rss = processTree.getRssMemorySize(); + vmem = processTree.getVirtualMemorySize(); + } rb.addGauge(LlapDaemonDirectBufferCount, directBufferCount) .addGauge(LlapDaemonDirectBufferTotalCapacity, directBufferTotalCapacity) .addGauge(LlapDaemonDirectBufferMemoryUsed, directBufferMemoryUsed) @@ -107,7 +129,9 @@ private void getJvmMetrics(final MetricsRecordBuilder rb) { .addGauge(LlapDaemonMappedBufferTotalCapacity, mappedBufferTotalCapacity) .addGauge(LlapDaemonMappedBufferMemoryUsed, mappedBufferMemoryUsed) .addGauge(LlapDaemonOpenFileDescriptorCount, openFileHandles) - .addGauge(LlapDaemonMaxFileDescriptorCount, maxFileHandles); + .addGauge(LlapDaemonMaxFileDescriptorCount, maxFileHandles) + .addGauge(LlapDaemonResidentSetSize, rss) + .addGauge(LlapDaemonVirtualMemorySize, vmem); } public String getName() {