diff --git llap-server/bin/llapDaemon.sh llap-server/bin/llapDaemon.sh index 010e1e6..4ec74f3 100755 --- llap-server/bin/llapDaemon.sh +++ llap-server/bin/llapDaemon.sh @@ -42,7 +42,6 @@ fi startStop=$1 shift - rotate_log () { log=$1; diff --git llap-server/bin/llapMonitorDaemon.sh llap-server/bin/llapMonitorDaemon.sh new file mode 100644 index 0000000..9664af6 --- /dev/null +++ llap-server/bin/llapMonitorDaemon.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash + +usage="Usage: llapMonitorDaemon.sh (start|stop) " + +if [ $# -le 0 ]; then + echo $usage + exit 1 +fi + +startStop=$1 +shift + +rotate_log () { + log=$1 + num=5 + if [ -n "$2" ]; then + num=$2 + fi + + if [ -f "$log" ]; then + while [ $num -gt 1 ]; do + prev=`expr $num - 1` + [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num" + num=$prev + done + mv "$log" "$log.$num"; + fi +} + +if [ "${LLAP_DAEMON_CONF_DIR}" = "" ] ; then + echo "LLAP_DAEMON_CONF_DIR must be specified" + exit 1 +fi + +if [ -f "${LLAP_DAEMON_CONF_DIR}/llap-daemon-env.sh" ] ; then + . "${LLAP_DAEMON_CONF_DIR}/llap-daemon-env.sh" +fi + +# get log directory +if [ "$LLAP_DAEMON_LOG_DIR" = "" ]; then + export LLAP_DAEMON_LOG_DIR="/tmp/llapMonitorDaemonLogs" +fi + +if [ ! -w "$LLAP_DAEMON_LOG_DIR" ] ; then + mkdir -p "$LLAP_DAEMON_LOG_DIR" + chown $USER $LLAP_DAEMON_LOG_DIR +fi + +if [ "$LLAP_DAEMON_PID_DIR" = "" ]; then + LLAP_DAEMON_PID_DIR=/tmp/$USER +fi + +# some variables +LLAP_DAEMON_LOG_BASE=llap-monitor-daemon-$USER-$HOSTNAME +export LLAP_DAEMON_LOG_FILE=$LLAP_DAEMON_LOG_BASE.log +if [ ! -n "${LLAP_DAEMON_LOGGER}" ]; then + echo "LLAP_DAEMON_LOGGER not defined... using defaults" + LLAP_DAEMON_LOGGER=${LOG_LEVEL_DEFAULT} +fi +logLog=$LLAP_DAEMON_LOG_DIR/$LLAP_DAEMON_LOG_BASE.log +logOut=$LLAP_DAEMON_LOG_DIR/$LLAP_DAEMON_LOG_BASE.out +pid=$LLAP_DAEMON_PID_DIR/llap-daemon.pid +LLAP_DAEMON_STOP_TIMEOUT=${LLAP_DAEMON_STOP_TIMEOUT:-2} + +# Set default scheduling priority +if [ "$LLAP_DAEMON_NICENESS" = "" ]; then + export LLAP_DAEMON_NICENESS=0 +fi + +case $startStop in + + (start) + + [ -w "$LLAP_DAEMON_PID_DIR" ] || mkdir -p "$LLAP_DAEMON_PID_DIR" + + if [ -f $pid ]; then + if kill -0 `cat $pid` > /dev/null 2>&1; then + echo llapdaemon running as process `cat $pid`. Stop it first. + exit 1 + fi + fi + + #rotate_log $logLog + #rotate_log $logOut + echo starting llapdaemon, logging to $logLog and $logOut + export LLAP_DAEMON_LOGFILE=${LLAP_DAEMON_LOG_BASE}.log + nohup nice -n $LLAP_DAEMON_NICENESS "$LLAP_DAEMON_BIN_HOME"/runLlapMonitorDaemon.sh run > "$logOut" 2>&1 < /dev/null & + echo $! > $pid + ;; + + (stop) + + if [ -f $pid ]; then + TARGET_PID=`cat $pid` + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo stopping llapDaemon + kill $TARGET_PID + sleep $LLAP_DAEMON_STOP_TIMEOUT + if kill -0 $TARGET_PID > /dev/null 2>&1; then + echo "llapDaemon did not stop gracefully after $LLAP_DAEMON_STOP_TIMEOUT seconds: killing with kill -9" + kill -9 $TARGET_PID + fi + else + echo no llapDaemon to stop + fi + rm -f $pid + else + echo no llapDaemon to stop + fi + ;; + + (*) + echo $usage + exit 1 + ;; + +esac diff --git llap-server/bin/runLlapMonitorDaemon.sh llap-server/bin/runLlapMonitorDaemon.sh new file mode 100644 index 0000000..d61c031 --- /dev/null +++ llap-server/bin/runLlapMonitorDaemon.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +function print_usage () { + echo "Usage: llap-monitor-daemon.sh [COMMAND]" + echo "Commands: " + echo " classpath print classpath" + echo " run run the daemon" +} + +if [ $# = 0 ]; then + print_usage + exit 1 +fi + +COMMAND=$1 +shift + +JAVA=$JAVA_HOME/bin/java +LOG_LEVEL_DEFAULT="INFO,console" +JAVA_OPTS_BASE="-server -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps" + +# CLASSPATH initially contains $HADOOP_CONF_DIR & $YARN_CONF_DIR +if [ ! -d "$HADOOP_CONF_DIR" ]; then + echo No HADOOP_CONF_DIR set, or is not a directory. + echo Please specify it in the environment. + exit 1 +fi + +if [ ! -d "${LLAP_DAEMON_HOME}" ]; then + echo No LLAP_DAEMON_HOME set, or is not a directory. + echo Please specify it in the environment. + exit 1 +fi + +if [ ! -d "${LLAP_DAEMON_CONF_DIR}" ]; then + echo No LLAP_DAEMON_CONF_DIR set, or is not a directory. + echo Please specify it in the environment. + exit 1 +fi + +if [ ! -n "${LLAP_DAEMON_LOGGER}" ]; then + echo "LLAP_DAEMON_LOGGER not defined... using defaults" + LLAP_DAEMON_LOGGER=${LOG_LEVEL_DEFAULT} +fi + +CLASSPATH=${LLAP_DAEMON_CONF_DIR}:${LLAP_DAEMON_HOME}/lib/*:`${HADOOP_PREFIX}/bin/hadoop classpath`:. + +if [ -n "LLAP_DAEMON_USER_CLASSPATH" ]; then + CLASSPATH=${CLASSPATH}:${LLAP_DAEMON_USER_CLASSPATH} +fi + +if [ ! -n "${LLAP_DAEMON_LOG_DIR}" ]; then + echo "LLAP_DAEMON_LOG_DIR not defined. Using default" + LLAP_DAEMON_LOG_DIR="/tmp/llapDaemonLogs" +fi + +if [ "$LLAP_DAEMON_LOGFILE" = "" ]; then + LLAP_DAEMON_LOG_FILE='llapdaemon.log' +fi + +if [ "$LLAP_DAEMON_HEAPSIZE" = "" ]; then + LLAP_DAEMON_HEAPSIZE=4096 +fi + +if [ -n "$LLAP_DAEMON_LD_PATH" ]; then + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LLAP_DAEMON_LD_PATH +fi + +if [ "$COMMAND" = "classpath" ] ; then + echo $CLASSPATH + exit +elif [ "$COMMAND" = "run" ] ; then + CLASS='org.apache.hadoop.hive.llap.daemon.impl.LlapMonitorDaemon' +fi + +LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} ${JAVA_OPTS_BASE}" + +# Set the default GC option if none set +if [[ ! "$LLAP_DAEMON_OPTS" =~ \+Use[^[:space:]]+GC ]] +then + LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -XX:+UseParallelGC" +fi + +# In general, avoid using the OS temporary directory +if [ -n "$LLAP_DAEMON_TMP_DIR" ]; then + export LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Djava.io.tmpdir=$LLAP_DAEMON_TMP_DIR" +fi + +LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dlog4j.configuration=llap-daemon-log4j.properties" +LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.log.dir=${LLAP_DAEMON_LOG_DIR}" +LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.log.file=${LLAP_DAEMON_LOG_FILE}" +LLAP_DAEMON_OPTS="${LLAP_DAEMON_OPTS} -Dllap.daemon.root.logger=${LLAP_DAEMON_LOGGER}" + +exec "$JAVA" -Dproc_llapdaemon -Xms${LLAP_DAEMON_HEAPSIZE}m -Xmx${LLAP_DAEMON_HEAPSIZE}m ${LLAP_DAEMON_OPTS} -classpath "$CLASSPATH" $CLASS "$@" + + + diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapMonitorDaemon.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapMonitorDaemon.java new file mode 100644 index 0000000..e49022d --- /dev/null +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/LlapMonitorDaemon.java @@ -0,0 +1,142 @@ +package org.apache.hadoop.hive.llap.daemon.impl; + +import java.io.IOException; +import java.lang.Exception; +import java.lang.Override; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.lang.management.MemoryPoolMXBean; +import java.lang.management.MemoryType; +import java.net.InetSocketAddress; +import java.util.Arrays; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +import javax.management.ObjectName; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.configuration.LlapConfiguration; +import org.apache.hadoop.hive.llap.daemon.ContainerRunner; +import org.apache.hadoop.hive.llap.daemon.QueryFailedHandler; +import org.apache.hadoop.hive.llap.daemon.registry.impl.LlapRegistryService; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteRequestProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateUpdatedRequestProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.TerminateFragmentRequestProto; +import org.apache.hadoop.hive.llap.daemon.services.impl.LlapWebServices; +import org.apache.hadoop.hive.llap.io.api.LlapIoProxy; +import org.apache.hadoop.hive.llap.metrics.LlapDaemonExecutorMetrics; +import org.apache.hadoop.hive.llap.metrics.LlapMetricsSystem; +import org.apache.hadoop.hive.llap.metrics.MetricsUtils; +import org.apache.hadoop.hive.llap.shufflehandler.ShuffleHandler; +import org.apache.hadoop.metrics2.util.MBeans; +import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.util.JvmPauseMonitor; +import org.apache.hadoop.util.StringUtils; +import org.apache.hive.common.util.ShutdownHookManager; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class LlapMonitorDaemon extends CompositeService { + private static final Logger LOG = LoggerFactory.getLogger(LlapMonitorDaemon.class); + + private final LlapWebServices webServices; + + // TODO Not the best way to share the address + private final AtomicReference address = new AtomicReference(); + + public LlapMonitorDaemon(Configuration daemonConf ) { + super("LlapMonitorDaemon"); + LOG.info("Launch LlapMonitorDaemon"); + + this.webServices = new LlapWebServices("llap-monitor"); + addIfService(webServices); + } + + @Override + public void serviceInit(Configuration conf) throws Exception { + super.serviceInit(conf); + LOG.info("LlapMonitorDaemon serviceInit complete"); + } + + @Override + public void serviceStart() throws Exception { + super.serviceStart(); + LOG.info("LlapMonitorDaemon serviceStart complete"); + } + + public void serviceStop() throws Exception { + super.serviceStop(); + shutdown(); + LOG.info("LlapMonitorDaemon serviceStop"); + } + + public void shutdown() { + LOG.info("LlapMonitorDaemon shutdown complete"); + + //webServices.stop(); + } + + + public static void main(String[] args) throws Exception { + Thread.setDefaultUncaughtExceptionHandler(new LlapMonitorDaemonUncaughtExceptionHandler()); + LlapMonitorDaemon daemon = null; + + try { + LlapConfiguration daemonConf = new LlapConfiguration(); + int numExecutors = daemonConf.getInt(LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS, + LlapConfiguration.LLAP_DAEMON_NUM_EXECUTORS_DEFAULT); + + String[] localDirs = + daemonConf.getTrimmedStrings(LlapConfiguration.LLAP_DAEMON_WORK_DIRS); + int rpcPort = daemonConf.getInt(LlapConfiguration.LLAP_DAEMON_RPC_PORT, + LlapConfiguration.LLAP_DAEMON_RPC_PORT_DEFAULT); + int shufflePort = daemonConf + .getInt(ShuffleHandler.SHUFFLE_PORT_CONFIG_KEY, ShuffleHandler.DEFAULT_SHUFFLE_PORT); + long executorMemoryBytes = daemonConf + .getInt(LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB, + LlapConfiguration.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB_DEFAULT) * 1024l * 1024l; + long cacheMemoryBytes = + HiveConf.getLongVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_MAX_SIZE); + boolean isDirectCache = + HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_ORC_CACHE_ALLOCATE_DIRECT); + boolean llapIoEnabled = HiveConf.getBoolVar(daemonConf, HiveConf.ConfVars.LLAP_IO_ENABLED); + + daemon = new LlapMonitorDaemon(daemonConf); + daemon.init(daemonConf); + daemon.start(); + + LOG.info("Started LlapMonitorDaemon"); + } catch (Throwable t) { + LOG.warn("Failed to start LLAP Monitor Daemon with exception", t); + if (daemon != null) { + daemon.shutdown(); + } + System.exit(-1); + } + } + + private static class LlapMonitorDaemonUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler { + @Override + public void uncaughtException(Thread t, Throwable e) { + LOG.info("UncaughtExceptionHandler invoked"); + if (e instanceof Error) { + LOG.error("Thread {} threw an error. Shutting down now...", t, e); + e.printStackTrace(); + } else if (e instanceof OutOfMemoryError) { + LOG.warn("OutofMemoryError has been occurred."); + ExitUtil.halt(-1); + } else { + ExitUtil.terminate(-1); + } + } + } +} \ No newline at end of file diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/services/impl/LlapWebServices.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/services/impl/LlapWebServices.java index 2275719..9dea34e 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/daemon/services/impl/LlapWebServices.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/services/impl/LlapWebServices.java @@ -4,6 +4,7 @@ import org.apache.hadoop.hive.llap.configuration.LlapConfiguration; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.webapp.Controller; import org.apache.hadoop.yarn.webapp.WebApp; import org.apache.hadoop.yarn.webapp.WebApps; import org.apache.log4j.Logger; @@ -19,7 +20,11 @@ private LlapWebApp webAppInstance; public LlapWebServices() { - super("LlapWebServices"); + super("llap"); + } + + public LlapWebServices(String name) { + super(name); } @Override @@ -29,9 +34,9 @@ public void serviceInit(Configuration conf) { this.conf.addResource(YarnConfiguration.YARN_SITE_CONFIGURATION_FILE); this.port = conf.getInt(LlapConfiguration.LLAP_DAEMON_SERVICE_PORT, - LlapConfiguration.LLAP_DAEMON_SERVICE_PORT_DEFAULT); + LlapConfiguration.LLAP_DAEMON_SERVICE_PORT_DEFAULT); this.ssl = conf.getBoolean(LlapConfiguration.LLAP_DAEMON_SERVICE_SSL, - LlapConfiguration.LLAP_DAEMON_SERVICE_SSL_DEFAULT); + LlapConfiguration.LLAP_DAEMON_SERVICE_SSL_DEFAULT); this.webAppInstance = new LlapWebApp(); } @@ -39,10 +44,15 @@ public void serviceInit(Configuration conf) { @Override public void serviceStart() throws Exception { String bindAddress = "0.0.0.0"; - this.webApp = - WebApps.$for("llap").at(bindAddress).at(port).with(getConfig()) + LOG.info("LlapWebServices serviceStart"); + LOG.info(getConfig().toString()); + WebApps.Builder builder = + WebApps.$for(getName()).at(bindAddress).at(port).with(getConfig()) + .withServlet("llap-monitor", "/llap-monitor", LlapWebServlet.class) + .inDevMode(); /* TODO: security negotiation here */ - .start(); + + builder.start(); } public void serviceStop() throws Exception { diff --git llap-server/src/java/org/apache/hadoop/hive/llap/daemon/services/impl/LlapWebServlet.java llap-server/src/java/org/apache/hadoop/hive/llap/daemon/services/impl/LlapWebServlet.java new file mode 100644 index 0000000..426fc2a --- /dev/null +++ llap-server/src/java/org/apache/hadoop/hive/llap/daemon/services/impl/LlapWebServlet.java @@ -0,0 +1,34 @@ +package org.apache.hadoop.hive.llap.daemon.services.impl; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.io.PrintWriter; + +/** + * Created by kaisasak on 8/26/15. + */ +public class LlapWebServlet extends HttpServlet { + private static final Logger LOG = LoggerFactory.getLogger(LlapWebServlet.class); + + public LlapWebServlet() { + super(); + LOG.info("LlapWebServlet is constructed"); + } + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + LOG.info("LlapWebServlet doGet"); + PrintWriter out = resp.getWriter(); + out.println(""); + out.println(" "); + out.println("

Hello, World

"); + out.println(" "); + out.println(""); + } +} diff --git llap-server/src/main/resources/llap-monitor.py llap-server/src/main/resources/llap-monitor.py new file mode 100644 index 0000000..7392387 --- /dev/null +++ llap-server/src/main/resources/llap-monitor.py @@ -0,0 +1,62 @@ +## This is a test script for LLAP Monitor + +import sys +import os +import subprocess +from resource_management import * +from os.path import dirname + +class LlapMonitor(Script): + + def install(self, env): + self.install_packages(env) + + print("Install LLAP Monitor") + pass + + def configure(self, env): + import params + env.set_params(params) + + print("Configure LLAP Monitor") + + def start(self, env): + import params + env.set_params(params) + os.environ['JAVA_HOME'] = format('{java64_home}') + # this is the same as TEZ_PREFIX + os.environ['LLAP_DAEMON_HOME'] = format('{app_root}') + os.environ['LLAP_DAEMON_TMP_DIR'] = format('{app_tmp_dir}') + # this is the location where we have the llap server components (shell scripts) + os.environ['LLAP_DAEMON_BIN_HOME'] = format('{app_root}/bin') + # location containing llap-daemon-site.xml, tez and yarn configuration xmls as well. + os.environ['LLAP_DAEMON_CONF_DIR'] = format("{app_root}/conf/") + os.environ['LLAP_DAEMON_LOG_DIR'] = format("{app_log_dir}/") + os.environ['LLAP_DAEMON_LOGGER'] = format("{app_log_level}") + os.environ['LLAP_DAEMON_HEAPSIZE'] = format("{memory_val}") + os.environ['LLAP_DAEMON_PID_DIR'] = dirname(format("{pid_file}")) + os.environ['LLAP_DAEMON_LD_PATH'] = format('{library_path}') + os.environ['LLAP_DAEMON_OPTS'] = format('{daemon_args}') + print "Debug from LLAPMonitor python script" + print os.environ['LLAP_DAEMON_CONF_DIR'] + self.configure(env) + location = "bash -x {app_root}/bin/llapMonitorDaemon.sh start &> {app_log_dir}/shell.out" + process_cmd = format(location) + + subprocess.call(process_cmd, shell=True) + + + def stop(self, env): + import params + env.set_params(params) + + print("Stop LLAP Monitor") + + def status(self, env): + import params + env.set_params(params) + check_process_status(params.pid_file) + + +if __name__ == "__main__": + LlapMonitor().execute() \ No newline at end of file diff --git llap-server/src/main/resources/templates.py llap-server/src/main/resources/templates.py index dae0afb..3b519de 100644 --- llap-server/src/main/resources/templates.py +++ llap-server/src/main/resources/templates.py @@ -45,6 +45,17 @@ PYTHON + + + LLAPMonitor + MASTER + + Servers-instances + + + PYTHON + + @@ -107,6 +118,11 @@ "yarn.role.priority": "1", "yarn.component.instances": "%(instances)d", "yarn.memory": "%(container.mb)d" + }, + "LLAPMonitor": { + "yarn.role.priority": "2", + "yarn.component.instances": "1", + "yarn.memory": "%(container.mb)d" } } }