diff --git llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java
index 8cd6df7..de6d9b8 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapServiceDriver.java
@@ -20,7 +20,6 @@
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.net.URI;
@@ -36,6 +35,7 @@
 import java.util.Properties;
 import java.util.Set;

+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration;
 import org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon;
 import org.apache.hadoop.hive.llap.daemon.impl.StaticPermanentFunctionChecker;
@@ -55,16 +55,12 @@
 import org.apache.hadoop.hive.llap.cli.LlapOptionsProcessor.LlapOptions;
 import org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat;
 import org.apache.hadoop.hive.metastore.api.Function;
-import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.ResourceUri;
-import org.apache.hadoop.hive.ql.exec.FunctionTask;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo.FunctionResource;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
 import org.apache.hadoop.hive.ql.util.ResourceDownloader;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.mapred.JobConf;
@@ -85,6 +81,9 @@
   private static final String[] NEEDED_CONFIGS = LlapDaemonConfiguration.DAEMON_CONFIGS;
   private static final String[] OPTIONAL_CONFIGS = LlapDaemonConfiguration.SSL_DAEMON_CONFIGS;
+  // This is not a config that users set in hive-site. Its only use is to share information
+  // between the Java component of the service driver and the Python component.
+  private static final String CONFIG_CLUSTER_NAME = "private.hive.llap.servicedriver.cluster.name";

   /**
    * This is a working configuration for the instance to merge various variables.
@@ -98,6 +97,7 @@ public LlapServiceDriver() {
   }

   public static void main(String[] args) throws Exception {
+    LOG.info("LLAP service driver invoked with arguments={}", args);
     int ret = 0;
     try {
       new LlapServiceDriver().run(args);
@@ -105,6 +105,8 @@ public static void main(String[] args) throws Exception {
       System.err.println("Failed: " + t.getMessage());
       t.printStackTrace();
       ret = 3;
+    } finally {
+      LOG.info("LLAP service driver finished");
     }
     if (LOG.isDebugEnabled()) {
       LOG.debug("Completed processing - exiting with " + ret);
@@ -134,7 +136,7 @@ private static void populateConf(Configuration configured, Configuration target,
     }
   }

-  private static void populateConfWithLlapProperties(Configuration conf, Properties properties) {
+  static void populateConfWithLlapProperties(Configuration conf, Properties properties) {
    for(Entry<Object, Object> props : properties.entrySet()) {
      String key = (String) props.getKey();
      if (HiveConf.getLlapDaemonConfVars().contains(key)) {
@@ -445,6 +447,13 @@ private void run(String[] args) throws Exception {
           HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_QUEUE_NAME));
     }

+    // Propagate the cluster name to the script.
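+    // hive.llap.daemon.service.hosts values of the form "@<name>" identify a Slider-managed
+    // cluster; the leading "@" is stripped so package.py can read the name via CONFIG_CLUSTER_NAME.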
+    String clusterHosts = HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_SERVICE_HOSTS);
+    if (!StringUtils.isEmpty(clusterHosts) && clusterHosts.startsWith("@") &&
+        clusterHosts.length() > 1) {
+      configs.put(CONFIG_CLUSTER_NAME, clusterHosts.substring(1));
+    }
+
     configs.put(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
         conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1));
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java
index e3a100c..cb848c0 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java
@@ -18,6 +18,9 @@
 package org.apache.hadoop.hive.llap.cli;

+import java.util.Properties;
+import java.util.concurrent.TimeUnit;
+
 import jline.TerminalFactory;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.HelpFormatter;
@@ -33,11 +36,16 @@
   private static final String LLAPSTATUS_CONSTANT = "llapstatus";

+  private static final long FIND_YARN_APP_TIMEOUT_MS = 20 * 1000l; // 20 seconds to wait for the app to become visible
+
   enum OptionConstants {
     NAME("name", 'n', "LLAP cluster name"),
+    FIND_APP_TIMEOUT("findAppTimeout", 'f',
+        "Amount of time (in seconds) that the tool will wait for the YARN application to start. Negative values=wait forever, 0=do not wait. Default=" +
+            TimeUnit.SECONDS.convert(FIND_YARN_APP_TIMEOUT_MS, TimeUnit.MILLISECONDS) + "s"),
     HIVECONF("hiveconf", null, "Use value for given property. Overridden by explicit parameters",
         "property=value", 2),
-    HELP("help", 'H', "Print help information"),;
+    HELP("help", 'H', "Print help information");

     private final String longOpt;
@@ -83,14 +91,26 @@ public int getNumArgs() {
   public static class LlapStatusOptions {
     private final String name;
+    private final Properties conf;
+    private final long findAppTimeoutMs;

-    LlapStatusOptions(String name) {
+    LlapStatusOptions(String name, Properties hiveProperties, long findAppTimeoutMs) {
       this.name = name;
+      this.conf = hiveProperties;
+      this.findAppTimeoutMs = findAppTimeoutMs;
     }

     public String getName() {
       return name;
     }
+
+    public Properties getConf() {
+      return conf;
+    }
+
+    public long getFindAppTimeoutMs() {
+      return findAppTimeoutMs;
+    }
   }

   private final Options options = new Options();
@@ -113,14 +133,28 @@ public LlapStatusOptionsProcessor() {
   public LlapStatusOptions processOptions(String[] args) throws ParseException {
     commandLine = new GnuParser().parse(options, args);
-    if (commandLine.hasOption(OptionConstants.HELP.getShortOpt()) ||
-        false == commandLine.hasOption(OptionConstants.NAME.getLongOpt())) {
+    if (commandLine.hasOption(OptionConstants.HELP.getShortOpt())) {
       printUsage();
       return null;
     }

     String name = commandLine.getOptionValue(OptionConstants.NAME.getLongOpt());
-    return new LlapStatusOptions(name);
+
+    long findAppTimeoutMs = FIND_YARN_APP_TIMEOUT_MS;
+    if (commandLine.hasOption(OptionConstants.FIND_APP_TIMEOUT.getLongOpt())) {
+      findAppTimeoutMs = TimeUnit.MILLISECONDS.convert(Long.parseLong(
+          commandLine.getOptionValue(OptionConstants.FIND_APP_TIMEOUT.getLongOpt())),
+          TimeUnit.SECONDS);
+    }
+
+    Properties hiveConf;
+    if (commandLine.hasOption(OptionConstants.HIVECONF.getLongOpt())) {
+      hiveConf = commandLine.getOptionProperties(OptionConstants.HIVECONF.getLongOpt());
+    } else {
+      hiveConf = new Properties();
+    }
+
+    return new LlapStatusOptions(name, hiveConf, findAppTimeoutMs);
   }
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
index 45ba5d0..646c286 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java
@@ -27,6 +27,8 @@
 import java.util.List;
 import java.util.Map;

+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.llap.cli.LlapStatusOptionsProcessor.LlapStatusOptions;
@@ -53,14 +55,15 @@
   private static final Logger LOG = LoggerFactory.getLogger(LlapStatusServiceDriver.class);

-  private static final long FIND_YARN_APP_TIMEOUT = 20 * 1000l; // 20seconds to wait for app to be visible
+
   private static final String AM_KEY = "slider-appmaster";
   private static final String LLAP_KEY = "LLAP";

   private final Configuration conf;
   private final Clock clock = new SystemClock();
-  private final AppStatusBuilder appStatusBuilder = new AppStatusBuilder();
+  @VisibleForTesting
+  final AppStatusBuilder appStatusBuilder = new AppStatusBuilder();

   public LlapStatusServiceDriver() {
     SessionState ss = SessionState.get();
@@ -85,7 +88,29 @@ public int run(String[] args) {
       conf.addResource(f);
     }
     conf.reloadConfiguration();
+    for (Map.Entry<Object, Object> props : options.getConf().entrySet()) {
+      conf.set((String) props.getKey(), (String) props.getValue());
+    }
+    String appName;
+    appName = options.getName();
+    if (StringUtils.isEmpty(appName)) {
+      appName = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS);
+      if (appName.startsWith("@") && appName.length() > 1) {
+        // This is a valid slider app name. Parse it out.
+        appName = appName.substring(1);
+      } else {
+        // Invalid app name. Checked later.
+        appName = null;
+      }
+    }
+    if (StringUtils.isEmpty(appName)) {
+      String message =
+          "Invalid app name. This must be set up via config or passed in as a parameter." +
+              " This tool works with clusters deployed by Slider/YARN";
+      LOG.info(message);
+      return ExitCode.INCORRECT_USAGE.getInt();
+    }

     try {
       sliderClient = createSliderClient();
@@ -97,7 +122,7 @@
     // Get the App report from YARN
     ApplicationReport appReport = null;
     try {
-      appReport = getAppReport(options, sliderClient, FIND_YARN_APP_TIMEOUT);
+      appReport = getAppReport(appName, sliderClient, options.getFindAppTimeoutMs());
     } catch (LlapStatusCliException e) {
       logError(e);
       return e.getExitCode().getInt();
@@ -120,7 +145,7 @@
     } else {
       // Get information from slider.
       try {
-        ret = populateAppStatusFromSlider(options, sliderClient, appStatusBuilder);
+        ret = populateAppStatusFromSlider(appName, sliderClient, appStatusBuilder);
       } catch (LlapStatusCliException e) {
         // In case of failure, send back whatever is constructed sop far - which wouldbe from the AppReport
         logError(e);
@@ -140,8 +165,8 @@
       }
       return ret.getInt();
     }finally {
-      if (LOG.isTraceEnabled()) {
-        LOG.trace("Final AppState: " + appStatusBuilder.toString());
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Final AppState: " + appStatusBuilder.toString());
       }
       if (sliderClient != null) {
         sliderClient.stop();
@@ -157,6 +182,7 @@ public void outputJson(PrintWriter writer) throws LlapStatusCliException {
     try {
       writer.println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(appStatusBuilder));
     } catch (IOException e) {
+      LOG.warn("Failed to create JSON", e);
       throw new LlapStatusCliException(ExitCode.LLAP_JSON_GENERATION_ERROR,
           "Failed to create JSON", e);
     }
@@ -185,18 +211,26 @@ public void serviceInit(Configuration conf) throws Exception {
   }

-  private ApplicationReport getAppReport(LlapStatusOptions options, SliderClient sliderClient,
+  private ApplicationReport getAppReport(String appName, SliderClient sliderClient,
       long timeoutMs) throws LlapStatusCliException {
     long startTime = clock.getTime();
-    long timeoutTime = startTime + timeoutMs;
+    long timeoutTime = timeoutMs < 0 ? Long.MAX_VALUE : (startTime + timeoutMs);
     ApplicationReport appReport = null;
     // TODO HIVE-13454 Maybe add an option to wait for a certain amount of time for the app to
     // move to running state. Potentially even wait for the containers to be launched.
-    while (clock.getTime() < timeoutTime && appReport == null) {
+    while (appReport == null) {
       try {
-        appReport = sliderClient.getYarnAppListClient().findInstance(options.getName());
+        appReport = sliderClient.getYarnAppListClient().findInstance(appName);
+        if (timeoutMs == 0) {
+          // break immediately if timeout is 0
+          break;
+        }
+        // Otherwise sleep, and try again.
         if (appReport == null) {
           long remainingTime = Math.min(timeoutTime - clock.getTime(), 500l);
           if (remainingTime > 0) {
@@ -263,18 +297,18 @@

   /**
    *
-   * @param options
+   * @param appName
    * @param sliderClient
    * @param appStatusBuilder
    * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible
    * @throws LlapStatusCliException
    */
-  private ExitCode populateAppStatusFromSlider(LlapStatusOptions options, SliderClient sliderClient, AppStatusBuilder appStatusBuilder) throws
+  private ExitCode populateAppStatusFromSlider(String appName, SliderClient sliderClient, AppStatusBuilder appStatusBuilder) throws
       LlapStatusCliException {
     ClusterDescription clusterDescription;
     try {
-      clusterDescription = sliderClient.getClusterDescription(options.getName());
+      clusterDescription = sliderClient.getClusterDescription(appName);
     } catch (SliderException e) {
       throw new LlapStatusCliException(ExitCode.SLIDER_CLIENT_ERROR_OTHER,
SliderErrorCode=" + (e).getExitCode(), e); @@ -801,17 +835,27 @@ private static void logError(Throwable t) { public static void main(String[] args) { + LOG.info("LLAP status invoked with arguments = {}", args); int ret; try { LlapStatusServiceDriver statusServiceDriver = new LlapStatusServiceDriver(); ret = statusServiceDriver.run(args); if (ret == ExitCode.SUCCESS.getInt()) { - statusServiceDriver.outputJson(new PrintWriter(System.out)); + try (PrintWriter pw = new PrintWriter(System.out)) { + statusServiceDriver.outputJson(pw); + } } } catch (Throwable t) { logError(t); - ret = ExitCode.INTERNAL_ERROR.getInt(); + if (t instanceof LlapStatusCliException) { + LlapStatusCliException ce = (LlapStatusCliException) t; + ret = ce.getExitCode().getInt(); + } else { + ret = ExitCode.INTERNAL_ERROR.getInt(); + } + } finally { + LOG.info("LLAP status finished"); } if (LOG.isDebugEnabled()) { LOG.debug("Completed processing - exiting with " + ret); diff --git llap-server/src/main/resources/llap-cli-log4j2.properties llap-server/src/main/resources/llap-cli-log4j2.properties index a141042..2f27b5e 100644 --- llap-server/src/main/resources/llap-cli-log4j2.properties +++ llap-server/src/main/resources/llap-cli-log4j2.properties @@ -22,7 +22,7 @@ packages = org.apache.hadoop.hive.ql.log property.hive.log.level = INFO property.hive.root.logger = console property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name} -property.hive.log.file = hive.log +property.hive.log.file = llap-cli.log # list of all appenders appenders = console, DRFA diff --git llap-server/src/main/resources/package.py llap-server/src/main/resources/package.py index 58c43be..63c0ef1 100644 --- llap-server/src/main/resources/package.py +++ llap-server/src/main/resources/package.py @@ -43,6 +43,11 @@ def __init__(self, config): self.queueString = "--queue " self.queueString += config["hive.llap.daemon.queue.name"] + if (not config.get("private.hive.llap.servicedriver.cluster.name")): + self.clusterName="llap0" + else: + self.clusterName = config["private.hive.llap.servicedriver.cluster.name"] + def __repr__(self): return "" % (self.heap_size, self.container_size) @@ -108,7 +113,7 @@ def main(args): "container.cores" : resource.container_cores, "hadoop_home" : os.getenv("HADOOP_HOME"), "java_home" : java_home, - "name" : args.name, + "name" : resource.clusterName, "daemon_args" : args.args, "daemon_loglevel" : args.loglevel, "queue.string" : resource.queueString, diff --git packaging/src/main/assembly/bin.xml packaging/src/main/assembly/bin.xml index 97bef59..8fd934a 100644 --- packaging/src/main/assembly/bin.xml +++ packaging/src/main/assembly/bin.xml @@ -393,6 +393,11 @@ llap-daemon-log4j2.properties.template + ${project.parent.basedir}/llap-server/src/main/resources/llap-cli-log4j2.properties + conf + llap-cli-log4j2.properties.template + + ${project.parent.basedir}/hcatalog/README.txt hcatalog/share/doc/hcatalog