diff --git a/bin/ext/llapstatus.sh b/bin/ext/llapstatus.sh index 2d2c8f4..23e6be6 100644 --- a/bin/ext/llapstatus.sh +++ b/bin/ext/llapstatus.sh @@ -17,7 +17,7 @@ THISSERVICE=llapstatus export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} " llapstatus () { - CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver; + CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver; if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then echo "Missing Hive CLI Jar" exit 3; @@ -36,7 +36,7 @@ llapstatus () { } llapstatus_help () { - CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver; + CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver; execHiveCmd $CLASS "--help" } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java index af47b26..5ec9e1d 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java @@ -24,69 +24,24 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.client.ServiceClient; import org.apache.hadoop.yarn.service.utils.CoreFileSystem; -import org.apache.hadoop.yarn.util.Clock; -import org.apache.hadoop.yarn.util.SystemClock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class LlapSliderUtils { - private static final Logger LOG = LoggerFactory - .getLogger(LlapSliderUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(LlapSliderUtils.class); private static final String LLAP_PACKAGE_DIR = ".yarn/package/LLAP/"; - public static ServiceClient createServiceClient( - Configuration conf) throws Exception { + public static ServiceClient createServiceClient(Configuration conf) throws Exception { ServiceClient serviceClient = new ServiceClient(); serviceClient.init(conf); serviceClient.start(); return serviceClient; } - public static ApplicationReport getAppReport(String appName, ServiceClient serviceClient, - long timeoutMs) throws - LlapStatusServiceDriver.LlapStatusCliException { - Clock clock = SystemClock.getInstance(); - long startTime = clock.getTime(); - long timeoutTime = timeoutMs < 0 ? Long.MAX_VALUE : (startTime + timeoutMs); - ApplicationReport appReport = null; - ApplicationId appId; - try { - appId = serviceClient.getAppId(appName); - } catch (YarnException | IOException e) { - return null; - } - - while (appReport == null) { - try { - appReport = serviceClient.getYarnClient().getApplicationReport(appId); - if (timeoutMs == 0) { - // break immediately if timeout is 0 - break; - } - // Otherwise sleep, and try again. - if (appReport == null) { - long remainingTime = Math.min(timeoutTime - clock.getTime(), 500l); - if (remainingTime > 0) { - Thread.sleep(remainingTime); - } else { - break; - } - } - } catch (Exception e) { // No point separating IOException vs YarnException vs others - throw new LlapStatusServiceDriver.LlapStatusCliException( - LlapStatusServiceDriver.ExitCode.YARN_ERROR, - "Failed to get Yarn AppReport", e); - } - } - return appReport; - } - public static Service getService(Configuration conf, String name) { LOG.info("Get service details for " + name); ServiceClient sc; @@ -112,10 +67,8 @@ public static Service getService(Configuration conf, String name) { return service; } - public static void startCluster(Configuration conf, String name, - String packageName, Path packageDir, String queue) { - LOG.info("Starting cluster with " + name + ", " - + packageName + ", " + queue + ", " + packageDir); + public static void startCluster(Configuration conf, String name, String packageName, Path packageDir, String queue) { + LOG.info("Starting cluster with " + name + ", " + packageName + ", " + queue + ", " + packageDir); ServiceClient sc; try { sc = createServiceClient(conf); diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java deleted file mode 100644 index dca0c7b..0000000 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusOptionsProcessor.java +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.llap.cli; - -import java.util.Properties; -import java.util.concurrent.TimeUnit; - -import jline.TerminalFactory; -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; - -import com.google.common.annotations.VisibleForTesting; - -public class LlapStatusOptionsProcessor { - - private static final String LLAPSTATUS_CONSTANT = "llapstatus"; - - @VisibleForTesting - public static final long FIND_YARN_APP_TIMEOUT_MS = 20 * 1000l; // 20seconds to wait for app to be visible - @VisibleForTesting - public static final long DEFAULT_STATUS_REFRESH_INTERVAL_MS = 1 * 1000l; // 1 seconds wait until subsequent status - @VisibleForTesting - public static final long DEFAULT_WATCH_MODE_TIMEOUT_MS = 5 * 60 * 1000l; // 5 minutes timeout for watch mode - @VisibleForTesting - public static final float DEFAULT_RUNNING_NODES_THRESHOLD = 1.0f; - - // TODO: why doesn't this use one of the existing options implementations?! - enum OptionConstants { - - NAME("name", 'n', "LLAP cluster name", true), - FIND_APP_TIMEOUT("findAppTimeout", 'f', - "Amount of time(s) that the tool will sleep to wait for the YARN application to start. negative values=wait " + - "forever, 0=Do not wait. default=" + TimeUnit.SECONDS.convert(FIND_YARN_APP_TIMEOUT_MS, TimeUnit.MILLISECONDS) + - "s", true), - OUTPUT_FILE("outputFile", 'o', "File to which output should be written (Default stdout)", true), - WATCH_MODE("watch", 'w', "Watch mode waits until all LLAP daemons are running or subset of the nodes are " + - "running (threshold can be specified via -r option) (Default wait until all nodes are running)", false), - // This is a negative because we want the positive to be the default when nothing is specified. - NOT_LAUNCHED("notlaunched", 'l', "In watch mode, do not assume that the application was " - + "already launched if there's doubt (e.g. if the last application instance has failed).", - false), - RUNNING_NODES_THRESHOLD("runningNodesThreshold", 'r', "When watch mode is enabled (-w), wait until the " + - "specified threshold of nodes are running (Default 1.0 which means 100% nodes are running)", true), - STATUS_REFRESH_INTERVAL("refreshInterval", 'i', "Amount of time in seconds to wait until subsequent status checks" + - " in watch mode. Valid only for watch mode. (Default " + - TimeUnit.SECONDS.convert(DEFAULT_STATUS_REFRESH_INTERVAL_MS, TimeUnit.MILLISECONDS) + "s)", true), - WATCH_MODE_TIMEOUT("watchTimeout", 't', "Exit watch mode if the desired state is not attained until the specified" + - " timeout. (Default " + TimeUnit.SECONDS.convert(DEFAULT_WATCH_MODE_TIMEOUT_MS, TimeUnit.MILLISECONDS) +"s)", - true), - HIVECONF("hiveconf", null, "Use value for given property. Overridden by explicit parameters", "property=value", 2), - HELP("help", 'H', "Print help information", false); - - - private final String longOpt; - private final Character shortOpt; - private final String description; - private final String argName; - private final int numArgs; - - OptionConstants(String longOpt, char shortOpt, String description, boolean hasArgs) { - this(longOpt, shortOpt, description, longOpt, hasArgs ? 1 : 0); - } - - OptionConstants(String longOpt, Character shortOpt, String description, String argName, int numArgs) { - this.longOpt = longOpt; - this.shortOpt = shortOpt; - this.description = description; - this.argName = argName; - this.numArgs = numArgs; - } - - public String getLongOpt() { - return longOpt; - } - - public Character getShortOpt() { - return shortOpt; - } - - public String getDescription() { - return description; - } - - public String getArgName() { - return argName; - } - - public int getNumArgs() { - return numArgs; - } - } - - - public static class LlapStatusOptions { - private final String name; - private final Properties conf; - private final long findAppTimeoutMs; - private final String outputFile; - private final long refreshIntervalMs; - private final boolean watchMode; - private final long watchTimeout; - private final float runningNodesThreshold; - private final boolean isLaunched; - - public LlapStatusOptions(final String name) { - this(name, new Properties(), FIND_YARN_APP_TIMEOUT_MS, null, DEFAULT_STATUS_REFRESH_INTERVAL_MS, false, - DEFAULT_WATCH_MODE_TIMEOUT_MS, DEFAULT_RUNNING_NODES_THRESHOLD, true); - } - - public LlapStatusOptions(String name, Properties hiveProperties, long findAppTimeoutMs, - String outputFile, long refreshIntervalMs, - final boolean watchMode, final long watchTimeoutMs, - final float runningNodesThreshold, final boolean isLaunched) { - this.name = name; - this.conf = hiveProperties; - this.findAppTimeoutMs = findAppTimeoutMs; - this.outputFile = outputFile; - this.refreshIntervalMs = refreshIntervalMs; - this.watchMode = watchMode; - this.watchTimeout = watchTimeoutMs; - this.runningNodesThreshold = runningNodesThreshold; - this.isLaunched = isLaunched; - } - - public String getName() { - return name; - } - - public Properties getConf() { - return conf; - } - - public long getFindAppTimeoutMs() { - return findAppTimeoutMs; - } - - public String getOutputFile() { - return outputFile; - } - - public long getRefreshIntervalMs() { - return refreshIntervalMs; - } - - public boolean isWatchMode() { - return watchMode; - } - - public boolean isLaunched() { - return isLaunched; - } - - public long getWatchTimeoutMs() { - return watchTimeout; - } - - public float getRunningNodesThreshold() { - return runningNodesThreshold; - } - } - - private final Options options = new Options(); - private org.apache.commons.cli.CommandLine commandLine; - - public LlapStatusOptionsProcessor() { - - for (OptionConstants optionConstant : OptionConstants.values()) { - - OptionBuilder optionBuilder = OptionBuilder.hasArgs(optionConstant.getNumArgs()) - .withArgName(optionConstant.getArgName()).withLongOpt(optionConstant.getLongOpt()) - .withDescription(optionConstant.getDescription()); - if (optionConstant.getShortOpt() == null) { - options.addOption(optionBuilder.create()); - } else { - options.addOption(optionBuilder.create(optionConstant.getShortOpt())); - } - } - } - - public LlapStatusOptions processOptions(String[] args) throws ParseException { - commandLine = new GnuParser().parse(options, args); - if (commandLine.hasOption(OptionConstants.HELP.getShortOpt())) { - printUsage(); - return null; - } - - String name = commandLine.getOptionValue(OptionConstants.NAME.getLongOpt()); - - long findAppTimeoutMs = FIND_YARN_APP_TIMEOUT_MS; - if (commandLine.hasOption(OptionConstants.FIND_APP_TIMEOUT.getLongOpt())) { - findAppTimeoutMs = TimeUnit.MILLISECONDS.convert(Long.parseLong( - commandLine.getOptionValue(OptionConstants.FIND_APP_TIMEOUT.getLongOpt())), - TimeUnit.SECONDS); - } - - Properties hiveConf; - if (commandLine.hasOption(OptionConstants.HIVECONF.getLongOpt())) { - hiveConf = commandLine.getOptionProperties(OptionConstants.HIVECONF.getLongOpt()); - } else { - hiveConf = new Properties(); - } - - String outputFile = null; - if (commandLine.hasOption(OptionConstants.OUTPUT_FILE.getLongOpt())) { - outputFile = commandLine.getOptionValue(OptionConstants.OUTPUT_FILE.getLongOpt()); - } - - long refreshIntervalMs = DEFAULT_STATUS_REFRESH_INTERVAL_MS; - if (commandLine.hasOption(OptionConstants.STATUS_REFRESH_INTERVAL.getLongOpt())) { - long refreshIntervalSec = Long.parseLong(commandLine.getOptionValue(OptionConstants.STATUS_REFRESH_INTERVAL - .getLongOpt())); - if (refreshIntervalSec <= 0) { - throw new IllegalArgumentException("Refresh interval should be >0"); - } - refreshIntervalMs = TimeUnit.MILLISECONDS.convert(refreshIntervalSec, TimeUnit.SECONDS); - } - - boolean watchMode = commandLine.hasOption(OptionConstants.WATCH_MODE.getLongOpt()); - long watchTimeoutMs = DEFAULT_WATCH_MODE_TIMEOUT_MS; - if (commandLine.hasOption(OptionConstants.WATCH_MODE_TIMEOUT.getLongOpt())) { - long watchTimeoutSec = Long.parseLong(commandLine.getOptionValue( - OptionConstants.WATCH_MODE_TIMEOUT.getLongOpt())); - if (watchTimeoutSec <= 0) { - throw new IllegalArgumentException("Watch timeout should be >0"); - } - watchTimeoutMs = TimeUnit.MILLISECONDS.convert(watchTimeoutSec, TimeUnit.SECONDS); - } - - boolean isLaunched = !commandLine.hasOption(OptionConstants.NOT_LAUNCHED.getLongOpt()); - - float runningNodesThreshold = DEFAULT_RUNNING_NODES_THRESHOLD; - if (commandLine.hasOption(OptionConstants.RUNNING_NODES_THRESHOLD.getLongOpt())) { - runningNodesThreshold = Float.parseFloat(commandLine.getOptionValue( - OptionConstants.RUNNING_NODES_THRESHOLD.getLongOpt())); - if (runningNodesThreshold < 0.0f || runningNodesThreshold > 1.0f) { - throw new IllegalArgumentException( - "Running nodes threshold value should be between 0.0 and 1.0 (inclusive)"); - } - } - return new LlapStatusOptions(name, hiveConf, findAppTimeoutMs, outputFile, refreshIntervalMs, - watchMode, watchTimeoutMs, runningNodesThreshold, isLaunched); - } - - - public static void printUsage() { - HelpFormatter hf = new HelpFormatter(); - try { - int width = hf.getWidth(); - int jlineWidth = TerminalFactory.get().getWidth(); - width = Math.min(160, Math.max(jlineWidth, width)); // Ignore potentially incorrect values - hf.setWidth(width); - } catch (Throwable t) { // Ignore - } - - LlapStatusOptionsProcessor optionsProcessor = new LlapStatusOptionsProcessor(); - hf.printHelp(LLAPSTATUS_CONSTANT, optionsProcessor.options); - } - -} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java deleted file mode 100644 index a521799..0000000 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java +++ /dev/null @@ -1,811 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.llap.cli; - - -import com.google.common.annotations.VisibleForTesting; -import java.io.BufferedOutputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.io.PrintWriter; -import java.text.DecimalFormat; -import java.util.Arrays; -import java.util.Collection; -import java.util.EnumSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.llap.cli.LlapStatusOptionsProcessor.LlapStatusOptions; -import org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers; -import org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.AppStatusBuilder; -import org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.LlapInstance; -import org.apache.hadoop.hive.llap.cli.status.LlapStatusHelpers.State; -import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration; -import org.apache.hadoop.hive.llap.registry.LlapServiceInstance; -import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.yarn.api.records.ApplicationReport; -import org.apache.hadoop.yarn.api.records.ContainerExitStatus; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.service.api.records.Container; -import org.apache.hadoop.yarn.service.api.records.Service; -import org.apache.hadoop.yarn.service.api.records.ServiceState; -import org.apache.hadoop.yarn.service.client.ServiceClient; -import org.apache.hadoop.yarn.util.Clock; -import org.apache.hadoop.yarn.util.SystemClock; -import org.codehaus.jackson.map.ObjectMapper; -import org.codehaus.jackson.map.SerializationConfig; -import org.codehaus.jackson.map.annotate.JsonSerialize; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class LlapStatusServiceDriver { - private static final Logger LOG = LoggerFactory.getLogger(LlapStatusServiceDriver.class); - private static final Logger CONSOLE_LOGGER = LoggerFactory.getLogger("LlapStatusServiceDriverConsole"); - - private static final EnumSet NO_YARN_SERVICE_INFO_STATES = EnumSet.of( - State.APP_NOT_FOUND, State.COMPLETE, State.LAUNCHING); - private static final EnumSet LAUNCHING_STATES = EnumSet.of( - State.LAUNCHING, State.RUNNING_PARTIAL, State.RUNNING_ALL); - - // Defining a bunch of configs here instead of in HiveConf. These are experimental, and mainly - // for use when retry handling is fixed in Yarn/Hadoop - - private static final String CONF_PREFIX = "hive.llapcli."; - - // The following two keys should ideally be used to control RM connect timeouts. However, - // they don't seem to work. The IPC timeout needs to be set instead. - @InterfaceAudience.Private - private static final String CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS = CONF_PREFIX + "yarn.rm.connect.max-wait-ms"; - private static final long CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS_DEFAULT = 10000l; - @InterfaceAudience.Private - private static final String CONFIG_YARN_RM_RETRY_INTERVAL_MS = CONF_PREFIX + "yarn.rm.connect.retry-interval.ms"; - private static final long CONFIG_YARN_RM_RETRY_INTERVAL_MS_DEFAULT = 5000l; - - // As of Hadoop 2.7 - this is what controls the RM timeout. - @InterfaceAudience.Private - private static final String CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES = CONF_PREFIX + "ipc.client.max-retries"; - private static final int CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT = 2; - @InterfaceAudience.Private - private static final String CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS = - CONF_PREFIX + "ipc.client.connect.retry-interval-ms"; - private static final long CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS_DEFAULT = 1500l; - - // As of Hadoop 2.8 - this timeout spec behaves in a strnage manner. "2000,1" means 2000s with 1 retry. - // However it does this - but does it thrice. Essentially - #retries+2 is the number of times the entire config - // is retried. "2000,1" means 3 retries - each with 1 retry with a random 2000ms sleep. - @InterfaceAudience.Private - private static final String CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC = - CONF_PREFIX + "timeline.service.fs-store.retry.policy.spec"; - private static final String - CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC_DEFAULT = "2000, 1"; - - private static final String CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS = CONF_PREFIX + "zk-registry.timeout-ms"; - private static final long CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS_DEFAULT = 20000l; - - private static final long LOG_SUMMARY_INTERVAL = 15000L; // Log summary every ~15 seconds. - private static final String LLAP_KEY = "llap"; - - private final Configuration conf; - private String appName = null; - private String applicationId = null; - private ServiceClient serviceClient = null; - private Configuration llapRegistryConf = null; - private LlapRegistryService llapRegistry = null; - - private AppStatusBuilder appStatusBuilder; - - public LlapStatusServiceDriver() { - SessionState ss = SessionState.get(); - conf = (ss != null) ? ss.getConf() : new HiveConf(SessionState.class); - setupConf(); - } - - private void setupConf() { - for (String f : LlapDaemonConfiguration.DAEMON_CONFIGS) { - conf.addResource(f); - } - conf.reloadConfiguration(); - - // Setup timeouts for various services. - - // Once we move to a Hadoop-2.8 dependency, the following paramteer can be used. - // conf.set(YarnConfiguration.TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC); - conf.set("yarn.timeline-service.entity-group-fs-store.retry-policy-spec", - conf.get(CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC, - CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC_DEFAULT)); - - conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, - conf.getLong(CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS, CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS_DEFAULT)); - conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, - conf.getLong(CONFIG_YARN_RM_RETRY_INTERVAL_MS, CONFIG_YARN_RM_RETRY_INTERVAL_MS_DEFAULT)); - - conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, - conf.getInt(CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES, CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT)); - conf.setLong(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_RETRY_INTERVAL_KEY, - conf.getLong(CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS, CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS_DEFAULT)); - - HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, ( - conf.getLong(CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS, CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS_DEFAULT) + "ms")); - - llapRegistryConf = new Configuration(conf); - } - - /** - * Parse command line options. - * - * @return command line options. - */ - @VisibleForTesting - public LlapStatusOptions parseOptions(String[] args) throws LlapStatusCliException { - - LlapStatusOptionsProcessor optionsProcessor = new LlapStatusOptionsProcessor(); - LlapStatusOptions options; - try { - options = optionsProcessor.processOptions(args); - return options; - } catch (Exception e) { - LOG.info("Failed to parse arguments", e); - throw new LlapStatusCliException(ExitCode.INCORRECT_USAGE, "Incorrect usage"); - } - } - - public int run(LlapStatusOptions options, long watchTimeoutMs) { - appStatusBuilder = new AppStatusBuilder(); - try { - if (appName == null) { - // user provided configs - for (Map.Entry props : options.getConf().entrySet()) { - conf.set((String) props.getKey(), (String) props.getValue()); - } - - appName = options.getName(); - if (StringUtils.isEmpty(appName)) { - appName = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS); - if (appName.startsWith("@") && appName.length() > 1) { - // This is a valid YARN Service name. Parse it out. - appName = appName.substring(1); - } else { - // Invalid app name. Checked later. - appName = null; - } - } - if (StringUtils.isEmpty(appName)) { - String message = - "Invalid app name. This must be setup via config or passed in as a parameter." + - " This tool works with clusters deployed by YARN Service"; - LOG.info(message); - return ExitCode.INCORRECT_USAGE.getInt(); - } - LOG.debug("Using appName: {}", appName); - - llapRegistryConf.set(HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + appName); - } - - try { - if (serviceClient == null) { - serviceClient = LlapSliderUtils.createServiceClient(conf); - } - } catch (Exception e) { - LlapStatusCliException le = new LlapStatusCliException( - LlapStatusServiceDriver.ExitCode.SERVICE_CLIENT_ERROR_CREATE_FAILED, "Failed to create service client", e); - logError(le); - return le.getExitCode().getInt(); - } - - // Get the App report from YARN - ApplicationReport appReport; - try { - appReport = LlapSliderUtils.getAppReport(appName, serviceClient, options.getFindAppTimeoutMs()); - } catch (LlapStatusCliException e) { - logError(e); - return e.getExitCode().getInt(); - } - - // Process the report - ExitCode ret; - try { - ret = processAppReport(appReport, appStatusBuilder); - } catch (LlapStatusCliException e) { - logError(e); - return e.getExitCode().getInt(); - } - - if (ret != ExitCode.SUCCESS) { - return ret.getInt(); - } else if (NO_YARN_SERVICE_INFO_STATES.contains(appStatusBuilder.getState())) { - return ExitCode.SUCCESS.getInt(); - } else { - // Get information from YARN Service - try { - ret = populateAppStatusFromServiceStatus(appName, serviceClient, appStatusBuilder); - } catch (LlapStatusCliException e) { - // In case of failure, send back whatever is constructed so far - which would be from the AppReport - logError(e); - return e.getExitCode().getInt(); - } - } - - if (ret != ExitCode.SUCCESS) { - return ret.getInt(); - } else { - try { - ret = populateAppStatusFromLlapRegistry(appStatusBuilder, watchTimeoutMs); - } catch (LlapStatusCliException e) { - logError(e); - return e.getExitCode().getInt(); - } - } - - return ret.getInt(); - } finally { - LOG.debug("Final AppState: " + appStatusBuilder.toString()); - } - } - - public void outputJson(PrintWriter writer) throws LlapStatusCliException { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false); - mapper.setSerializationInclusion(JsonSerialize.Inclusion.NON_NULL); - mapper.setSerializationInclusion(JsonSerialize.Inclusion.NON_EMPTY); - try { - writer.println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(appStatusBuilder)); - } catch (IOException e) { - LOG.warn("Failed to create JSON", e); - throw new LlapStatusCliException(ExitCode.LLAP_JSON_GENERATION_ERROR, "Failed to create JSON", e); - } - } - - /** - * Populates parts of the AppStatus - * - * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible - * @throws LlapStatusCliException - */ - private ExitCode processAppReport(ApplicationReport appReport, AppStatusBuilder appStatusBuilder) - throws LlapStatusCliException { - if (appReport == null) { - appStatusBuilder.setState(State.APP_NOT_FOUND); - LOG.info("No Application Found"); - return ExitCode.SUCCESS; - } - - applicationId = appReport.getApplicationId().toString(); - - // TODO Maybe add the YARN URL for the app. - appStatusBuilder.setAmInfo( - new LlapStatusHelpers.AmInfo().setAppName(appReport.getName()).setAppType(appReport.getApplicationType())); - appStatusBuilder.setAppStartTime(appReport.getStartTime()); - switch (appReport.getYarnApplicationState()) { - case NEW: - case NEW_SAVING: - case SUBMITTED: - appStatusBuilder.setState(State.LAUNCHING); - return ExitCode.SUCCESS; - case ACCEPTED: - appStatusBuilder.maybeCreateAndGetAmInfo().setAppId(applicationId); - appStatusBuilder.setState(State.LAUNCHING); - return ExitCode.SUCCESS; - case RUNNING: - appStatusBuilder.maybeCreateAndGetAmInfo().setAppId(applicationId); - // If the app state is running, get additional information from YARN Service - return ExitCode.SUCCESS; - case FINISHED: - case FAILED: - case KILLED: - appStatusBuilder.maybeCreateAndGetAmInfo().setAppId(applicationId); - appStatusBuilder.setAppFinishTime(appReport.getFinishTime()); - appStatusBuilder.setState(State.COMPLETE); - // add log links and other diagnostics from YARN Service - return ExitCode.SUCCESS; - default: - throw new LlapStatusCliException(ExitCode.INTERNAL_ERROR, - "Unknown Yarn Application State: " + appReport.getYarnApplicationState()); - } - } - - /** - * Populates information from YARN Service Status. - * - * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible - * @throws LlapStatusCliException - */ - private ExitCode populateAppStatusFromServiceStatus(String appName, ServiceClient serviceClient, - AppStatusBuilder appStatusBuilder) throws LlapStatusCliException { - ExitCode exitCode = ExitCode.YARN_ERROR; - try { - Service service = serviceClient.getStatus(appName); - if (service != null) { - // How to get config paths and AmInfo - ServiceState state = service.getState(); - appStatusBuilder.setAppStartTime(service.getLaunchTime() == null ? 0 : service.getLaunchTime().getTime()); - appStatusBuilder.setDesiredInstances(service.getComponent(LLAP_KEY).getNumberOfContainers() == null ? 0 - : service.getComponent(LLAP_KEY).getNumberOfContainers().intValue()); - appStatusBuilder.setLiveInstances(service.getComponent(LLAP_KEY).getContainers().size()); - for (Container cont : service.getComponent(LLAP_KEY).getContainers()) { - LlapInstance llapInstance = new LlapInstance(cont.getHostname(), cont.getId()); - appStatusBuilder.addNewRunningLlapInstance(llapInstance); - } - if (state == ServiceState.STABLE) { - exitCode = ExitCode.SUCCESS; - } - } else { - exitCode = ExitCode.SERVICE_CLIENT_ERROR_OTHER; - } - } catch (IOException | YarnException e) { - LlapStatusCliException le = new LlapStatusCliException( - LlapStatusServiceDriver.ExitCode.SERVICE_CLIENT_ERROR_OTHER, "Failed to get service status", e); - logError(le); - exitCode = le.getExitCode(); - } - return exitCode; - } - - /** - * Populate additional information for containers from the LLAP registry. Must be invoked - * after YARN Service status and diagnostics. - * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible - * @throws LlapStatusCliException - */ - private ExitCode populateAppStatusFromLlapRegistry(AppStatusBuilder appStatusBuilder, long watchTimeoutMs) - throws LlapStatusCliException { - - if (llapRegistry == null) { - try { - llapRegistry = LlapRegistryService.getClient(llapRegistryConf); - } catch (Exception e) { - throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, - "Failed to create llap registry client", e); - } - } - - Collection serviceInstances; - try { - serviceInstances = llapRegistry.getInstances(watchTimeoutMs).getAll(); - } catch (Exception e) { - throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e); - } - - if (serviceInstances == null || serviceInstances.isEmpty()) { - LOG.debug("No information found in the LLAP registry"); - appStatusBuilder.setLiveInstances(0); - appStatusBuilder.setState(State.LAUNCHING); - appStatusBuilder.clearRunningLlapInstances(); - return ExitCode.SUCCESS; - } else { - // Tracks instances known by both YARN Service and llap. - List validatedInstances = new LinkedList<>(); - List llapExtraInstances = new LinkedList<>(); - - for (LlapServiceInstance serviceInstance : serviceInstances) { - String containerIdString = serviceInstance.getProperties().get( - HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname); - - LlapInstance llapInstance = appStatusBuilder.removeAndGetRunningLlapInstanceForContainer( - containerIdString); - if (llapInstance != null) { - llapInstance.setMgmtPort(serviceInstance.getManagementPort()); - llapInstance.setRpcPort(serviceInstance.getRpcPort()); - llapInstance.setShufflePort(serviceInstance.getShufflePort()); - llapInstance.setWebUrl(serviceInstance.getServicesAddress()); - llapInstance.setStatusUrl(serviceInstance.getServicesAddress() + "/status"); - validatedInstances.add(llapInstance); - } else { - // This likely indicates that an instance has recently restarted - // (the old instance has not been unregistered), and the new instances has not registered yet. - llapExtraInstances.add(containerIdString); - // This instance will not be added back, since it's services are not up yet. - } - - } - - appStatusBuilder.setLiveInstances(validatedInstances.size()); - appStatusBuilder.setLaunchingInstances(llapExtraInstances.size()); - if (appStatusBuilder.getDesiredInstances() != null && - validatedInstances.size() >= appStatusBuilder.getDesiredInstances()) { - appStatusBuilder.setState(State.RUNNING_ALL); - if (validatedInstances.size() > appStatusBuilder.getDesiredInstances()) { - LOG.warn("Found more entries in LLAP registry, as compared to desired entries"); - } - } else { - if (validatedInstances.size() > 0) { - appStatusBuilder.setState(State.RUNNING_PARTIAL); - } else { - appStatusBuilder.setState(State.LAUNCHING); - } - } - - // At this point, everything that can be consumed from AppStatusBuilder has been consumed. - // Debug only - if (appStatusBuilder.allRunningInstances().size() > 0) { - // Containers likely to come up soon. - LOG.debug("Potential instances starting up: {}", appStatusBuilder.allRunningInstances()); - } - if (llapExtraInstances.size() > 0) { - // Old containers which are likely shutting down, or new containers which - // launched between YARN Service status/diagnostics. Skip for this iteration. - LOG.debug("Instances likely to shutdown soon: {}", llapExtraInstances); - } - - appStatusBuilder.clearAndAddPreviouslyKnownRunningInstances(validatedInstances); - - } - return ExitCode.SUCCESS; - } - - private void close() { - if (serviceClient != null) { - serviceClient.stop(); - } - if (llapRegistry != null) { - llapRegistry.stop(); - } - } - - public enum ExitCode { - SUCCESS(0), - INCORRECT_USAGE(10), - YARN_ERROR(20), - SERVICE_CLIENT_ERROR_CREATE_FAILED(30), - SERVICE_CLIENT_ERROR_OTHER(31), - LLAP_REGISTRY_ERROR(40), - LLAP_JSON_GENERATION_ERROR(50), - // Error in the script itself - likely caused by an incompatible change, or new functionality / states added. - INTERNAL_ERROR(100); - - private final int exitCode; - - ExitCode(int exitCode) { - this.exitCode = exitCode; - } - - public int getInt() { - return exitCode; - } - } - - public static class LlapStatusCliException extends Exception { - final LlapStatusServiceDriver.ExitCode exitCode; - - public LlapStatusCliException(LlapStatusServiceDriver.ExitCode exitCode, String message) { - super(exitCode.getInt() +": " + message); - this.exitCode = exitCode; - } - - public LlapStatusCliException(LlapStatusServiceDriver.ExitCode exitCode, String message, Throwable cause) { - super(exitCode.getInt() +": " + message, cause); - this.exitCode = exitCode; - } - - public LlapStatusServiceDriver.ExitCode getExitCode() { - return exitCode; - } - } - - public static void main(String[] args) { - LOG.info("LLAP status invoked with arguments = {}", Arrays.toString(args)); - int ret = ExitCode.SUCCESS.getInt(); - Clock clock = SystemClock.getInstance(); - long lastSummaryLogTime = -1; - - LlapStatusServiceDriver statusServiceDriver = null; - LlapStatusOptions options = null; - try { - statusServiceDriver = new LlapStatusServiceDriver(); - options = statusServiceDriver.parseOptions(args); - } catch (Throwable t) { - statusServiceDriver.close(); - logError(t); - if (t instanceof LlapStatusCliException) { - LlapStatusCliException ce = (LlapStatusCliException) t; - ret = ce.getExitCode().getInt(); - } else { - ret = ExitCode.INTERNAL_ERROR.getInt(); - } - } - if (ret != 0 || options == null) { // Failure / help - if (statusServiceDriver != null) { - statusServiceDriver.close(); - } - System.exit(ret); - } - - boolean firstAttempt = true; - final long refreshInterval = options.getRefreshIntervalMs(); - final boolean watchMode = options.isWatchMode(); - final long watchTimeout = options.getWatchTimeoutMs(); - long numAttempts = watchTimeout / refreshInterval; - numAttempts = watchMode ? numAttempts : 1; // Break out of the loop fast if watchMode is disabled. - LlapStatusHelpers.State launchingState = null; - LlapStatusHelpers.State currentState = null; - boolean desiredStateAttained = false; - final float runningNodesThreshold = options.getRunningNodesThreshold(); - try (OutputStream os = options.getOutputFile() == null ? System.out : - new BufferedOutputStream(new FileOutputStream(options.getOutputFile())); - PrintWriter pw = new PrintWriter(os)) { - - LOG.info("Configured refresh interval: {}s. Watch timeout: {}s. Attempts remaining: {}." + - " Watch mode: {}. Running nodes threshold: {}.", - TimeUnit.SECONDS.convert(refreshInterval, TimeUnit.MILLISECONDS), - TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS), - numAttempts, watchMode, new DecimalFormat("#.###").format(runningNodesThreshold)); - while (numAttempts > 0) { - if (!firstAttempt) { - if (watchMode) { - try { - Thread.sleep(refreshInterval); - } catch (InterruptedException e) { - // ignore - } - } else { - // reported once, so break - break; - } - } else { - firstAttempt = false; - } - ret = statusServiceDriver.run(options, watchMode ? watchTimeout : 0); - currentState = statusServiceDriver.appStatusBuilder.getState(); - try { - lastSummaryLogTime = LlapStatusServiceDriver.maybeLogSummary(clock, lastSummaryLogTime, - statusServiceDriver, watchMode, watchTimeout, launchingState); - } catch (Exception e) { - LOG.warn("Failed to log summary", e); - } - - if (ret == ExitCode.SUCCESS.getInt()) { - if (watchMode) { - - // YARN Service has started llap application, now if for some reason - // state changes to COMPLETE then fail fast - if (launchingState == null && LAUNCHING_STATES.contains(currentState)) { - launchingState = currentState; - } - - if (currentState.equals(State.COMPLETE)) { - if (launchingState != null || options.isLaunched()) { - LOG.warn("COMPLETE state reached while waiting for RUNNING state. Failing."); - System.err.println("Final diagnostics: " + statusServiceDriver.appStatusBuilder.getDiagnostics()); - break; - } else { - LOG.info("Found a stopped application; assuming it was a previous attempt " - + "and waiting for the next one. Omit the -l flag to avoid this."); - } - } - - if (!(currentState.equals(State.RUNNING_PARTIAL) || currentState.equals(State.RUNNING_ALL))) { - LOG.debug( - "Current state: {}. Desired state: {}. {}/{} instances.", - currentState, - runningNodesThreshold == 1.0f ? - State.RUNNING_ALL : - State.RUNNING_PARTIAL, - statusServiceDriver.appStatusBuilder.getLiveInstances(), - statusServiceDriver.appStatusBuilder.getDesiredInstances()); - numAttempts--; - continue; - } - - // we have reached RUNNING state, now check if running nodes threshold is met - final int liveInstances = statusServiceDriver.appStatusBuilder.getLiveInstances(); - final int desiredInstances = statusServiceDriver.appStatusBuilder.getDesiredInstances(); - if (desiredInstances > 0) { - final float ratio = (float) liveInstances / (float) desiredInstances; - if (ratio < runningNodesThreshold) { - LOG.debug( - "Waiting until running nodes threshold is reached. Current: {} Desired: {}." + - " {}/{} instances.", - new DecimalFormat("#.###").format(ratio), - new DecimalFormat("#.###").format(runningNodesThreshold), - statusServiceDriver.appStatusBuilder.getLiveInstances(), - statusServiceDriver.appStatusBuilder.getDesiredInstances()); - numAttempts--; - continue; - } else { - desiredStateAttained = true; - statusServiceDriver.appStatusBuilder.setRunningThresholdAchieved(true); - } - } else { - numAttempts--; - continue; - } - } - } else if (ret == ExitCode.YARN_ERROR.getInt() && watchMode) { - LOG.warn("Watch mode enabled and got YARN error. Retrying.."); - numAttempts--; - continue; - } else if (ret == ExitCode.SERVICE_CLIENT_ERROR_CREATE_FAILED.getInt() && watchMode) { - LOG.warn("Watch mode enabled and YARN Service client creation failed. Retrying.."); - numAttempts--; - continue; - } else if (ret == ExitCode.SERVICE_CLIENT_ERROR_OTHER.getInt() && watchMode) { - LOG.warn("Watch mode enabled and got YARN Service client error. Retrying.."); - numAttempts--; - continue; - } else if (ret == ExitCode.LLAP_REGISTRY_ERROR.getInt() && watchMode) { - LOG.warn("Watch mode enabled and got LLAP registry error. Retrying.."); - numAttempts--; - continue; - } - break; - } - // Log final state to CONSOLE_LOGGER - LlapStatusServiceDriver.maybeLogSummary(clock, 0L, statusServiceDriver, watchMode, watchTimeout, launchingState); - CONSOLE_LOGGER.info("\n\n\n"); - // print current state before exiting - statusServiceDriver.outputJson(pw); - os.flush(); - pw.flush(); - if (numAttempts == 0 && watchMode && !desiredStateAttained) { - LOG.warn("Watch timeout {}s exhausted before desired state RUNNING is attained.", - TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS)); - } - } catch (Throwable t) { - logError(t); - if (t instanceof LlapStatusCliException) { - LlapStatusCliException ce = (LlapStatusCliException) t; - ret = ce.getExitCode().getInt(); - } else { - ret = ExitCode.INTERNAL_ERROR.getInt(); - } - } finally { - LOG.info("LLAP status finished"); - if (ret != ExitCode.SUCCESS.exitCode) { - LOG.error("LLAP did not start. Check the application log for more info:\n" + - "\tyarn logs --applicationId {} -out ", statusServiceDriver.applicationId); - } - statusServiceDriver.close(); - } - LOG.debug("Completed processing - exiting with " + ret); - - System.exit(ret); - } - - private static long maybeLogSummary(Clock clock, long lastSummaryLogTime, LlapStatusServiceDriver statusServiceDriver, - boolean watchMode, long watchTimeout, LlapStatusHelpers.State launchingState) { - long currentTime = clock.getTime(); - if (lastSummaryLogTime < currentTime - LOG_SUMMARY_INTERVAL) { - String diagString = null; - if (launchingState == null && statusServiceDriver.appStatusBuilder.getState() == State.COMPLETE && watchMode) { - // First known state was COMPLETED. Wait for the app launch to start. - diagString = "Awaiting LLAP launch"; - // Clear completed instances in this case. Don't want to provide information from the previous run. - statusServiceDriver.appStatusBuilder.clearCompletedLlapInstances(); - } else { - diagString = constructDiagnostics(statusServiceDriver.appStatusBuilder); - } - - if (lastSummaryLogTime == -1) { - if (watchMode) { - CONSOLE_LOGGER.info("\nLLAPSTATUS WatchMode with timeout={} s", - TimeUnit.SECONDS.convert(watchTimeout, TimeUnit.MILLISECONDS)); - } else { - CONSOLE_LOGGER.info("\nLLAPSTATUS"); - } - CONSOLE_LOGGER.info("--------------------------------------------------------------------------------"); - } - CONSOLE_LOGGER.info(diagString); - CONSOLE_LOGGER.info("--------------------------------------------------------------------------------"); - lastSummaryLogTime = currentTime; - } - return lastSummaryLogTime; - } - - /** - * Helper method to construct a diagnostic message from a complete AppStatusBuilder. - */ - private static String constructDiagnostics(AppStatusBuilder appStatusBuilder) { - StringBuilder sb = new StringBuilder(); - - switch (appStatusBuilder.getState()) { - case APP_NOT_FOUND: - sb.append("LLAP status unknown. Awaiting app launch"); - break; - case LAUNCHING: - // This is a catch all state - when containers have not started yet, or LLAP has not started yet. - if (StringUtils.isNotBlank(appStatusBuilder.getAmInfo().getAppId())) { - sb.append("LLAP Starting up with AppId=").append(appStatusBuilder.getAmInfo().getAppId()).append("."); - if (appStatusBuilder.getDesiredInstances() != null) { - sb.append(" Started 0/").append(appStatusBuilder.getDesiredInstances()).append(" instances"); - } - - String containerDiagnostics = constructCompletedContainerDiagnostics( - appStatusBuilder.getCompletedInstances()); - if (StringUtils.isNotEmpty(containerDiagnostics)) { - sb.append("\n").append(containerDiagnostics); - } - } else { - sb.append("Awaiting LLAP startup"); - } - break; - case RUNNING_PARTIAL: - sb.append("LLAP Starting up with ApplicationId=").append(appStatusBuilder.getAmInfo().getAppId()); - sb.append(" Started").append(appStatusBuilder.getLiveInstances()).append("/") - .append(appStatusBuilder.getDesiredInstances()).append(" instances"); - String containerDiagnostics = constructCompletedContainerDiagnostics(appStatusBuilder.getCompletedInstances()); - if (StringUtils.isNotEmpty(containerDiagnostics)) { - sb.append("\n").append(containerDiagnostics); - } - - // TODO HIVE-15865: Include information about pending requests, and last - // allocation time once YARN Service provides this information. - break; - case RUNNING_ALL: - sb.append("LLAP Application running with ApplicationId=").append(appStatusBuilder.getAmInfo().getAppId()); - break; - case COMPLETE: - sb.append("LLAP Application already complete. ApplicationId=").append(appStatusBuilder.getAmInfo().getAppId()); - containerDiagnostics = constructCompletedContainerDiagnostics(appStatusBuilder.getCompletedInstances()); - if (StringUtils.isNotEmpty(containerDiagnostics)) { - sb.append("\n").append(containerDiagnostics); - } - - break; - case UNKNOWN: - sb.append("LLAP status unknown"); - break; - } - if (StringUtils.isNotBlank(appStatusBuilder.getDiagnostics())) { - sb.append("\n").append(appStatusBuilder.getDiagnostics()); - } - - return sb.toString(); - } - - private static String constructCompletedContainerDiagnostics(List completedInstances) { - StringBuilder sb = new StringBuilder(); - if (completedInstances == null || completedInstances.size() == 0) { - return ""; - } else { - // TODO HIVE-15865 Ideally sort these by completion time, once that is available. - boolean isFirst = true; - for (LlapInstance instance : completedInstances) { - if (!isFirst) { - sb.append("\n"); - } else { - isFirst = false; - } - - if (instance.getYarnContainerExitStatus() == ContainerExitStatus.KILLED_EXCEEDED_PMEM || - instance.getYarnContainerExitStatus() == ContainerExitStatus.KILLED_EXCEEDED_VMEM) { - sb.append("\tKILLED container (by YARN for exceeding memory limits): "); - } else { - // TODO HIVE-15865 Handle additional reasons like OS launch failed - sb.append("\tFAILED container: "); - } - sb.append(" ").append(instance.getContainerId()); - sb.append(", Logs at: ").append(instance.getLogUrl()); - } - } - return sb.toString(); - } - - private static void logError(Throwable t) { - LOG.error("FAILED: " + t.getMessage(), t); - System.err.println("FAILED: " + t.getMessage()); - } -} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/AmInfo.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/AmInfo.java new file mode 100644 index 0000000..ac2ff65 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/AmInfo.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +/** + * Represents the state of the yarn application. + */ +class AmInfo { + private String appName; + private String appType; + private String appId; + private String containerId; + private String hostname; + private String amWebUrl; + + AmInfo setAppName(String appName) { + this.appName = appName; + return this; + } + + AmInfo setAppType(String appType) { + this.appType = appType; + return this; + } + + AmInfo setAppId(String appId) { + this.appId = appId; + return this; + } + + AmInfo setContainerId(String containerId) { + this.containerId = containerId; + return this; + } + + AmInfo setHostname(String hostname) { + this.hostname = hostname; + return this; + } + + AmInfo setAmWebUrl(String amWebUrl) { + this.amWebUrl = amWebUrl; + return this; + } + + String getAppName() { + return appName; + } + + String getAppType() { + return appType; + } + + String getAppId() { + return appId; + } + + String getContainerId() { + return containerId; + } + + String getHostname() { + return hostname; + } + + String getAmWebUrl() { + return amWebUrl; + } + + @Override + public String toString() { + return ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE); + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/AppStatusBuilder.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/AppStatusBuilder.java new file mode 100644 index 0000000..c2ba4db --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/AppStatusBuilder.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; +import org.codehaus.jackson.annotate.JsonIgnore; + +/** + * Represents the status of the Llap application. + */ +class AppStatusBuilder { + + private AmInfo amInfo; + private State state = State.UNKNOWN; + private String diagnostics; + private String originalConfigurationPath; + private String generatedConfigurationPath; + + private Long appStartTime; + private Long appFinishTime; + + private boolean runningThresholdAchieved = false; + + private Integer desiredInstances = null; + private Integer liveInstances = null; + private Integer launchingInstances = null; + + private final List runningInstances = new LinkedList<>(); + private final List completedInstances = new LinkedList<>(); + + private final transient Map containerToRunningInstanceMap = new HashMap<>(); + private final transient Map containerToCompletedInstanceMap = new HashMap<>(); + + void setAmInfo(AmInfo amInfo) { + this.amInfo = amInfo; + } + + AppStatusBuilder setState(State state) { + this.state = state; + return this; + } + + AppStatusBuilder setDiagnostics(String diagnostics) { + this.diagnostics = diagnostics; + return this; + } + + AppStatusBuilder setOriginalConfigurationPath(String originalConfigurationPath) { + this.originalConfigurationPath = originalConfigurationPath; + return this; + } + + AppStatusBuilder setGeneratedConfigurationPath(String generatedConfigurationPath) { + this.generatedConfigurationPath = generatedConfigurationPath; + return this; + } + + AppStatusBuilder setAppStartTime(long appStartTime) { + this.appStartTime = appStartTime; + return this; + } + + AppStatusBuilder setAppFinishTime(long finishTime) { + this.appFinishTime = finishTime; + return this; + } + + void setRunningThresholdAchieved(boolean runningThresholdAchieved) { + this.runningThresholdAchieved = runningThresholdAchieved; + } + + AppStatusBuilder setDesiredInstances(int desiredInstances) { + this.desiredInstances = desiredInstances; + return this; + } + + AppStatusBuilder setLiveInstances(int liveInstances) { + this.liveInstances = liveInstances; + return this; + } + + AppStatusBuilder setLaunchingInstances(int launchingInstances) { + this.launchingInstances = launchingInstances; + return this; + } + + AppStatusBuilder addNewRunningLlapInstance(LlapInstance llapInstance) { + this.runningInstances.add(llapInstance); + this.containerToRunningInstanceMap + .put(llapInstance.getContainerId(), llapInstance); + return this; + } + + LlapInstance removeAndGetRunningLlapInstanceForContainer(String containerIdString) { + return containerToRunningInstanceMap.remove(containerIdString); + } + + void clearRunningLlapInstances() { + this.runningInstances.clear(); + this.containerToRunningInstanceMap.clear(); + } + + AppStatusBuilder clearAndAddPreviouslyKnownRunningInstances(List llapInstances) { + clearRunningLlapInstances(); + for (LlapInstance llapInstance : llapInstances) { + addNewRunningLlapInstance(llapInstance); + } + return this; + } + + @JsonIgnore + List allRunningInstances() { + return this.runningInstances; + } + + AppStatusBuilder addNewCompleteLlapInstance(LlapInstance llapInstance) { + this.completedInstances.add(llapInstance); + this.containerToCompletedInstanceMap + .put(llapInstance.getContainerId(), llapInstance); + return this; + } + + LlapInstance removeAndGetCompletedLlapInstanceForContainer(String containerIdString) { + return containerToCompletedInstanceMap.remove(containerIdString); + } + + void clearCompletedLlapInstances() { + this.completedInstances.clear(); + this.containerToCompletedInstanceMap.clear(); + } + + AppStatusBuilder clearAndAddPreviouslyKnownCompletedInstances(List llapInstances) { + clearCompletedLlapInstances(); + for (LlapInstance llapInstance : llapInstances) { + addNewCompleteLlapInstance(llapInstance); + } + return this; + } + + @JsonIgnore + List allCompletedInstances() { + return this.completedInstances; + } + + AmInfo getAmInfo() { + return amInfo; + } + + State getState() { + return state; + } + + String getDiagnostics() { + return diagnostics; + } + + String getOriginalConfigurationPath() { + return originalConfigurationPath; + } + + String getGeneratedConfigurationPath() { + return generatedConfigurationPath; + } + + Long getAppStartTime() { + return appStartTime; + } + + Long getAppFinishTime() { + return appFinishTime; + } + + boolean isRunningThresholdAchieved() { + return runningThresholdAchieved; + } + + Integer getDesiredInstances() { + return desiredInstances; + } + + Integer getLiveInstances() { + return liveInstances; + } + + Integer getLaunchingInstances() { + return launchingInstances; + } + + List getRunningInstances() { + return runningInstances; + } + + List getCompletedInstances() { + return completedInstances; + } + + @JsonIgnore + AmInfo maybeCreateAndGetAmInfo() { + if (amInfo == null) { + amInfo = new AmInfo(); + } + return amInfo; + } + + @Override + public String toString() { + return ToStringBuilder.reflectionToString(this, ToStringStyle.MULTI_LINE_STYLE); + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/ExitCode.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/ExitCode.java new file mode 100644 index 0000000..85f4cff --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/ExitCode.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +/** + * Enumeration of the potential outcomes of the Llap state checking. + */ +enum ExitCode { + SUCCESS(0), + INCORRECT_USAGE(10), + YARN_ERROR(20), + SERVICE_CLIENT_ERROR_CREATE_FAILED(30), + SERVICE_CLIENT_ERROR_OTHER(31), + LLAP_REGISTRY_ERROR(40), + LLAP_JSON_GENERATION_ERROR(50), + // Error in the script itself - likely caused by an incompatible change, or new functionality / states added. + INTERNAL_ERROR(100); + + private final int code; + + ExitCode(int code) { + this.code = code; + } + + public int getCode() { + return code; + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapInstance.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapInstance.java new file mode 100644 index 0000000..5b979fd --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapInstance.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +/** + * Representing the state of an Llap instance monitored. + */ +class LlapInstance { + private final String hostname; + private final String containerId; + private String logUrl; + + // Only for live instances. + private String statusUrl; + private String webUrl; + private Integer rpcPort; + private Integer mgmtPort; + private Integer shufflePort; + + // For completed instances + private String diagnostics; + private int yarnContainerExitStatus; + + // TODO HIVE-13454 Add additional information such as #executors, container size, etc + + LlapInstance(String hostname, String containerId) { + this.hostname = hostname; + this.containerId = containerId; + } + + LlapInstance setLogUrl(String logUrl) { + this.logUrl = logUrl; + return this; + } + + LlapInstance setStatusUrl(String statusUrl) { + this.statusUrl = statusUrl; + return this; + } + + LlapInstance setWebUrl(String webUrl) { + this.webUrl = webUrl; + return this; + } + + LlapInstance setRpcPort(int rpcPort) { + this.rpcPort = rpcPort; + return this; + } + + LlapInstance setMgmtPort(int mgmtPort) { + this.mgmtPort = mgmtPort; + return this; + } + + LlapInstance setShufflePort(int shufflePort) { + this.shufflePort = shufflePort; + return this; + } + + LlapInstance setDiagnostics(String diagnostics) { + this.diagnostics = diagnostics; + return this; + } + + LlapInstance setYarnContainerExitStatus(int yarnContainerExitStatus) { + this.yarnContainerExitStatus = yarnContainerExitStatus; + return this; + } + + String getHostname() { + return hostname; + } + + String getContainerId() { + return containerId; + } + + String getLogUrl() { + return logUrl; + } + + String getStatusUrl() { + return statusUrl; + } + + String getWebUrl() { + return webUrl; + } + + Integer getRpcPort() { + return rpcPort; + } + + Integer getMgmtPort() { + return mgmtPort; + } + + Integer getShufflePort() { + return shufflePort; + } + + String getDiagnostics() { + return diagnostics; + } + + int getYarnContainerExitStatus() { + return yarnContainerExitStatus; + } + + @Override + public String toString() { + return ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE); + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusCliException.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusCliException.java new file mode 100644 index 0000000..7ebf404 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusCliException.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +/** + * Representing the exceptions that may occur during the Llap state chacking. + */ +class LlapStatusCliException extends Exception { + private final ExitCode exitCode; + + LlapStatusCliException(ExitCode exitCode, String message) { + super(exitCode.getCode() +": " + message); + this.exitCode = exitCode; + } + + LlapStatusCliException(ExitCode exitCode, String message, Throwable cause) { + super(exitCode.getCode() +": " + message, cause); + this.exitCode = exitCode; + } + + ExitCode getExitCode() { + return exitCode; + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusHelpers.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusHelpers.java deleted file mode 100644 index 5c8aeb0..0000000 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusHelpers.java +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.llap.cli.status; - -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver; -import org.codehaus.jackson.annotate.JsonIgnore; - -public class LlapStatusHelpers { - public enum State { - APP_NOT_FOUND, LAUNCHING, - RUNNING_PARTIAL, - RUNNING_ALL, COMPLETE, UNKNOWN - } - - public static class AmInfo { - private String appName; - private String appType; - private String appId; - private String containerId; - private String hostname; - private String amWebUrl; - - public AmInfo setAppName(String appName) { - this.appName = appName; - return this; - } - - public AmInfo setAppType(String appType) { - this.appType = appType; - return this; - } - - public AmInfo setAppId(String appId) { - this.appId = appId; - return this; - } - - public AmInfo setContainerId(String containerId) { - this.containerId = containerId; - return this; - } - - public AmInfo setHostname(String hostname) { - this.hostname = hostname; - return this; - } - - public AmInfo setAmWebUrl(String amWebUrl) { - this.amWebUrl = amWebUrl; - return this; - } - - public String getAppName() { - return appName; - } - - public String getAppType() { - return appType; - } - - public String getAppId() { - return appId; - } - - public String getContainerId() { - return containerId; - } - - public String getHostname() { - return hostname; - } - - public String getAmWebUrl() { - return amWebUrl; - } - - @Override - public String toString() { - return "AmInfo{" + - "appName='" + appName + '\'' + - ", appType='" + appType + '\'' + - ", appId='" + appId + '\'' + - ", containerId='" + containerId + '\'' + - ", hostname='" + hostname + '\'' + - ", amWebUrl='" + amWebUrl + '\'' + - '}'; - } - } - - public static class LlapInstance { - private final String hostname; - private final String containerId; - private String logUrl; - - // Only for live instances. - private String statusUrl; - private String webUrl; - private Integer rpcPort; - private Integer mgmtPort; - private Integer shufflePort; - - // For completed instances - private String diagnostics; - private int yarnContainerExitStatus; - - // TODO HIVE-13454 Add additional information such as #executors, container size, etc - - public LlapInstance(String hostname, String containerId) { - this.hostname = hostname; - this.containerId = containerId; - } - - public LlapInstance setLogUrl(String logUrl) { - this.logUrl = logUrl; - return this; - } - - public LlapInstance setWebUrl(String webUrl) { - this.webUrl = webUrl; - return this; - } - - public LlapInstance setStatusUrl(String statusUrl) { - this.statusUrl = statusUrl; - return this; - } - - public LlapInstance setRpcPort(int rpcPort) { - this.rpcPort = rpcPort; - return this; - } - - public LlapInstance setMgmtPort(int mgmtPort) { - this.mgmtPort = mgmtPort; - return this; - } - - public LlapInstance setShufflePort(int shufflePort) { - this.shufflePort = shufflePort; - return this; - } - - public LlapInstance setDiagnostics(String diagnostics) { - this.diagnostics = diagnostics; - return this; - } - - public LlapInstance setYarnContainerExitStatus(int yarnContainerExitStatus) { - this.yarnContainerExitStatus = yarnContainerExitStatus; - return this; - } - - public String getHostname() { - return hostname; - } - - public String getLogUrl() { - return logUrl; - } - - public String getStatusUrl() { - return statusUrl; - } - - public String getContainerId() { - return containerId; - } - - public String getWebUrl() { - return webUrl; - } - - public Integer getRpcPort() { - return rpcPort; - } - - public Integer getMgmtPort() { - return mgmtPort; - } - - public Integer getShufflePort() { - return shufflePort; - } - - public String getDiagnostics() { - return diagnostics; - } - - public int getYarnContainerExitStatus() { - return yarnContainerExitStatus; - } - - @Override - public String toString() { - return "LlapInstance{" + - "hostname='" + hostname + '\'' + - "logUrl=" + logUrl + '\'' + - ", containerId='" + containerId + '\'' + - ", statusUrl='" + statusUrl + '\'' + - ", webUrl='" + webUrl + '\'' + - ", rpcPort=" + rpcPort + - ", mgmtPort=" + mgmtPort + - ", shufflePort=" + shufflePort + - ", diagnostics=" + diagnostics + - ", yarnContainerExitStatus=" + yarnContainerExitStatus + - '}'; - } - } - - public static final class AppStatusBuilder { - - private AmInfo amInfo; - private State state = State.UNKNOWN; - private String diagnostics; - private String originalConfigurationPath; - private String generatedConfigurationPath; - - private Integer desiredInstances = null; - private Integer liveInstances = null; - private Integer launchingInstances = null; - - - private Long appStartTime; - private Long appFinishTime; - - private boolean runningThresholdAchieved = false; - - private final List runningInstances = new LinkedList<>(); - private final List completedInstances = new LinkedList<>(); - - private transient final Map - containerToRunningInstanceMap = new HashMap<>(); - private transient final Map - containerToCompletedInstanceMap = new HashMap<>(); - - public void setAmInfo(AmInfo amInfo) { - this.amInfo = amInfo; - } - - public AppStatusBuilder setState( - State state) { - this.state = state; - return this; - } - - public AppStatusBuilder setDiagnostics(String diagnostics) { - this.diagnostics = diagnostics; - return this; - } - - public AppStatusBuilder setOriginalConfigurationPath(String originalConfigurationPath) { - this.originalConfigurationPath = originalConfigurationPath; - return this; - } - - public AppStatusBuilder setGeneratedConfigurationPath(String generatedConfigurationPath) { - this.generatedConfigurationPath = generatedConfigurationPath; - return this; - } - - public AppStatusBuilder setAppStartTime(long appStartTime) { - this.appStartTime = appStartTime; - return this; - } - - public AppStatusBuilder setAppFinishTime(long finishTime) { - this.appFinishTime = finishTime; - return this; - } - - public void setRunningThresholdAchieved(boolean runningThresholdAchieved) { - this.runningThresholdAchieved = runningThresholdAchieved; - } - - public AppStatusBuilder setDesiredInstances(int desiredInstances) { - this.desiredInstances = desiredInstances; - return this; - } - - public AppStatusBuilder setLiveInstances(int liveInstances) { - this.liveInstances = liveInstances; - return this; - } - - public AppStatusBuilder setLaunchingInstances(int launchingInstances) { - this.launchingInstances = launchingInstances; - return this; - } - - public AppStatusBuilder addNewRunningLlapInstance(LlapInstance llapInstance) { - this.runningInstances.add(llapInstance); - this.containerToRunningInstanceMap - .put(llapInstance.getContainerId(), llapInstance); - return this; - } - - public LlapInstance removeAndGetRunningLlapInstanceForContainer(String containerIdString) { - return containerToRunningInstanceMap.remove(containerIdString); - } - - public void clearRunningLlapInstances() { - this.runningInstances.clear(); - this.containerToRunningInstanceMap.clear(); - } - - public AppStatusBuilder clearAndAddPreviouslyKnownRunningInstances(List llapInstances) { - clearRunningLlapInstances(); - for (LlapInstance llapInstance : llapInstances) { - addNewRunningLlapInstance(llapInstance); - } - return this; - } - - @JsonIgnore - public List allRunningInstances() { - return this.runningInstances; - } - - public AppStatusBuilder addNewCompleteLlapInstance(LlapInstance llapInstance) { - this.completedInstances.add(llapInstance); - this.containerToCompletedInstanceMap - .put(llapInstance.getContainerId(), llapInstance); - return this; - } - - public LlapInstance removeAndGetCompletedLlapInstanceForContainer(String containerIdString) { - return containerToCompletedInstanceMap.remove(containerIdString); - } - - public void clearCompletedLlapInstances() { - this.completedInstances.clear(); - this.containerToCompletedInstanceMap.clear(); - } - - public AppStatusBuilder clearAndAddPreviouslyKnownCompletedInstances(List llapInstances) { - clearCompletedLlapInstances(); - for (LlapInstance llapInstance : llapInstances) { - addNewCompleteLlapInstance(llapInstance); - } - return this; - } - - @JsonIgnore - public List allCompletedInstances() { - return this.completedInstances; - } - - public AmInfo getAmInfo() { - return amInfo; - } - - public State getState() { - return state; - } - - public String getDiagnostics() { - return diagnostics; - } - - public String getOriginalConfigurationPath() { - return originalConfigurationPath; - } - - public String getGeneratedConfigurationPath() { - return generatedConfigurationPath; - } - - public Integer getDesiredInstances() { - return desiredInstances; - } - - public Integer getLiveInstances() { - return liveInstances; - } - - public Integer getLaunchingInstances() { - return launchingInstances; - } - - public Long getAppStartTime() { - return appStartTime; - } - - public Long getAppFinishTime() { - return appFinishTime; - } - - public boolean isRunningThresholdAchieved() { - return runningThresholdAchieved; - } - - public List getRunningInstances() { - return runningInstances; - } - - public List getCompletedInstances() { - return completedInstances; - } - - @JsonIgnore - public AmInfo maybeCreateAndGetAmInfo() { - if (amInfo == null) { - amInfo = new AmInfo(); - } - return amInfo; - } - - @Override - public String toString() { - return "AppStatusBuilder{" + - "amInfo=" + amInfo + - ", state=" + state + - ", diagnostics=" + diagnostics + - ", originalConfigurationPath='" + originalConfigurationPath + '\'' + - ", generatedConfigurationPath='" + generatedConfigurationPath + '\'' + - ", desiredInstances=" + desiredInstances + - ", liveInstances=" + liveInstances + - ", launchingInstances=" + launchingInstances + - ", appStartTime=" + appStartTime + - ", appFinishTime=" + appFinishTime + - ", runningThresholdAchieved=" + runningThresholdAchieved + - ", runningInstances=" + runningInstances + - ", completedInstances=" + completedInstances + - ", containerToRunningInstanceMap=" + containerToRunningInstanceMap + - '}'; - } - } -} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java new file mode 100644 index 0000000..a2ee252 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +import java.util.Arrays; +import java.util.Properties; + +import jline.TerminalFactory; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Parses, verifies, prints and provides the command line arguments of the Llap Status program. + */ +class LlapStatusServiceCommandLine { + private static final Logger LOGGER = LoggerFactory.getLogger("LlapStatusServiceDriverConsole"); + + @VisibleForTesting + static final long DEFAULT_FIND_YARN_APP_TIMEOUT_MS = 20 * 1000L; + @VisibleForTesting + static final long DEFAULT_STATUS_REFRESH_INTERVAL_MS = 1 * 1000L; + @VisibleForTesting + static final long DEFAULT_WATCH_MODE_TIMEOUT_MS = 5 * 60 * 1000L; + @VisibleForTesting + static final float DEFAULT_RUNNING_NODES_THRESHOLD = 1.0f; + + @SuppressWarnings("static-access") + private static final Option NAME = OptionBuilder + .withLongOpt("name") + .withDescription("LLAP cluster name") + .withArgName("name") + .hasArg() + .create('n'); + + @SuppressWarnings("static-access") + private static final Option FIND_APP_TIMEOUT = OptionBuilder + .withLongOpt("findAppTimeout") + .withDescription("Amount of time(s) that the tool will sleep to wait for the YARN application to start." + + "negative values=wait forever, 0=Do not wait. default=" + (DEFAULT_FIND_YARN_APP_TIMEOUT_MS / 1000) + "s") + .withArgName("findAppTimeout") + .hasArg() + .create('f'); + + @SuppressWarnings("static-access") + private static final Option OUTPUT_FILE = OptionBuilder + .withLongOpt("outputFile") + .withDescription("File to which output should be written (Default stdout)") + .withArgName("outputFile") + .hasArg() + .create('o'); + + @SuppressWarnings("static-access") + private static final Option WATCH_MODE = OptionBuilder + .withLongOpt("watch") + .withDescription("Watch mode waits until all LLAP daemons are running or subset of the nodes are running " + + "(threshold can be specified via -r option) (Default wait until all nodes are running)") + .withArgName("watch") + .create('w'); + + @SuppressWarnings("static-access") + private static final Option NOT_LAUNCHED = OptionBuilder + .withLongOpt("notLaunched") + .withDescription("In watch mode, do not assume that the application was already launched if there's doubt " + + "(e.g. if the last application instance has failed).") + .withArgName("notLaunched") + .create('l'); + + @SuppressWarnings("static-access") + private static final Option RUNNING_NODES_THRESHOLD = OptionBuilder + .withLongOpt("runningNodesThreshold") + .withDescription("When watch mode is enabled (-w), wait until the specified threshold of nodes are running " + + "(Default 1.0 which means 100% nodes are running)") + .withArgName("runningNodesThreshold") + .hasArg() + .create('r'); + + @SuppressWarnings("static-access") + private static final Option REFRESH_INTERVAL = OptionBuilder + .withLongOpt("refreshInterval") + .withDescription("Amount of time in seconds to wait until subsequent status checks in watch mode. Valid only " + + "for watch mode. (Default " + (DEFAULT_STATUS_REFRESH_INTERVAL_MS / 1000) + "s)") + .withArgName("refreshInterval") + .hasArg() + .create('i'); + + @SuppressWarnings("static-access") + private static final Option WATCH_TIMEOUT = OptionBuilder + .withLongOpt("watchTimeout") + .withDescription("Exit watch mode if the desired state is not attained until the specified timeout. (Default " + + (DEFAULT_WATCH_MODE_TIMEOUT_MS / 1000) + "s)") + .withArgName("watchTimeout") + .hasArg() + .create('t'); + + @SuppressWarnings("static-access") + private static final Option HIVECONF = OptionBuilder + .withLongOpt("hiveconf") + .withDescription("Use value for given property. Overridden by explicit parameters") + .withArgName("property=value") + .hasArgs(2) + .create(); + + @SuppressWarnings("static-access") + private static final Option HELP = OptionBuilder + .withLongOpt("help") + .withDescription("Print help information") + .withArgName("help") + .create('h'); + + private static final Options OPTIONS = new Options(); + { + OPTIONS.addOption(NAME); + OPTIONS.addOption(FIND_APP_TIMEOUT); + OPTIONS.addOption(OUTPUT_FILE); + OPTIONS.addOption(WATCH_MODE); + OPTIONS.addOption(NOT_LAUNCHED); + OPTIONS.addOption(RUNNING_NODES_THRESHOLD); + OPTIONS.addOption(REFRESH_INTERVAL); + OPTIONS.addOption(WATCH_TIMEOUT); + OPTIONS.addOption(HIVECONF); + OPTIONS.addOption(HELP); + } + + private String name; + private long findAppTimeoutMs; + private String outputFile; + private boolean watchMode; + private boolean isLaunched; + private float runningNodesThreshold; + private long refreshIntervalMs; + private long watchTimeoutMs; + private Properties hiveConf; + private boolean isHelp; + + static LlapStatusServiceCommandLine parseArguments(String[] args) { + LlapStatusServiceCommandLine cl = null; + try { + cl = new LlapStatusServiceCommandLine(args); + } catch (Exception e) { + LOGGER.error("Parsing the command line arguments failed", e); + printUsage(); + System.exit(ExitCode.INCORRECT_USAGE.getCode()); + } + + if (cl.isHelp()) { + printUsage(); + System.exit(0); + } + + return cl; + } + + LlapStatusServiceCommandLine(String[] args) throws ParseException { + LOGGER.info("LLAP status invoked with arguments = {}", Arrays.toString(args)); + parseCommandLine(args); + printArguments(); + } + + private void parseCommandLine(String[] args) throws ParseException { + CommandLine cl = new GnuParser().parse(OPTIONS, args); + + name = cl.getOptionValue(NAME.getLongOpt()); + + findAppTimeoutMs = DEFAULT_FIND_YARN_APP_TIMEOUT_MS; + if (cl.hasOption(FIND_APP_TIMEOUT.getLongOpt())) { + findAppTimeoutMs = Long.parseLong(cl.getOptionValue(FIND_APP_TIMEOUT.getLongOpt())) * 1000; + } + + if (cl.hasOption(OUTPUT_FILE.getLongOpt())) { + outputFile = cl.getOptionValue(OUTPUT_FILE.getLongOpt()); + } + + watchMode = cl.hasOption(WATCH_MODE.getLongOpt()); + + isLaunched = !cl.hasOption(NOT_LAUNCHED.getLongOpt()); + + runningNodesThreshold = DEFAULT_RUNNING_NODES_THRESHOLD; + if (cl.hasOption(RUNNING_NODES_THRESHOLD.getLongOpt())) { + runningNodesThreshold = Float.parseFloat(cl.getOptionValue(RUNNING_NODES_THRESHOLD.getLongOpt())); + if (runningNodesThreshold < 0.0f || runningNodesThreshold > 1.0f) { + throw new IllegalArgumentException("Running nodes threshold value should be between 0.0 and 1.0 (inclusive)"); + } + } + + refreshIntervalMs = DEFAULT_STATUS_REFRESH_INTERVAL_MS; + if (cl.hasOption(REFRESH_INTERVAL.getLongOpt())) { + long refreshIntervalSec = Long.parseLong(cl.getOptionValue(REFRESH_INTERVAL.getLongOpt())); + if (refreshIntervalSec <= 0) { + throw new IllegalArgumentException("Refresh interval should be >0"); + } + refreshIntervalMs = refreshIntervalSec * 1000; + } + + watchTimeoutMs = DEFAULT_WATCH_MODE_TIMEOUT_MS; + if (cl.hasOption(WATCH_TIMEOUT.getLongOpt())) { + long watchTimeoutSec = Long.parseLong(cl.getOptionValue(WATCH_TIMEOUT.getLongOpt())); + if (watchTimeoutSec <= 0) { + throw new IllegalArgumentException("Watch timeout should be >0"); + } + watchTimeoutMs = watchTimeoutSec * 1000; + } + + hiveConf = new Properties(); + if (cl.hasOption(HIVECONF.getLongOpt())) { + hiveConf = cl.getOptionProperties(HIVECONF.getLongOpt()); + } + + isHelp = cl.hasOption(HELP.getOpt()); + } + + private static void printUsage() { + HelpFormatter hf = new HelpFormatter(); + try { + int width = hf.getWidth(); + int jlineWidth = TerminalFactory.get().getWidth(); + width = Math.min(160, Math.max(jlineWidth, width)); + hf.setWidth(width); + } catch (Throwable t) { // Ignore + } + + hf.printHelp("llapstatus", OPTIONS); + } + + private void printArguments() { + LOGGER.info("LLAP status running with the following parsed arguments: \n" + + "\tname : " + name + "\n" + + "\tfindAppTimeoutMs : " + findAppTimeoutMs + "\n" + + "\toutputFile : " + outputFile + "\n" + + "\twatchMode : " + watchMode + "\n" + + "\tisLaunched : " + isLaunched + "\n" + + "\trunningNodesThreshold: " + runningNodesThreshold + "\n" + + "\trefreshIntervalMs : " + refreshIntervalMs + "\n" + + "\twatchTimeoutMs : " + watchTimeoutMs + "\n" + + "\thiveConf : " + hiveConf); + } + + String getName() { + return name; + } + + long getFindAppTimeoutMs() { + return findAppTimeoutMs; + } + + String getOutputFile() { + return outputFile; + } + + boolean isWatchMode() { + return watchMode; + } + + boolean isLaunched() { + return isLaunched; + } + + float getRunningNodesThreshold() { + return runningNodesThreshold; + } + + long getRefreshIntervalMs() { + return refreshIntervalMs; + } + + long getWatchTimeoutMs() { + return watchTimeoutMs; + } + + Properties getHiveConf() { + return hiveConf; + } + + boolean isHelp() { + return isHelp; + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceDriver.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceDriver.java new file mode 100644 index 0000000..bd48af1 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceDriver.java @@ -0,0 +1,775 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.Writer; +import java.nio.charset.Charset; +import java.text.DecimalFormat; +import java.util.Collection; +import java.util.EnumSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.cli.LlapSliderUtils; +import org.apache.hadoop.hive.llap.configuration.LlapDaemonConfiguration; +import org.apache.hadoop.hive.llap.registry.LlapServiceInstance; +import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.service.api.records.Container; +import org.apache.hadoop.yarn.service.api.records.Service; +import org.apache.hadoop.yarn.service.api.records.ServiceState; +import org.apache.hadoop.yarn.service.client.ServiceClient; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; +import org.codehaus.jackson.annotate.JsonAutoDetect.Visibility; +import org.codehaus.jackson.annotate.JsonMethod; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.SerializationConfig; +import org.codehaus.jackson.map.annotate.JsonSerialize; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Checks the status of the Llap. + */ +public class LlapStatusServiceDriver { + private static final Logger LOG = LoggerFactory.getLogger(LlapStatusServiceDriver.class); + private static final Logger CONSOLE_LOGGER = LoggerFactory.getLogger("LlapStatusServiceDriverConsole"); + + private static final EnumSet NO_YARN_SERVICE_INFO_STATES = EnumSet.of( + State.APP_NOT_FOUND, State.COMPLETE, State.LAUNCHING); + private static final EnumSet LAUNCHING_STATES = EnumSet.of( + State.LAUNCHING, State.RUNNING_PARTIAL, State.RUNNING_ALL); + + // Defining a bunch of configs here instead of in HiveConf. These are experimental, and mainly + // for use when retry handling is fixed in Yarn/Hadoop + + private static final String CONF_PREFIX = "hive.llapcli."; + + // The following two keys should ideally be used to control RM connect timeouts. However, + // they don't seem to work. The IPC timeout needs to be set instead. + private static final String CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS = CONF_PREFIX + "yarn.rm.connect.max-wait-ms"; + private static final long CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS_DEFAULT = 10000L; + private static final String CONFIG_YARN_RM_RETRY_INTERVAL_MS = CONF_PREFIX + "yarn.rm.connect.retry-interval.ms"; + private static final long CONFIG_YARN_RM_RETRY_INTERVAL_MS_DEFAULT = 5000L; + + // As of Hadoop 2.7 - this is what controls the RM timeout. + private static final String CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES = CONF_PREFIX + "ipc.client.max-retries"; + private static final int CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT = 2; + private static final String CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS = + CONF_PREFIX + "ipc.client.connect.retry-interval-ms"; + private static final long CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS_DEFAULT = 1500L; + + // As of Hadoop 2.8 - this timeout spec behaves in a strnage manner. "2000,1" means 2000s with 1 retry. + // However it does this - but does it thrice. Essentially - #retries+2 is the number of times the entire config + // is retried. "2000,1" means 3 retries - each with 1 retry with a random 2000ms sleep. + private static final String CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC = + CONF_PREFIX + "timeline.service.fs-store.retry.policy.spec"; + private static final String + CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC_DEFAULT = "2000, 1"; + + private static final String CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS = CONF_PREFIX + "zk-registry.timeout-ms"; + private static final long CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS_DEFAULT = 20000L; + + private static final long LOG_SUMMARY_INTERVAL = 15000L; // Log summary every ~15 seconds. + private static final String LLAP_KEY = "llap"; + + private final Configuration conf; + private String appName = null; + private String applicationId = null; + private ServiceClient serviceClient = null; + private Configuration llapRegistryConf = null; + private LlapRegistryService llapRegistry = null; + + private AppStatusBuilder appStatusBuilder; + + private static LlapStatusServiceDriver createServiceDriver() { + LlapStatusServiceDriver statusServiceDriver = null; + try { + statusServiceDriver = new LlapStatusServiceDriver(); + } catch (Throwable t) { + logError(t); + System.exit(ExitCode.INTERNAL_ERROR.getCode()); + } + return statusServiceDriver; + } + + public LlapStatusServiceDriver() { + SessionState ss = SessionState.get(); + conf = (ss != null) ? ss.getConf() : new HiveConf(SessionState.class); + setupConf(); + } + + private void setupConf() { + for (String f : LlapDaemonConfiguration.DAEMON_CONFIGS) { + conf.addResource(f); + } + conf.reloadConfiguration(); + + // Setup timeouts for various services. + + // Once we move to a Hadoop-2.8 dependency, the following paramteer can be used. + // conf.set(YarnConfiguration.TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC); + conf.set("yarn.timeline-service.entity-group-fs-store.retry-policy-spec", + conf.get(CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC, + CONFIG_TIMELINE_SERVICE_ENTITYGROUP_FS_STORE_RETRY_POLICY_SPEC_DEFAULT)); + + conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, + conf.getLong(CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS, CONFIG_YARN_RM_TIMEOUT_MAX_WAIT_MS_DEFAULT)); + conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + conf.getLong(CONFIG_YARN_RM_RETRY_INTERVAL_MS, CONFIG_YARN_RM_RETRY_INTERVAL_MS_DEFAULT)); + + conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, + conf.getInt(CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES, CONFIG_IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT)); + conf.setLong(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_RETRY_INTERVAL_KEY, + conf.getLong(CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS, CONFIG_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS_DEFAULT)); + + HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT, ( + conf.getLong(CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS, CONFIG_LLAP_ZK_REGISTRY_TIMEOUT_MS_DEFAULT) + "ms")); + + llapRegistryConf = new Configuration(conf); + } + + public ExitCode run(LlapStatusServiceCommandLine cl, long watchTimeoutMs) { + appStatusBuilder = new AppStatusBuilder(); + try { + if (appName == null) { + // user provided configs + for (Map.Entry props : cl.getHiveConf().entrySet()) { + conf.set((String) props.getKey(), (String) props.getValue()); + } + + appName = cl.getName(); + if (StringUtils.isEmpty(appName)) { + appName = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS); + if (appName.startsWith("@") && appName.length() > 1) { + // This is a valid YARN Service name. Parse it out. + appName = appName.substring(1); + } else { + // Invalid app name. Checked later. + appName = null; + } + } + if (StringUtils.isEmpty(appName)) { + LOG.error("Invalid app name. This must be setup via config or passed in as a parameter." + + " This tool works with clusters deployed by YARN Service"); + return ExitCode.INCORRECT_USAGE; + } + LOG.debug("Using appName: {}", appName); + + llapRegistryConf.set(HiveConf.ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + appName); + } + + try { + if (serviceClient == null) { + serviceClient = LlapSliderUtils.createServiceClient(conf); + } + } catch (Exception e) { + LlapStatusCliException le = new LlapStatusCliException( + ExitCode.SERVICE_CLIENT_ERROR_CREATE_FAILED, "Failed to create service client", e); + logError(le); + return le.getExitCode(); + } + + // Get the App report from YARN + ApplicationReport appReport; + try { + appReport = getAppReport(appName, serviceClient, cl.getFindAppTimeoutMs()); + } catch (LlapStatusCliException e) { + logError(e); + return e.getExitCode(); + } + + // Process the report + ExitCode ret; + try { + ret = processAppReport(appReport, appStatusBuilder); + } catch (LlapStatusCliException e) { + logError(e); + return e.getExitCode(); + } + + if (ret != ExitCode.SUCCESS) { + return ret; + } else if (NO_YARN_SERVICE_INFO_STATES.contains(appStatusBuilder.getState())) { + return ExitCode.SUCCESS; + } else { + // Get information from YARN Service + try { + ret = populateAppStatusFromServiceStatus(appName, serviceClient, appStatusBuilder); + } catch (LlapStatusCliException e) { + // In case of failure, send back whatever is constructed so far - which would be from the AppReport + logError(e); + return e.getExitCode(); + } + } + + if (ret != ExitCode.SUCCESS) { + return ret; + } else { + try { + ret = populateAppStatusFromLlapRegistry(appStatusBuilder, watchTimeoutMs); + } catch (LlapStatusCliException e) { + logError(e); + return e.getExitCode(); + } + } + + return ret; + } finally { + LOG.debug("Final AppState: " + appStatusBuilder.toString()); + } + } + + private ApplicationReport getAppReport(String appName, ServiceClient serviceClient, long timeoutMs) + throws LlapStatusCliException { + Clock clock = SystemClock.getInstance(); + long startTime = clock.getTime(); + long timeoutTime = timeoutMs < 0 ? Long.MAX_VALUE : (startTime + timeoutMs); + ApplicationReport appReport = null; + ApplicationId appId; + try { + appId = serviceClient.getAppId(appName); + } catch (YarnException | IOException e) { + return null; + } + + while (appReport == null) { + try { + appReport = serviceClient.getYarnClient().getApplicationReport(appId); + if (timeoutMs == 0) { + // break immediately if timeout is 0 + break; + } + // Otherwise sleep, and try again. + if (appReport == null) { + long remainingTime = Math.min(timeoutTime - clock.getTime(), 500L); + if (remainingTime > 0) { + Thread.sleep(remainingTime); + } else { + break; + } + } + } catch (Exception e) { // No point separating IOException vs YarnException vs others + throw new LlapStatusCliException(ExitCode.YARN_ERROR, "Failed to get Yarn AppReport", e); + } + } + return appReport; + } + + public void outputJson(PrintWriter writer) throws LlapStatusCliException { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false); + mapper.setSerializationInclusion(JsonSerialize.Inclusion.NON_NULL); + mapper.setSerializationInclusion(JsonSerialize.Inclusion.NON_EMPTY); + mapper.setVisibility(JsonMethod.ALL, Visibility.NON_PRIVATE); + try { + writer.println(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(appStatusBuilder)); + } catch (IOException e) { + LOG.warn("Failed to create JSON", e); + throw new LlapStatusCliException(ExitCode.LLAP_JSON_GENERATION_ERROR, "Failed to create JSON", e); + } + } + + /** + * Populates parts of the AppStatus. + * + * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible + * @throws LlapStatusCliException + */ + private ExitCode processAppReport(ApplicationReport appReport, AppStatusBuilder appStatusBuilder) + throws LlapStatusCliException { + if (appReport == null) { + appStatusBuilder.setState(State.APP_NOT_FOUND); + LOG.info("No Application Found"); + return ExitCode.SUCCESS; + } + + applicationId = appReport.getApplicationId().toString(); + + // TODO Maybe add the YARN URL for the app. + appStatusBuilder.setAmInfo( + new AmInfo().setAppName(appReport.getName()).setAppType(appReport.getApplicationType())); + appStatusBuilder.setAppStartTime(appReport.getStartTime()); + switch (appReport.getYarnApplicationState()) { + case NEW: + case NEW_SAVING: + case SUBMITTED: + appStatusBuilder.setState(State.LAUNCHING); + return ExitCode.SUCCESS; + case ACCEPTED: + appStatusBuilder.maybeCreateAndGetAmInfo().setAppId(applicationId); + appStatusBuilder.setState(State.LAUNCHING); + return ExitCode.SUCCESS; + case RUNNING: + appStatusBuilder.maybeCreateAndGetAmInfo().setAppId(applicationId); + // If the app state is running, get additional information from YARN Service + return ExitCode.SUCCESS; + case FINISHED: + case FAILED: + case KILLED: + appStatusBuilder.maybeCreateAndGetAmInfo().setAppId(applicationId); + appStatusBuilder.setAppFinishTime(appReport.getFinishTime()); + appStatusBuilder.setState(State.COMPLETE); + // add log links and other diagnostics from YARN Service + return ExitCode.SUCCESS; + default: + throw new LlapStatusCliException(ExitCode.INTERNAL_ERROR, + "Unknown Yarn Application State: " + appReport.getYarnApplicationState()); + } + } + + /** + * Populates information from YARN Service Status. + * + * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible + * @throws LlapStatusCliException + */ + private ExitCode populateAppStatusFromServiceStatus(String appName, ServiceClient serviceClient, + AppStatusBuilder appStatusBuilder) throws LlapStatusCliException { + ExitCode exitCode = ExitCode.YARN_ERROR; + try { + Service service = serviceClient.getStatus(appName); + if (service != null) { + // How to get config paths and AmInfo + ServiceState state = service.getState(); + appStatusBuilder.setAppStartTime(service.getLaunchTime() == null ? 0 : service.getLaunchTime().getTime()); + appStatusBuilder.setDesiredInstances(service.getComponent(LLAP_KEY).getNumberOfContainers() == null ? 0 + : service.getComponent(LLAP_KEY).getNumberOfContainers().intValue()); + appStatusBuilder.setLiveInstances(service.getComponent(LLAP_KEY).getContainers().size()); + for (Container cont : service.getComponent(LLAP_KEY).getContainers()) { + LlapInstance llapInstance = new LlapInstance(cont.getHostname(), cont.getId()); + appStatusBuilder.addNewRunningLlapInstance(llapInstance); + } + if (state == ServiceState.STABLE) { + exitCode = ExitCode.SUCCESS; + } + } else { + exitCode = ExitCode.SERVICE_CLIENT_ERROR_OTHER; + } + } catch (IOException | YarnException e) { + LlapStatusCliException le = new LlapStatusCliException( + ExitCode.SERVICE_CLIENT_ERROR_OTHER, "Failed to get service status", e); + logError(le); + exitCode = le.getExitCode(); + } + return exitCode; + } + + /** + * Populate additional information for containers from the LLAP registry. Must be invoked + * after YARN Service status and diagnostics. + * @return an ExitCode. An ExitCode other than ExitCode.SUCCESS implies future progress not possible + * @throws LlapStatusCliException + */ + private ExitCode populateAppStatusFromLlapRegistry(AppStatusBuilder appStatusBuilder, long watchTimeoutMs) + throws LlapStatusCliException { + + if (llapRegistry == null) { + try { + llapRegistry = LlapRegistryService.getClient(llapRegistryConf); + } catch (Exception e) { + throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, + "Failed to create llap registry client", e); + } + } + + Collection serviceInstances; + try { + serviceInstances = llapRegistry.getInstances(watchTimeoutMs).getAll(); + } catch (Exception e) { + throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e); + } + + if (serviceInstances == null || serviceInstances.isEmpty()) { + LOG.debug("No information found in the LLAP registry"); + appStatusBuilder.setLiveInstances(0); + appStatusBuilder.setState(State.LAUNCHING); + appStatusBuilder.clearRunningLlapInstances(); + return ExitCode.SUCCESS; + } else { + // Tracks instances known by both YARN Service and llap. + List validatedInstances = new LinkedList<>(); + List llapExtraInstances = new LinkedList<>(); + + for (LlapServiceInstance serviceInstance : serviceInstances) { + String containerIdString = serviceInstance.getProperties().get( + HiveConf.ConfVars.LLAP_DAEMON_CONTAINER_ID.varname); + + LlapInstance llapInstance = appStatusBuilder.removeAndGetRunningLlapInstanceForContainer(containerIdString); + if (llapInstance != null) { + llapInstance.setMgmtPort(serviceInstance.getManagementPort()); + llapInstance.setRpcPort(serviceInstance.getRpcPort()); + llapInstance.setShufflePort(serviceInstance.getShufflePort()); + llapInstance.setWebUrl(serviceInstance.getServicesAddress()); + llapInstance.setStatusUrl(serviceInstance.getServicesAddress() + "/status"); + validatedInstances.add(llapInstance); + } else { + // This likely indicates that an instance has recently restarted + // (the old instance has not been unregistered), and the new instances has not registered yet. + llapExtraInstances.add(containerIdString); + // This instance will not be added back, since it's services are not up yet. + } + + } + + appStatusBuilder.setLiveInstances(validatedInstances.size()); + appStatusBuilder.setLaunchingInstances(llapExtraInstances.size()); + if (appStatusBuilder.getDesiredInstances() != null && + validatedInstances.size() >= appStatusBuilder.getDesiredInstances()) { + appStatusBuilder.setState(State.RUNNING_ALL); + if (validatedInstances.size() > appStatusBuilder.getDesiredInstances()) { + LOG.warn("Found more entries in LLAP registry, as compared to desired entries"); + } + } else { + if (validatedInstances.size() > 0) { + appStatusBuilder.setState(State.RUNNING_PARTIAL); + } else { + appStatusBuilder.setState(State.LAUNCHING); + } + } + + // At this point, everything that can be consumed from AppStatusBuilder has been consumed. + // Debug only + if (appStatusBuilder.allRunningInstances().size() > 0) { + // Containers likely to come up soon. + LOG.debug("Potential instances starting up: {}", appStatusBuilder.allRunningInstances()); + } + if (llapExtraInstances.size() > 0) { + // Old containers which are likely shutting down, or new containers which + // launched between YARN Service status/diagnostics. Skip for this iteration. + LOG.debug("Instances likely to shutdown soon: {}", llapExtraInstances); + } + + appStatusBuilder.clearAndAddPreviouslyKnownRunningInstances(validatedInstances); + + } + return ExitCode.SUCCESS; + } + + private void close() { + if (serviceClient != null) { + serviceClient.stop(); + } + if (llapRegistry != null) { + llapRegistry.stop(); + } + } + + public static void main(String[] args) { + LlapStatusServiceCommandLine cl = LlapStatusServiceCommandLine.parseArguments(args); + LlapStatusServiceDriver statusServiceDriver = createServiceDriver(); + + ExitCode ret = ExitCode.SUCCESS; + Clock clock = SystemClock.getInstance(); + long lastSummaryLogTime = -1; + + boolean firstAttempt = true; + final long refreshInterval = cl.getRefreshIntervalMs(); + final boolean watchMode = cl.isWatchMode(); + final long watchTimeout = cl.getWatchTimeoutMs(); + long numAttempts = watchTimeout / refreshInterval; + numAttempts = watchMode ? numAttempts : 1; // Break out of the loop fast if watchMode is disabled. + State launchingState = null; + State currentState = null; + boolean desiredStateAttained = false; + final float runningNodesThreshold = cl.getRunningNodesThreshold(); + try (OutputStream os = cl.getOutputFile() == null ? System.out : new FileOutputStream(cl.getOutputFile()); + Writer w = new OutputStreamWriter(os, Charset.defaultCharset()); + PrintWriter pw = new PrintWriter(w)) { + + LOG.info("Configured refresh interval: {}s. Watch timeout: {}s. Attempts remaining: {}." + + " Watch mode: {}. Running nodes threshold: {}.", refreshInterval/1000, watchTimeout/1000, + numAttempts, watchMode, new DecimalFormat("#.###").format(runningNodesThreshold)); + while (numAttempts > 0) { + if (!firstAttempt) { + if (watchMode) { + try { + Thread.sleep(refreshInterval); + } catch (InterruptedException e) { + // ignore + } + } else { + // reported once, so break + break; + } + } else { + firstAttempt = false; + } + ret = statusServiceDriver.run(cl, watchMode ? watchTimeout : 0); + currentState = statusServiceDriver.appStatusBuilder.getState(); + try { + lastSummaryLogTime = LlapStatusServiceDriver.maybeLogSummary(clock, lastSummaryLogTime, + statusServiceDriver, watchMode, watchTimeout, launchingState); + } catch (Exception e) { + LOG.warn("Failed to log summary", e); + } + + if (ret == ExitCode.SUCCESS) { + if (watchMode) { + + // YARN Service has started llap application, now if for some reason + // state changes to COMPLETE then fail fast + if (launchingState == null && LAUNCHING_STATES.contains(currentState)) { + launchingState = currentState; + } + + if (currentState.equals(State.COMPLETE)) { + if (launchingState != null || cl.isLaunched()) { + LOG.warn("COMPLETE state reached while waiting for RUNNING state. Failing."); + System.err.println("Final diagnostics: " + statusServiceDriver.appStatusBuilder.getDiagnostics()); + break; + } else { + LOG.info("Found a stopped application; assuming it was a previous attempt " + + "and waiting for the next one. Omit the -l flag to avoid this."); + } + } + + if (!(currentState.equals(State.RUNNING_PARTIAL) || currentState.equals(State.RUNNING_ALL))) { + LOG.debug( + "Current state: {}. Desired state: {}. {}/{} instances.", + currentState, + runningNodesThreshold == 1.0f ? + State.RUNNING_ALL : + State.RUNNING_PARTIAL, + statusServiceDriver.appStatusBuilder.getLiveInstances(), + statusServiceDriver.appStatusBuilder.getDesiredInstances()); + numAttempts--; + continue; + } + + // we have reached RUNNING state, now check if running nodes threshold is met + final int liveInstances = statusServiceDriver.appStatusBuilder.getLiveInstances(); + final int desiredInstances = statusServiceDriver.appStatusBuilder.getDesiredInstances(); + if (desiredInstances > 0) { + final float ratio = (float) liveInstances / (float) desiredInstances; + if (ratio < runningNodesThreshold) { + LOG.debug( + "Waiting until running nodes threshold is reached. Current: {} Desired: {}." + + " {}/{} instances.", + new DecimalFormat("#.###").format(ratio), + new DecimalFormat("#.###").format(runningNodesThreshold), + statusServiceDriver.appStatusBuilder.getLiveInstances(), + statusServiceDriver.appStatusBuilder.getDesiredInstances()); + numAttempts--; + continue; + } else { + desiredStateAttained = true; + statusServiceDriver.appStatusBuilder.setRunningThresholdAchieved(true); + } + } else { + numAttempts--; + continue; + } + } + } else if (ret == ExitCode.YARN_ERROR && watchMode) { + LOG.warn("Watch mode enabled and got YARN error. Retrying.."); + numAttempts--; + continue; + } else if (ret == ExitCode.SERVICE_CLIENT_ERROR_CREATE_FAILED && watchMode) { + LOG.warn("Watch mode enabled and YARN Service client creation failed. Retrying.."); + numAttempts--; + continue; + } else if (ret == ExitCode.SERVICE_CLIENT_ERROR_OTHER && watchMode) { + LOG.warn("Watch mode enabled and got YARN Service client error. Retrying.."); + numAttempts--; + continue; + } else if (ret == ExitCode.LLAP_REGISTRY_ERROR && watchMode) { + LOG.warn("Watch mode enabled and got LLAP registry error. Retrying.."); + numAttempts--; + continue; + } + break; + } + // Log final state to CONSOLE_LOGGER + maybeLogSummary(clock, 0L, statusServiceDriver, watchMode, watchTimeout, launchingState); + CONSOLE_LOGGER.info("\n\n\n"); + + statusServiceDriver.outputJson(pw); // print current state before exiting + pw.flush(); + if (numAttempts == 0 && watchMode && !desiredStateAttained) { + LOG.warn("Watch timeout {}s exhausted before desired state RUNNING is attained.", watchTimeout/1000); + } + } catch (Throwable t) { + logError(t); + if (t instanceof LlapStatusCliException) { + LlapStatusCliException ce = (LlapStatusCliException) t; + ret = ce.getExitCode(); + } else { + ret = ExitCode.INTERNAL_ERROR; + } + } finally { + LOG.info("LLAP status finished"); + if (ret != ExitCode.SUCCESS) { + LOG.error("LLAP did not start. Check the application log for more info:\n" + + "\tyarn logs --applicationId {} -out ", statusServiceDriver.applicationId); + } + statusServiceDriver.close(); + } + LOG.debug("Completed processing - exiting with " + ret); + + // HACK: due to the System.exit some log messages may not be present. + try { + Thread.sleep(1000); + } catch (Exception e) { + // ignore + } + System.exit(ret.getCode()); + } + + private static long maybeLogSummary(Clock clock, long lastSummaryLogTime, LlapStatusServiceDriver statusServiceDriver, + boolean watchMode, long watchTimeout, State launchingState) { + long currentTime = clock.getTime(); + if (lastSummaryLogTime < currentTime - LOG_SUMMARY_INTERVAL) { + String diagString = null; + if (launchingState == null && statusServiceDriver.appStatusBuilder.getState() == State.COMPLETE && watchMode) { + // First known state was COMPLETED. Wait for the app launch to start. + diagString = "Awaiting LLAP launch"; + // Clear completed instances in this case. Don't want to provide information from the previous run. + statusServiceDriver.appStatusBuilder.clearCompletedLlapInstances(); + } else { + diagString = constructDiagnostics(statusServiceDriver.appStatusBuilder); + } + + if (lastSummaryLogTime == -1) { + if (watchMode) { + CONSOLE_LOGGER.info("\nLLAPSTATUS WatchMode with timeout={} s", watchTimeout/1000); + } else { + CONSOLE_LOGGER.info("\nLLAPSTATUS"); + } + CONSOLE_LOGGER.info("--------------------------------------------------------------------------------"); + } + CONSOLE_LOGGER.info(diagString); + CONSOLE_LOGGER.info("--------------------------------------------------------------------------------"); + lastSummaryLogTime = currentTime; + } + return lastSummaryLogTime; + } + + /** + * Helper method to construct a diagnostic message from a complete AppStatusBuilder. + */ + private static String constructDiagnostics(AppStatusBuilder appStatusBuilder) { + StringBuilder sb = new StringBuilder(); + + switch (appStatusBuilder.getState()) { + case APP_NOT_FOUND: + sb.append("LLAP status unknown. Awaiting app launch"); + break; + case LAUNCHING: + // This is a catch all state - when containers have not started yet, or LLAP has not started yet. + if (StringUtils.isNotBlank(appStatusBuilder.getAmInfo().getAppId())) { + sb.append("LLAP Starting up with AppId=").append(appStatusBuilder.getAmInfo().getAppId()).append("."); + if (appStatusBuilder.getDesiredInstances() != null) { + sb.append(" Started 0/").append(appStatusBuilder.getDesiredInstances()).append(" instances"); + } + + String containerDiagnostics = constructCompletedContainerDiagnostics( + appStatusBuilder.getCompletedInstances()); + if (StringUtils.isNotEmpty(containerDiagnostics)) { + sb.append("\n").append(containerDiagnostics); + } + } else { + sb.append("Awaiting LLAP startup"); + } + break; + case RUNNING_PARTIAL: + sb.append("LLAP Starting up with ApplicationId=").append(appStatusBuilder.getAmInfo().getAppId()); + sb.append(" Started").append(appStatusBuilder.getLiveInstances()).append("/") + .append(appStatusBuilder.getDesiredInstances()).append(" instances"); + String containerDiagnostics = constructCompletedContainerDiagnostics(appStatusBuilder.getCompletedInstances()); + if (StringUtils.isNotEmpty(containerDiagnostics)) { + sb.append("\n").append(containerDiagnostics); + } + + // TODO HIVE-15865: Include information about pending requests, and last + // allocation time once YARN Service provides this information. + break; + case RUNNING_ALL: + sb.append("LLAP Application running with ApplicationId=").append(appStatusBuilder.getAmInfo().getAppId()); + break; + case COMPLETE: + sb.append("LLAP Application already complete. ApplicationId=").append(appStatusBuilder.getAmInfo().getAppId()); + containerDiagnostics = constructCompletedContainerDiagnostics(appStatusBuilder.getCompletedInstances()); + if (StringUtils.isNotEmpty(containerDiagnostics)) { + sb.append("\n").append(containerDiagnostics); + } + + break; + case UNKNOWN: + sb.append("LLAP status unknown"); + break; + default: + throw new IllegalStateException("Unknown State: " + appStatusBuilder.getState()); + } + if (StringUtils.isNotBlank(appStatusBuilder.getDiagnostics())) { + sb.append("\n").append(appStatusBuilder.getDiagnostics()); + } + + return sb.toString(); + } + + private static String constructCompletedContainerDiagnostics(List completedInstances) { + StringBuilder sb = new StringBuilder(); + if (completedInstances == null || completedInstances.size() == 0) { + return ""; + } else { + // TODO HIVE-15865 Ideally sort these by completion time, once that is available. + boolean isFirst = true; + for (LlapInstance instance : completedInstances) { + if (!isFirst) { + sb.append("\n"); + } else { + isFirst = false; + } + + if (instance.getYarnContainerExitStatus() == ContainerExitStatus.KILLED_EXCEEDED_PMEM || + instance.getYarnContainerExitStatus() == ContainerExitStatus.KILLED_EXCEEDED_VMEM) { + sb.append("\tKILLED container (by YARN for exceeding memory limits): "); + } else { + // TODO HIVE-15865 Handle additional reasons like OS launch failed + sb.append("\tFAILED container: "); + } + sb.append(" ").append(instance.getContainerId()); + sb.append(", Logs at: ").append(instance.getLogUrl()); + } + } + return sb.toString(); + } + + private static void logError(Throwable t) { + LOG.error("FAILED: " + t.getMessage(), t); + System.err.println("FAILED: " + t.getMessage()); + } +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/State.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/State.java new file mode 100644 index 0000000..40c189a --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/State.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +/** + * Enumeration of the potential states of the Llap. + */ +enum State { + APP_NOT_FOUND, + LAUNCHING, + RUNNING_PARTIAL, + RUNNING_ALL, + COMPLETE, + UNKNOWN +} diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/package-info.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/package-info.java new file mode 100644 index 0000000..79cadc7 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/package-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Package consisting the program LlapStatusServiceDriver (and other classes used by it) + * which is monitoring if Llap is running. + */ +package org.apache.hadoop.hive.llap.cli.status; + diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cli/TestLlapStatusServiceDriver.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cli/TestLlapStatusServiceDriver.java deleted file mode 100644 index 54166d5..0000000 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cli/TestLlapStatusServiceDriver.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.llap.cli; - -import org.apache.hadoop.hive.llap.cli.LlapStatusOptionsProcessor.LlapStatusOptions; -import org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver.ExitCode; -import org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver.LlapStatusCliException; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - -import static junit.framework.TestCase.assertEquals; - -import java.util.Properties; - -// TODO: write unit tests for the main logic of this class - needs refactoring first, current design isn't testable. -public class TestLlapStatusServiceDriver { - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testArgumentParsingDefault() throws LlapStatusCliException { - LlapStatusServiceDriver driver = new LlapStatusServiceDriver(); - LlapStatusOptions parseOptions = driver.parseOptions(new String[] {}); - - assertEquals("findAppTimeout should be the default value if not specified otherwise", - parseOptions.getFindAppTimeoutMs(), LlapStatusOptionsProcessor.FIND_YARN_APP_TIMEOUT_MS); - - assertEquals("refreshInterval should be the default value if not specified otherwise", - parseOptions.getRefreshIntervalMs(), LlapStatusOptionsProcessor.DEFAULT_STATUS_REFRESH_INTERVAL_MS); - - assertEquals("watchTimeout should be the default value if not specified otherwise", - parseOptions.getWatchTimeoutMs(), LlapStatusOptionsProcessor.DEFAULT_WATCH_MODE_TIMEOUT_MS); - - assertEquals("runningNodesThreshold should be the default value if not specified otherwise", - parseOptions.getRunningNodesThreshold(), LlapStatusOptionsProcessor.DEFAULT_RUNNING_NODES_THRESHOLD); - - assertEquals("hiveConf should be empty properties if not specified otherwise", parseOptions.getConf(), - new Properties()); - - assertEquals("isLaunched should be the true if not specified otherwise", parseOptions.isLaunched(), true); - - assertEquals("watchMode should be the false if not specified otherwise", parseOptions.isWatchMode(), false); - } - - @Test - public void testNegativeRefreshInterval() throws LlapStatusCliException { - thrown.expect(LlapStatusCliException.class); - thrown.expectMessage(ExitCode.INCORRECT_USAGE.getInt() + ": Incorrect usage"); - - LlapStatusServiceDriver driver = new LlapStatusServiceDriver(); - driver.parseOptions(new String[] {"--refreshInterval -1"}); - } - - @Test - public void testNegativeWatchTimeout() throws LlapStatusCliException { - thrown.expect(LlapStatusCliException.class); - thrown.expectMessage(ExitCode.INCORRECT_USAGE.getInt() + ": Incorrect usage"); - - LlapStatusServiceDriver driver = new LlapStatusServiceDriver(); - driver.parseOptions(new String[] {"--watchTimeout -1"}); - } - - @Test - public void testNegativeRunningNodesThreshold() throws LlapStatusCliException { - thrown.expect(LlapStatusCliException.class); - thrown.expectMessage(ExitCode.INCORRECT_USAGE.getInt() + ": Incorrect usage"); - - LlapStatusServiceDriver driver = new LlapStatusServiceDriver(); - driver.parseOptions(new String[] {"--runningNodesThreshold -1"}); - } - - @Test - public void testRunningNodesThresholdOverOne() throws LlapStatusCliException { - thrown.expect(LlapStatusCliException.class); - thrown.expectMessage(ExitCode.INCORRECT_USAGE.getInt() + ": Incorrect usage"); - - LlapStatusServiceDriver driver = new LlapStatusServiceDriver(); - driver.parseOptions(new String[] {"--runningNodesThreshold 1.1"}); - } -} diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cli/status/TestLlapStatusServiceCommandLine.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cli/status/TestLlapStatusServiceCommandLine.java new file mode 100644 index 0000000..3e4f683 --- /dev/null +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cli/status/TestLlapStatusServiceCommandLine.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import static junit.framework.TestCase.assertEquals; + +import java.util.Properties; + +/** + * Tests for LlapStatusServiceCommandLine. + */ +public class TestLlapStatusServiceCommandLine { + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Test + public void testArgumentParsingDefault() throws Exception { + LlapStatusServiceCommandLine cl = new LlapStatusServiceCommandLine(new String[] {}); + + assertEquals("findAppTimeout should be the default value if not specified otherwise", + cl.getFindAppTimeoutMs(), LlapStatusServiceCommandLine.DEFAULT_FIND_YARN_APP_TIMEOUT_MS); + + assertEquals("refreshInterval should be the default value if not specified otherwise", + cl.getRefreshIntervalMs(), LlapStatusServiceCommandLine.DEFAULT_STATUS_REFRESH_INTERVAL_MS); + + assertEquals("watchTimeout should be the default value if not specified otherwise", + cl.getWatchTimeoutMs(), LlapStatusServiceCommandLine.DEFAULT_WATCH_MODE_TIMEOUT_MS); + + assertEquals("runningNodesThreshold should be the default value if not specified otherwise", + cl.getRunningNodesThreshold(), LlapStatusServiceCommandLine.DEFAULT_RUNNING_NODES_THRESHOLD); + + assertEquals("hiveConf should be empty properties if not specified otherwise", cl.getHiveConf(), + new Properties()); + + assertEquals("isLaunched should be the true if not specified otherwise", cl.isLaunched(), true); + + assertEquals("watchMode should be the false if not specified otherwise", cl.isWatchMode(), false); + } + + @Test + public void testNegativeRefreshInterval() throws Exception { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Refresh interval should be >0"); + + new LlapStatusServiceCommandLine(new String[] {"--refreshInterval", "-1"}); + } + + @Test + public void testNegativeWatchTimeout() throws Exception { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Watch timeout should be >0"); + + new LlapStatusServiceCommandLine(new String[] {"--watchTimeout", "-1"}); + } + + @Test + public void testNegativeRunningNodesThreshold() throws Exception { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Running nodes threshold value should be between 0.0 and 1.0 (inclusive)"); + + new LlapStatusServiceCommandLine(new String[] {"--runningNodesThreshold", "-1"}); + } + + @Test + public void testRunningNodesThresholdOverOne() throws Exception { + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Running nodes threshold value should be between 0.0 and 1.0 (inclusive)"); + + new LlapStatusServiceCommandLine(new String[] {"--runningNodesThreshold", "1.1"}); + } +} diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cli/status/package-info.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cli/status/package-info.java new file mode 100644 index 0000000..9af5dd8 --- /dev/null +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cli/status/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Package consisting the tests for the program LlapStatusServiceDriver and other classes used by it. + */ +package org.apache.hadoop.hive.llap.cli.status; +