diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 2cea174..7c07c4f 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -5352,7 +5352,7 @@ public String getLogIdVar(String defaultValue) {
 
   public static void setVar(Configuration conf, ConfVars var, String val) {
     assert (var.valClass == String.class) : var.varname;
-    conf.set(var.varname, val);
+    conf.set(var.varname, val, "setVar");
   }
   public static void setVar(Configuration conf, ConfVars var, String val,
                             EncoderDecoder<String, String> encoderDecoder) {
@@ -5461,7 +5461,7 @@ private void initialize(Class<?> cls) {
     origProp = getAllProperties();
 
     // Overlay the ConfVars. Note that this ignores ConfVars with null values
-    addResource(getConfVarInputStream());
+    addResource(getConfVarInputStream(), "HiveConf.java");
 
     // Overlay hive-site.xml if it exists
     if (hiveSiteURL != null) {
@@ -5574,8 +5574,8 @@ private void setupSQLStdAuthWhiteList() {
     if (whiteListParamsStr == null || whiteListParamsStr.trim().isEmpty()) {
       // set the default configs in whitelist
       whiteListParamsStr = getSQLStdAuthDefaultWhiteListPattern();
+      setVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST, whiteListParamsStr);
     }
-    setVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST, whiteListParamsStr);
   }
 
   private static String getSQLStdAuthDefaultWhiteListPattern() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index f06ac37..f3bcfdf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -44,6 +44,7 @@
 import java.util.Set;
 import java.util.Stack;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.zip.ZipOutputStream;
@@ -1344,9 +1345,27 @@ public boolean checkOrWaitForTheFile(FileSystem srcFs, Path src, Path dest, Conf
    * @throws IOException
    */
   public JobConf createConfiguration(HiveConf hiveConf) throws IOException {
+    return createConfiguration(hiveConf, false);
+  }
+
+  /**
+   * Creates and initializes a JobConf object that can be used to execute
+   * the DAG. This can skip the configs that are already included in the AM configs.
+   * @param hiveConf Current conf for the execution
+   * @param skipAMConf Skip the configs that are already set across all DAGs
+   * @return JobConf base configuration for job execution
+   * @throws IOException
+   */
+  public JobConf createConfiguration(HiveConf hiveConf, boolean skipAMConf) throws IOException {
     hiveConf.setBoolean("mapred.mapper.new-api", false);
-    JobConf conf = new JobConf(new TezConfiguration(hiveConf));
+    Predicate<String> findDefaults =
+        (s) -> ((s != null) && (s.endsWith(".xml") || s.endsWith(".java")));
+
+    // since this is an inclusion filter, negate the predicate
+    JobConf conf =
+        TezConfigurationFactory
+            .wrapWithJobConf(hiveConf, skipAMConf ? findDefaults.negate() : null);
 
     conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName());
@@ -1364,6 +1383,7 @@ public JobConf createConfiguration(HiveConf hiveConf) throws IOException {
     // Removing job credential entry/ cannot be set on the tasks
     conf.unset("mapreduce.job.credentials.binary");
 
+    // TODO: convert this to a predicate too
     hiveConf.stripHiddenConfigurations(conf);
     return conf;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java
new file mode 100644
index 0000000..c040faa
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.tez;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.function.Predicate;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.apache.tez.dag.api.records.DAGProtos;
+
+public class TezConfigurationFactory {
+
+  private static enum ConfigurationHolder {
+    INSTANCE;
+
+    TezConfiguration defaultConf = new TezConfiguration();
+
+    public TezConfiguration getDefaultConfiguration() {
+      return defaultConf;
+    }
+  }
+
+  public static Configuration copyInto(Configuration target, Configuration src,
+      Predicate<String> sourceFilter) {
+    long skipped = 0;
+    long total = 0;
+    Iterator<Map.Entry<String, String>> iter = src.iterator();
+    while (iter.hasNext()) {
+      Map.Entry<String, String> entry = iter.next();
+      String name = entry.getKey();
+      String value = entry.getValue();
+      String[] sources = src.getPropertySources(name);
+      final String source;
+      if (sources == null || sources.length == 0) {
+        source = null;
+      } else {
+        /*
+         * getPropertySources() lists the sources of the property with the older sources
+         * first, so the last element is the most recent source.
+         */
+        source = sources[sources.length - 1];
+      }
+      if (sourceFilter == null || sourceFilter.test(source)) {
+        target.set(name, value);
+      } else {
+        skipped++;
+      }
+      total++;
+    }
+    return target;
+  }
+
+  public static TezConfiguration wrapWithTezConf(Configuration conf, Predicate<String> sourceFilter) {
+    TezConfiguration tezconf = new TezConfiguration(false);
+    // this is a useful approximation, since the overlays in tez-site.xml are unique to that file
+    // (or hive set parameters)
+    // if Configuration.getOverlay() was public, this could be applied in a neater fashion
+    copyInto(tezconf, ConfigurationHolder.INSTANCE.getDefaultConfiguration(), sourceFilter);
+    copyInto(tezconf, conf, sourceFilter);
+    return tezconf;
+  }
+
+  public static TezConfiguration wrapWithTezConf(Configuration conf) {
+    return wrapWithTezConf(conf, null);
+  }
+
+  public static JobConf wrapWithJobConf(Configuration conf, Predicate<String> sourceFilter) {
+    JobConf jc = new JobConf(false);
+    copyInto(jc, conf, sourceFilter);
+    return jc;
+  }
+
+  public static JobConf wrapWithJobConf(Configuration conf) {
+    return wrapWithJobConf(conf, null);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index dd7ccd4..19c5ce0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -174,8 +174,8 @@ public int execute(DriverContext driverContext) {
         ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
 
     WmContext wmContext = ctx.getWmContext();
-    // jobConf will hold all the configuration for hadoop, tez, and hive
-    JobConf jobConf = utils.createConfiguration(conf);
+    // jobConf will hold all the configuration for hadoop, tez, and hive that is not already set in the AM defaults
+    JobConf jobConf = utils.createConfiguration(conf, true);
     // Get all user jars from work (e.g. input format stuff).
     String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
     // DAG scratch dir. We get a session from the pool so it may be different from Tez one.
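
Not part of the patch: a minimal sketch of how the source-filtered copy above is expected to behave. Only TezConfigurationFactory, the "setVar" source tag, and the findDefaults predicate come from the patch; the demo class, the property choices, and the "demo-site.xml" source tag are illustrative assumptions.

import java.util.function.Predicate;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.tez.TezConfigurationFactory;
import org.apache.hadoop.mapred.JobConf;

public class TezConfigurationFactoryDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);

    // Pretend this value came from a site file: its source ends with ".xml",
    // so it is treated as something the AM configuration already contains.
    conf.set("tez.queue.name", "etl", "demo-site.xml");

    // Pretend this value was set at runtime through HiveConf.setVar: its source
    // is "setVar", which the filter does not match, so it travels with the DAG.
    conf.set("hive.exec.reducers.max", "100", "setVar");

    // Same shape as the predicate added to DagUtils#createConfiguration.
    Predicate<String> findDefaults =
        (s) -> ((s != null) && (s.endsWith(".xml") || s.endsWith(".java")));

    // copyInto() applies an inclusion filter, so the predicate is negated.
    JobConf jobConf = TezConfigurationFactory.wrapWithJobConf(conf, findDefaults.negate());

    System.out.println(jobConf.get("tez.queue.name"));         // null - skipped as an AM default
    System.out.println(jobConf.get("hive.exec.reducers.max")); // 100  - copied for this DAG
  }
}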