diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 440d761f03d..cce2fec01f3 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -5399,7 +5399,7 @@ public String getLogIdVar(String defaultValue) { public static void setVar(Configuration conf, ConfVars var, String val) { assert (var.valClass == String.class) : var.varname; - conf.set(var.varname, val); + conf.set(var.varname, val, "setVar"); } public static void setVar(Configuration conf, ConfVars var, String val, EncoderDecoder encoderDecoder) { @@ -5508,7 +5508,7 @@ private void initialize(Class cls) { origProp = getAllProperties(); // Overlay the ConfVars. Note that this ignores ConfVars with null values - addResource(getConfVarInputStream()); + addResource(getConfVarInputStream(), "HiveConf.java"); // Overlay hive-site.xml if it exists if (hiveSiteURL != null) { @@ -5621,8 +5621,8 @@ private void setupSQLStdAuthWhiteList() { if (whiteListParamsStr == null || whiteListParamsStr.trim().isEmpty()) { // set the default configs in whitelist whiteListParamsStr = getSQLStdAuthDefaultWhiteListPattern(); + setVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST, whiteListParamsStr); } - setVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST, whiteListParamsStr); } private static String getSQLStdAuthDefaultWhiteListPattern() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 3278dfea061..605596790cf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -44,6 +44,7 @@ import java.util.Set; import java.util.Stack; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.ZipOutputStream; @@ -1344,9 +1345,27 @@ public boolean checkOrWaitForTheFile(FileSystem srcFs, Path src, Path dest, Conf * @throws IOException */ public JobConf createConfiguration(HiveConf hiveConf) throws IOException { + return createConfiguration(hiveConf, false); + } + + /** + * Creates and initializes a JobConf object that can be used to execute + * the DAG. This can skip the configs which are already included in AM configs. + * @param hiveConf Current conf for the execution + * @param skipAMConf Skip the configs where are already set across all DAGs + * @return JobConf base configuration for job execution + * @throws IOException + */ + public JobConf createConfiguration(HiveConf hiveConf, boolean skipAMConf) throws IOException { hiveConf.setBoolean("mapred.mapper.new-api", false); - JobConf conf = new JobConf(new TezConfiguration(hiveConf)); + Predicate findDefaults = + (s) -> ((s != null) && (s.endsWith(".xml") || (s.endsWith(".java") && !"HiveConf.java".equals(s)))); + + // since this is an inclusion filter, negate the predicate + JobConf conf = + TezConfigurationFactory + .wrapWithJobConf(hiveConf, skipAMConf ? findDefaults.negate() : null); conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName()); @@ -1364,6 +1383,7 @@ public JobConf createConfiguration(HiveConf hiveConf) throws IOException { // Removing job credential entry/ cannot be set on the tasks conf.unset("mapreduce.job.credentials.binary"); + // TODO: convert this to a predicate too hiveConf.stripHiddenConfigurations(conf); return conf; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java new file mode 100644 index 00000000000..f33debe1a27 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java @@ -0,0 +1,63 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.apache.hadoop.hive.ql.exec.tez; + +import java.util.Iterator; +import java.util.Map; +import java.util.function.Predicate; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.JobConf; + +public class TezConfigurationFactory { + + public static Configuration copyInto(Configuration target, Configuration src, + Predicate sourceFilter) { + Iterator> iter = src.iterator(); + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + String name = entry.getKey(); + String value = entry.getValue(); + String[] sources = src.getPropertySources(name); + final String source; + if (sources == null || sources.length == 0) { + source = null; + } else { + /* + * If the property or its source wasn't found. Otherwise, returns a list of the sources of + * the resource. The older sources are the first ones in the list. + */ + source = sources[sources.length - 1]; + } + + if (sourceFilter == null || sourceFilter.test(source)) { + target.set(name, value); + } else { + } + } + return target; + } + + public static JobConf wrapWithJobConf(Configuration conf, Predicate sourceFilter) { + JobConf jc = new JobConf(false); + copyInto(jc, conf, sourceFilter); + return jc; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java index dd7ccd4764d..b32b6b39c5b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java @@ -174,8 +174,10 @@ public int execute(DriverContext driverContext) { ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp")); WmContext wmContext = ctx.getWmContext(); - // jobConf will hold all the configuration for hadoop, tez, and hive - JobConf jobConf = utils.createConfiguration(conf); + // jobConf will hold all the configuration for hadoop, tez, and hive, which are not set in AM defaults + JobConf jobConf = utils.createConfiguration(conf, true); + + // Get all user jars from work (e.g. input format stuff). String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf); // DAG scratch dir. We get a session from the pool so it may be different from Tez one. @@ -217,7 +219,7 @@ public int execute(DriverContext driverContext) { if (this.isShutdown) { throw new HiveException("Operation cancelled"); } - DAGClient dagClient = submit(jobConf, dag, sessionRef); + DAGClient dagClient = submit(dag, sessionRef); session = sessionRef.value; boolean wasShutdown = false; synchronized (dagClientLock) { @@ -532,7 +534,7 @@ private TezSessionState getNewTezSessionOnError( return newSession; } - DAGClient submit(JobConf conf, DAG dag, Ref sessionStateRef) throws Exception { + DAGClient submit(DAG dag, Ref sessionStateRef) throws Exception { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_DAG); DAGClient dagClient = null; TezSessionState sessionState = sessionStateRef.value; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRegExp.java index 3bf3cfd3d9e..8522abd0ee6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRegExp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRegExp.java @@ -64,7 +64,7 @@ @Override public void configure(MapredContext context) { if (context != null) { - if(context.getJobConf().get("hive.use.googleregex.engine").equals("true")){ + if(HiveConf.getBoolVar(context.getJobConf(), HiveConf.ConfVars.HIVEUSEGOOGLEREGEXENGINE)){ this.useGoogleRegexEngine=true; } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java index befeb4f2dd4..4e4a9797ad6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java @@ -28,7 +28,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -220,7 +219,7 @@ public void testEmptyWork() throws IllegalArgumentException, IOException, Except @Test public void testSubmit() throws Exception { DAG dag = DAG.create("test"); - task.submit(conf, dag, Ref.from(sessionState)); + task.submit(dag, Ref.from(sessionState)); // validate close/reopen verify(sessionState, times(1)).reopen(); verify(session, times(2)).submitDAG(any(DAG.class)); diff --git a/ql/src/test/org/apache/hive/testutils/HiveTestEnvSetup.java b/ql/src/test/org/apache/hive/testutils/HiveTestEnvSetup.java index f872da02a3c..efab4083b5e 100644 --- a/ql/src/test/org/apache/hive/testutils/HiveTestEnvSetup.java +++ b/ql/src/test/org/apache/hive/testutils/HiveTestEnvSetup.java @@ -64,6 +64,11 @@ */ public class HiveTestEnvSetup extends ExternalResource { + private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") + + File.separator + HiveTestEnvSetup.class.getCanonicalName() + + "-" + System.currentTimeMillis() + ).getPath().replaceAll("\\\\", "/"); + static interface IHiveTestRule { default void beforeClass(HiveTestEnvContext ctx) throws Exception { } @@ -222,12 +227,25 @@ public void beforeClass(HiveTestEnvContext ctx) throws Exception { HadoopShims shims = ShimLoader.getHadoopShims(); mr1 = shims.getLocalMiniTezCluster(ctx.hiveConf, true); mr1.setupConfiguration(ctx.hiveConf); + setupTez(ctx.hiveConf); } @Override public void afterClass(HiveTestEnvContext ctx) throws Exception { mr1.shutdown(); } + + private void setupTez(HiveConf conf) { + conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez"); + conf.setVar(HiveConf.ConfVars.HIVE_USER_INSTALL_DIR, TEST_DATA_DIR); + conf.set("tez.am.resource.memory.mb", "128"); + conf.set("tez.am.dag.scheduler.class", "org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled"); + conf.setBoolean("tez.local.mode", true); + conf.set("fs.defaultFS", "file:///"); + conf.setBoolean("tez.runtime.optimize.local.fetch", true); + conf.set("tez.staging-dir", TEST_DATA_DIR); + conf.setBoolean("tez.ignore.lib.uris", true); + } } public static final String HIVE_ROOT = getHiveRoot(); diff --git a/ql/src/test/queries/clientpositive/mm_loaddata.q b/ql/src/test/queries/clientpositive/mm_loaddata.q index 7e5787f2a65..3a652134098 100644 --- a/ql/src/test/queries/clientpositive/mm_loaddata.q +++ b/ql/src/test/queries/clientpositive/mm_loaddata.q @@ -6,6 +6,7 @@ set tez.grouping.max-size=2; set hive.exec.dynamic.partition.mode=nonstrict; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set mapreduce.map.memory.mb=128; drop table load0_mm;