diff --git llap-server/src/java/org/apache/hadoop/hive/llap/configuration/LlapDaemonConfiguration.java llap-server/src/java/org/apache/hadoop/hive/llap/configuration/LlapDaemonConfiguration.java index 4fc7bd6b1a..39b910532c 100644 --- llap-server/src/java/org/apache/hadoop/hive/llap/configuration/LlapDaemonConfiguration.java +++ llap-server/src/java/org/apache/hadoop/hive/llap/configuration/LlapDaemonConfiguration.java @@ -17,6 +17,7 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.conf.HiveConf; /** * Configuration for LLAP daemon processes only. This should not be used by any clients. @@ -36,6 +37,7 @@ public LlapDaemonConfiguration() { super(true); // Load the defaults. + addResource(new HiveConf()); for (String conf : DAEMON_CONFIGS) { addResource(conf); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index a151eefc95..15060ba8e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -1372,14 +1372,15 @@ public JobConf createConfiguration(HiveConf hiveConf, boolean skipAMConf) throws hiveConf.setBoolean("mapred.mapper.new-api", false); Predicate findDefaults = - (s) -> ((s != null) && ((s.endsWith(".xml") && !s.endsWith("hive-site.xml")) || - (s.endsWith(".java") && !"HiveConf.java".equals(s)))); + (s) -> ((s != null) && (s.endsWith(".xml") || s.endsWith(".java"))); // since this is an inclusion filter, negate the predicate JobConf conf = TezConfigurationFactory .wrapWithJobConf(hiveConf, skipAMConf ? findDefaults.negate() : null); + conf.set("mapred.mapper.new-api", "false"); + conf.set("mapred.output.committer.class", NullOutputCommitter.class.getName()); conf.setBoolean("mapred.committer.job.setup.cleanup.needed", false); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java index 2e0a408d04..37ec859b27 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java @@ -126,8 +126,9 @@ public HiveSplitGenerator(InputInitializerContext initializerContext) throws IOE Preconditions.checkNotNull(initializerContext); userPayloadProto = MRInputHelpers.parseMRInputPayload(initializerContext.getInputUserPayload()); - - this.conf = new Configuration(initializerContext.getVertexConfiguration()); + + this.conf = new HiveConf(); + this.conf.addResource(initializerContext.getVertexConfiguration()); TezUtils.addToConfFromByteString(this.conf, userPayloadProto.getConfigurationBytes()); this.jobConf = new JobConf(conf); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java index a0da0ad2fe..625256c514 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezConfigurationFactory.java @@ -24,8 +24,12 @@ import java.util.function.Predicate; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.MRConfig; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,7 +67,7 @@ public static Configuration copyInto(Configuration target, Configuration src, } if (sourceFilter == null || sourceFilter.test(source)) { - target.set(name, value); + target.set(name, value, source); } else { } } @@ -74,6 +78,69 @@ public static JobConf wrapWithJobConf(Configuration conf, Predicate sour JobConf jc = new JobConf(false); copyInto(jc, defaultConf, sourceFilter); copyInto(jc, conf, sourceFilter); + copyDagBuildStageConfs(jc, conf); return jc; } + + private static void copyDagBuildStageConfs(Configuration target, Configuration source) { + for (String prop: dagBuildStageConfs) { + String value = source.get(prop); + if (value != null) { + target.set(prop, value); + } + } + } + + private static String[] dagBuildStageConfs = { + MRJobConfig.IO_SORT_MB, + MRJobConfig.IO_SORT_FACTOR, + MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, + MRJobConfig.SHUFFLE_READ_TIMEOUT, + MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, + MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, + MRJobConfig.GROUP_COMPARATOR_CLASS, + MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, + MRJobConfig.MAP_SORT_SPILL_PERCENT, + MRJobConfig.MAP_OUTPUT_KEY_CLASS, + MRJobConfig.MAP_OUTPUT_VALUE_CLASS, + MRJobConfig.MAP_MEMORY_MB, + MRJobConfig.SHUFFLE_CONNECT_TIMEOUT, + MRJobConfig.INDEX_CACHE_MEMORY_LIMIT, + MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, + MRJobConfig.KEY_COMPARATOR, + MRJobConfig.JOB_NAME, + MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, + MRJobConfig.MAP_OUTPUT_COMPRESS, + MRJobConfig.RECORDS_BEFORE_PROGRESS, + MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, + MRJobConfig.REDUCE_MEMTOMEM_ENABLED, + MRJobConfig.SHUFFLE_NOTIFY_READERROR, + MRConfig.MAPRED_IFILE_READAHEAD, + MRConfig.MAPRED_IFILE_READAHEAD_BYTES, + MRConfig.SHUFFLE_SSL_ENABLED_KEY, + MRJobConfig.SHUFFLE_FETCH_FAILURES, + MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, + MRJobConfig.SHUFFLE_PARALLEL_COPIES, + MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, + MRJobConfig.SHUFFLE_MERGE_PERCENT, + MRJobConfig.MAP_CPU_VCORES, + MRJobConfig.MAP_COMBINE_MIN_SPILLS, + TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, + TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, + TezRuntimeConfiguration.TEZ_RUNTIME_COMBINER_CLASS, + TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, + TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT, + TezConfiguration.TEZ_AM_VIEW_ACLS, + TezConfiguration.TEZ_AM_MODIFY_ACLS, + HiveConf.ConfVars.HIVETEZCPUVCORES.varname, + HiveConf.ConfVars.HIVE_RPC_QUERY_PLAN.varname, + HiveConf.ConfVars.HIVETEZCONTAINERSIZE.varname, + HiveConf.ConfVars.HIVE_AM_SPLIT_GENERATION.varname, + HiveConf.ConfVars.HIVETEZHS2USERACCESS.varname, + JobConf.MAPRED_TASK_ENV, + "fs.file.impl.disable.cache", + "mapred.input.format.class", + "map.sort.class", + "mapred.combiner.class" + }; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java index 590de42735..d1045d1de9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezProcessor.java @@ -25,7 +25,9 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; +import com.google.protobuf.ByteString; import org.apache.hadoop.hive.conf.Constants; +import org.apache.tez.dag.api.UserPayload; import org.apache.tez.runtime.api.TaskFailureType; import org.apache.tez.runtime.api.events.CustomProcessorEvent; import org.slf4j.Logger; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java index abc277ca99..e40e1d5f59 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/Utils.java @@ -48,7 +48,9 @@ public static SplitLocationProvider getSplitLocationProvider(Configuration conf, && HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS) && useCacheAffinity; SplitLocationProvider splitLocationProvider; - LOG.info("SplitGenerator using llap affinitized locations: " + useCustomLocations); + LOG.info("SplitGenerator using llap affinitized locations: {}, executionMode: {}, consistentSplits: {}, useCacheAffinity: {}", + useCustomLocations, HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap"), + HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_CLIENT_CONSISTENT_SPLITS), useCacheAffinity); if (useCustomLocations) { LlapRegistryService serviceRegistry = LlapRegistryService.getClient(conf); return getCustomSplitLocationProvider(serviceRegistry, LOG);