diff --git common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index c6a1cbd..44daeb4 100644
--- common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -47,7 +47,7 @@ public String getAggregator(Configuration conf) {
       public String getPublisher(Configuration conf) {
         return "org.apache.hadoop.hive.ql.stats.CounterStatsPublisher"; }
       public String getAggregator(Configuration conf) {
-        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_TEZ)) {
+        if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
          return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregatorTez";
        }
        return "org.apache.hadoop.hive.ql.stats.CounterStatsAggregator"; }
diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d68572e..65a51f0 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -864,7 +864,8 @@
     // Whether to show the unquoted partition names in query results.
     HIVE_DECODE_PARTITION_NAME("hive.decode.partition.name", false),
 
-    HIVE_OPTIMIZE_TEZ("hive.optimize.tez", false),
+    HIVE_EXECUTION_ENGINE("hive.execution.engine", "mr",
+        new StringsValidator("mr", "tez")),
     HIVE_JAR_DIRECTORY("hive.jar.directory", "hdfs:///user/hive/"),
     HIVE_USER_INSTALL_DIR("hive.user.install.directory", "hdfs:///user/"),
 
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index 9ac2d94..def4b7b 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -2037,11 +2037,10 @@
 </property>
 
 <property>
-  <name>hive.optimize.tez</name>
-  <value>false</value>
+  <name>hive.execution.engine</name>
+  <value>mr</value>
   <description>
-    Setting this property turns on Tez execution. Needs tez installed on the
-    cluster. (Only availble on hadoop 2)
+    Chooses execution engine. Options are: mr (Map reduce, default) or tez (hadoop 2 only)
   </description>
 </property>
 
diff --git data/conf/tez/hive-site.xml data/conf/tez/hive-site.xml
index 73c7f70..1af4495 100644
--- data/conf/tez/hive-site.xml
+++ data/conf/tez/hive-site.xml
@@ -189,9 +189,9 @@
 </property>
 
 <property>
-  <name>hive.optimize.tez</name>
-  <value>true</value>
-  <description>Whether tez is on or off</description>
+  <name>hive.execution.engine</name>
+  <value>tez</value>
+  <description>Whether to use MR or Tez</description>
 </property>
 
 <property>
diff --git ql/src/java/org/apache/hadoop/hive/ql/HashTableLoaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/HashTableLoaderFactory.java
index d962c62..10ad933 100644
--- ql/src/java/org/apache/hadoop/hive/ql/HashTableLoaderFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/HashTableLoaderFactory.java
@@ -32,7 +32,7 @@ private HashTableLoaderFactory() {
   }
 
   public static HashTableLoader getLoader(Configuration hconf) {
-    if (HiveConf.getBoolVar(hconf, ConfVars.HIVE_OPTIMIZE_TEZ)) {
+    if (HiveConf.getVar(hconf, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
       return new org.apache.hadoop.hive.ql.exec.tez.HashTableLoader();
     } else {
       return new org.apache.hadoop.hive.ql.exec.mr.HashTableLoader();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
index b19e7a0..ce705c2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
@@ -49,7 +49,7 @@ public static MapredContext get() {
 
   public static MapredContext init(boolean isMap, JobConf jobConf) {
     MapredContext context =
-        HiveConf.getBoolVar(jobConf, ConfVars.HIVE_OPTIMIZE_TEZ) ?
+        HiveConf.getVar(jobConf, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
         new TezContext(isMap, jobConf) : new MapredContext(isMap, jobConf);
     contexts.set(context);
     return context;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
index 953b231..271d943 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ObjectCacheFactory.java
@@ -35,7 +35,7 @@ private ObjectCacheFactory() {
    * Returns the appropriate cache
    */
   public static ObjectCache getCache(Configuration conf) {
-    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_TEZ)) {
+    if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
       return new org.apache.hadoop.hive.ql.exec.tez.ObjectCache();
     } else {
       return new org.apache.hadoop.hive.ql.exec.mr.ObjectCache();
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 62fc099..480882a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2937,7 +2937,8 @@ public static double getHighestSamplePercentage (MapWork work) {
       pathsProcessed.add(path);
 
       LOG.info("Adding input file " + path);
 
-      if (!HiveConf.getBoolVar(job, ConfVars.HIVE_OPTIMIZE_TEZ) && isEmptyPath(job, path, ctx)) {
+      if (!HiveConf.getVar(job, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
+          && isEmptyPath(job, path, ctx)) {
         path = createDummyFileForEmptyPartition(path, job, work,
             hiveScratchDir, alias, sequenceNumber++);
@@ -2954,7 +2955,8 @@ public static double getHighestSamplePercentage (MapWork work) {
     // T2) x;
     // If T is empty and T2 contains 100 rows, the user expects: 0, 100 (2
     // rows)
-    if (path == null && !HiveConf.getBoolVar(job, ConfVars.HIVE_OPTIMIZE_TEZ)) {
+    if (path == null
+        && !HiveConf.getVar(job, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
       path = createDummyFileForEmptyTable(job, work, hiveScratchDir,
           alias, sequenceNumber++);
       pathsToAdd.add(path);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
index d702875..a4585de 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/HadoopJobExecHelper.java
@@ -138,7 +138,7 @@ public HadoopJobExecHelper(JobConf job, LogHelper console,
     if (job != null) {
       // even with tez on some jobs are run as MR. disable the flag in
      // the conf, so that the backend runs fully as MR.
-      HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_OPTIMIZE_TEZ, false);
+      HiveConf.setVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr");
     }
   }
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index eedb477..2e96348 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1263,7 +1263,7 @@ public static void createMRWorkForMergingFiles (FileSinkOperator fsInput,
     } else {
       cplan = createMRWorkForMergingFiles(conf, tsMerge, fsInputDesc);
 
-      if (conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TEZ)) {
+      if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
         work = new TezWork();
         cplan.setName("Merge");
         ((TezWork)work).add(cplan);
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index f75900a..6df4b3f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -116,7 +116,7 @@ public void initialize(HiveConf hiveConf) {
       transformations.add(new StatsOptimizer());
     }
     if (pctx.getContext().getExplain() ||
-        HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_TEZ)) {
+        HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
       transformations.add(new AnnotateWithStatistics());
     }
     transformations.add(new SimpleFetchOptimizer()); // must be called last
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index c5b4ffe..4beb790 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6646,7 +6646,7 @@ private void pushJoinFilters(QB qb, QBJoinTree joinTree,
       if (((ASTNode) hint.getChild(0)).getToken().getType() == HiveParser.TOK_MAPJOIN) {
         // the user has specified to ignore mapjoin hint
         if (!conf.getBoolVar(HiveConf.ConfVars.HIVEIGNOREMAPJOINHINT)
-            && !conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_TEZ)) {
+            && !conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
           ASTNode hintTblNames = (ASTNode) hint.getChild(1);
           int numCh = hintTblNames.getChildCount();
           for (int tblPos = 0; tblPos < numCh; tblPos++) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
index dd83d15..6a415c5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
@@ -35,7 +35,7 @@ private TaskCompilerFactory() {
    * into executable units.
    */
   public static TaskCompiler getCompiler(HiveConf conf, ParseContext parseContext) {
-    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_TEZ)
+    if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")
         && !parseContext.getQB().getParseInfo().isAnalyzeCommand()) {
       return new TezCompiler();
     } else {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index 0d4f094..92053cb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -390,7 +390,7 @@ public static TableDesc getReduceKeyTableDesc(List<FieldSchema> fieldSchemas,
    */
   public static TableDesc getMapJoinKeyTableDesc(Configuration conf,
      List<FieldSchema> fieldSchemas) {
-    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_OPTIMIZE_TEZ)) {
+    if (HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
       // In tez we use a different way of transmitting the hash table.
       // We basically use ReduceSinkOperators and set the transfer to
       // be broadcast (instead of partitioned). As a consequence we use
diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 3e69f01..ef35f1a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -310,7 +310,8 @@ public static SessionState start(SessionState startSs) {
       throw new RuntimeException(e);
     }
 
-    if (HiveConf.getBoolVar(startSs.getConf(), HiveConf.ConfVars.HIVE_OPTIMIZE_TEZ)) {
+    if (HiveConf.getVar(startSs.getConf(), HiveConf.ConfVars.HIVE_EXECUTION_ENGINE)
+        .equals("tez")) {
       try {
         if (startSs.tezSessionState == null) {
           startSs.tezSessionState = new TezSessionState();
@@ -320,7 +321,7 @@ public static SessionState start(SessionState startSs) {
         throw new RuntimeException(e);
       }
     } else {
-      LOG.info("No Tez session required at this point. hive.optimize.tez is false.");
+      LOG.info("No Tez session required at this point. hive.execution.engine=mr.");
     }
 
     return startSs;
diff --git ql/src/test/queries/clientpositive/mrr.q ql/src/test/queries/clientpositive/mrr.q
index 7f78dc8..9f068cc 100644
--- ql/src/test/queries/clientpositive/mrr.q
+++ ql/src/test/queries/clientpositive/mrr.q
@@ -1,5 +1,3 @@
-set hive.optimize.tez=true;
-
 -- simple query with multiple reduce stages
 EXPLAIN SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt;
 SELECT key, count(value) as cnt FROM src GROUP BY key ORDER BY cnt;
diff --git ql/src/test/queries/clientpositive/tez_dml.q ql/src/test/queries/clientpositive/tez_dml.q
index b4ca8be..761cd74 100644
--- ql/src/test/queries/clientpositive/tez_dml.q
+++ ql/src/test/queries/clientpositive/tez_dml.q
@@ -1,4 +1,3 @@
-set hive.optimize.tez=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
 
 -- CTAS
diff --git ql/src/test/queries/clientpositive/tez_insert_overwrite_local_directory_1.q ql/src/test/queries/clientpositive/tez_insert_overwrite_local_directory_1.q
index b465c02..d7a652f 100644
--- ql/src/test/queries/clientpositive/tez_insert_overwrite_local_directory_1.q
+++ ql/src/test/queries/clientpositive/tez_insert_overwrite_local_directory_1.q
@@ -1,5 +1,3 @@
-set hive.optimize.tez=true;
-
 insert overwrite local directory '${system:test.tmp.dir}/tez_local_src_table_1'
 select * from src order by key limit 10 ;
 dfs -cat file:${system:test.tmp.dir}/tez_local_src_table_1/000000_0 ;
diff --git ql/src/test/queries/clientpositive/tez_join_tests.q ql/src/test/queries/clientpositive/tez_join_tests.q
index 51674b7..f309e3f 100644
--- ql/src/test/queries/clientpositive/tez_join_tests.q
+++ ql/src/test/queries/clientpositive/tez_join_tests.q
@@ -1,4 +1,3 @@
-set hive.optimize.tez=true;
 explain
 select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value)
 order by x.key;
diff --git ql/src/test/queries/clientpositive/tez_joins_explain.q ql/src/test/queries/clientpositive/tez_joins_explain.q
index d720811..9193843 100644
--- ql/src/test/queries/clientpositive/tez_joins_explain.q
+++ ql/src/test/queries/clientpositive/tez_joins_explain.q
@@ -1,4 +1,3 @@
-set hive.optimize.tez=true;
 explain
 select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value)
 order by x.key;