commit 98dae3767f3b218dd9548e681096dc1a19e5999a Author: Alice Fan Date: Sun Sep 23 22:39:30 2018 -0700 HIVE-12812 : Enable mapred.input.dir.recursive by default to support union with aggregate function diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java index 9f4a201421..c6aacc76af 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -54,6 +55,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1; import org.apache.hadoop.hive.ql.optimizer.GenMROperator; import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext; @@ -70,6 +72,7 @@ import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.shims.ShimLoader; public class MapReduceCompiler extends TaskCompiler { @@ -79,6 +82,16 @@ public MapReduceCompiler() { } + @Override + public void init(QueryState queryState, LogHelper console, Hive db) { + super.init(queryState, console, db); + + //It is required the use of recursive input dirs when hive.optimize.union.remove = true + if(conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE)) { + conf.setBoolean("mapred.input.dir.recursive", true); + } + } + // loop over all the tasks recursively @Override protected void setInputFormat(Task task) { diff --git a/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q b/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q index 2db13f0687..a66166572e 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q +++ b/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q @@ -7,7 +7,6 @@ set hive.optimize.union.remove=true; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set hive.merge.sparkfiles=false; -set mapred.input.dir.recursive=true; -- This is to test the union->selectstar->filesink and skewjoin optimization -- Union of 2 map-reduce subqueries is performed for the skew join diff --git a/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q b/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q index 2a41e3af69..5b09bc2988 100644 --- a/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q +++ b/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q @@ -7,7 +7,6 @@ set hive.optimize.union.remove=true; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set hive.merge.sparkfiles=false; -set mapred.input.dir.recursive=true; CREATE TABLE T1_n8(key STRING, val STRING) SKEWED BY (key) ON ((2), (8)) STORED AS TEXTFILE; diff --git a/ql/src/test/queries/clientpositive/union_remove_1.q b/ql/src/test/queries/clientpositive/union_remove_1.q index 7276804197..01543f53cb 100644 --- a/ql/src/test/queries/clientpositive/union_remove_1.q +++ b/ql/src/test/queries/clientpositive/union_remove_1.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_10.q b/ql/src/test/queries/clientpositive/union_remove_10.q index dfd5d0adde..3ae057fc2b 100644 --- a/ql/src/test/queries/clientpositive/union_remove_10.q +++ b/ql/src/test/queries/clientpositive/union_remove_10.q @@ -6,7 +6,6 @@ set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_11.q b/ql/src/test/queries/clientpositive/union_remove_11.q index 6017336bb3..a92d574414 100644 --- a/ql/src/test/queries/clientpositive/union_remove_11.q +++ b/ql/src/test/queries/clientpositive/union_remove_11.q @@ -6,7 +6,6 @@ set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_12.q b/ql/src/test/queries/clientpositive/union_remove_12.q index 392c2978ab..1352bc0c17 100644 --- a/ql/src/test/queries/clientpositive/union_remove_12.q +++ b/ql/src/test/queries/clientpositive/union_remove_12.q @@ -7,7 +7,6 @@ set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_13.q b/ql/src/test/queries/clientpositive/union_remove_13.q index 6f337ad918..5bac272fa0 100644 --- a/ql/src/test/queries/clientpositive/union_remove_13.q +++ b/ql/src/test/queries/clientpositive/union_remove_13.q @@ -7,7 +7,6 @@ set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_14.q b/ql/src/test/queries/clientpositive/union_remove_14.q index d98dbe5b82..64a5f23464 100644 --- a/ql/src/test/queries/clientpositive/union_remove_14.q +++ b/ql/src/test/queries/clientpositive/union_remove_14.q @@ -7,7 +7,6 @@ set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.auto.convert.join=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_15.q b/ql/src/test/queries/clientpositive/union_remove_15.q index 9c0f3a3bdf..effd590932 100644 --- a/ql/src/test/queries/clientpositive/union_remove_15.q +++ b/ql/src/test/queries/clientpositive/union_remove_15.q @@ -8,7 +8,6 @@ set hive.merge.mapredfiles=false; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_16.q b/ql/src/test/queries/clientpositive/union_remove_16.q index ec24cf090c..3bdb0f316c 100644 --- a/ql/src/test/queries/clientpositive/union_remove_16.q +++ b/ql/src/test/queries/clientpositive/union_remove_16.q @@ -6,7 +6,6 @@ set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; diff --git a/ql/src/test/queries/clientpositive/union_remove_17.q b/ql/src/test/queries/clientpositive/union_remove_17.q index 92fa2e926e..16649c11dc 100644 --- a/ql/src/test/queries/clientpositive/union_remove_17.q +++ b/ql/src/test/queries/clientpositive/union_remove_17.q @@ -8,7 +8,6 @@ set hive.merge.mapredfiles=false; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_18.q b/ql/src/test/queries/clientpositive/union_remove_18.q index 8259198c2d..db8d99f5c6 100644 --- a/ql/src/test/queries/clientpositive/union_remove_18.q +++ b/ql/src/test/queries/clientpositive/union_remove_18.q @@ -8,7 +8,6 @@ set hive.merge.mapredfiles=false; set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_19.q b/ql/src/test/queries/clientpositive/union_remove_19.q index 675f01b645..1466c985eb 100644 --- a/ql/src/test/queries/clientpositive/union_remove_19.q +++ b/ql/src/test/queries/clientpositive/union_remove_19.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_2.q b/ql/src/test/queries/clientpositive/union_remove_2.q index e5fe0bfe6e..7337a4cf94 100644 --- a/ql/src/test/queries/clientpositive/union_remove_2.q +++ b/ql/src/test/queries/clientpositive/union_remove_2.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_20.q b/ql/src/test/queries/clientpositive/union_remove_20.q index 79a90599c2..a84bce6bc0 100644 --- a/ql/src/test/queries/clientpositive/union_remove_20.q +++ b/ql/src/test/queries/clientpositive/union_remove_20.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_21.q b/ql/src/test/queries/clientpositive/union_remove_21.q index 088a80ea80..458144521f 100644 --- a/ql/src/test/queries/clientpositive/union_remove_21.q +++ b/ql/src/test/queries/clientpositive/union_remove_21.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_22.q b/ql/src/test/queries/clientpositive/union_remove_22.q index ef3ba51c4e..1e7fa44a68 100644 --- a/ql/src/test/queries/clientpositive/union_remove_22.q +++ b/ql/src/test/queries/clientpositive/union_remove_22.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/union_remove_23.q b/ql/src/test/queries/clientpositive/union_remove_23.q index 3145ac0568..ce807e5edf 100644 --- a/ql/src/test/queries/clientpositive/union_remove_23.q +++ b/ql/src/test/queries/clientpositive/union_remove_23.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_24.q b/ql/src/test/queries/clientpositive/union_remove_24.q index 6c34f565be..15f4ccd2df 100644 --- a/ql/src/test/queries/clientpositive/union_remove_24.q +++ b/ql/src/test/queries/clientpositive/union_remove_24.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_25.q b/ql/src/test/queries/clientpositive/union_remove_25.q index b186c2c5bb..df9ab7ed78 100644 --- a/ql/src/test/queries/clientpositive/union_remove_25.q +++ b/ql/src/test/queries/clientpositive/union_remove_25.q @@ -7,7 +7,6 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_26.q b/ql/src/test/queries/clientpositive/union_remove_26.q index 6f8891845b..73953c6551 100644 --- a/ql/src/test/queries/clientpositive/union_remove_26.q +++ b/ql/src/test/queries/clientpositive/union_remove_26.q @@ -21,7 +21,6 @@ insert into inputTbl3 select * from inputSrcTbl3; set hive.compute.query.using.stats=true; set hive.optimize.union.remove=true; -set mapred.input.dir.recursive=true; --- union remove optimization effects, stats optimization does not though it is on since inputTbl2 column stats is not available analyze table inputTbl1_n6 compute statistics for columns; @@ -76,7 +75,6 @@ select count(*) from ( set hive.compute.query.using.stats=false; set hive.optimize.union.remove=true; -set mapred.input.dir.recursive=true; explain SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl1_n6 diff --git a/ql/src/test/queries/clientpositive/union_remove_3.q b/ql/src/test/queries/clientpositive/union_remove_3.q index 490be99e7b..b60b01a5c9 100644 --- a/ql/src/test/queries/clientpositive/union_remove_3.q +++ b/ql/src/test/queries/clientpositive/union_remove_3.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->remove->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_4.q b/ql/src/test/queries/clientpositive/union_remove_4.q index adc45e7a9b..d4da8a5cb3 100644 --- a/ql/src/test/queries/clientpositive/union_remove_4.q +++ b/ql/src/test/queries/clientpositive/union_remove_4.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; -set mapred.input.dir.recursive=true; set hive.merge.smallfiles.avgsize=1; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/union_remove_5.q b/ql/src/test/queries/clientpositive/union_remove_5.q index 05f7c32a50..56f605db27 100644 --- a/ql/src/test/queries/clientpositive/union_remove_5.q +++ b/ql/src/test/queries/clientpositive/union_remove_5.q @@ -6,7 +6,6 @@ set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_6.q b/ql/src/test/queries/clientpositive/union_remove_6.q index 9dd5fb302c..e975641430 100644 --- a/ql/src/test/queries/clientpositive/union_remove_6.q +++ b/ql/src/test/queries/clientpositive/union_remove_6.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_6_subq.q b/ql/src/test/queries/clientpositive/union_remove_6_subq.q index 3ae5d95728..ac36e5a3f4 100644 --- a/ql/src/test/queries/clientpositive/union_remove_6_subq.q +++ b/ql/src/test/queries/clientpositive/union_remove_6_subq.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/union_remove_7.q b/ql/src/test/queries/clientpositive/union_remove_7.q index caca645f3b..7cb284780f 100644 --- a/ql/src/test/queries/clientpositive/union_remove_7.q +++ b/ql/src/test/queries/clientpositive/union_remove_7.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_8.q b/ql/src/test/queries/clientpositive/union_remove_8.q index 397460e627..2a6005bae8 100644 --- a/ql/src/test/queries/clientpositive/union_remove_8.q +++ b/ql/src/test/queries/clientpositive/union_remove_8.q @@ -5,7 +5,6 @@ set hive.optimize.union.remove=true; set hive.merge.sparkfiles=false; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_9.q b/ql/src/test/queries/clientpositive/union_remove_9.q index 3b24b95da3..37946ebc44 100644 --- a/ql/src/test/queries/clientpositive/union_remove_9.q +++ b/ql/src/test/queries/clientpositive/union_remove_9.q @@ -6,7 +6,6 @@ set hive.merge.sparkfiles=true; set hive.merge.mapfiles=true; set hive.merge.mapredfiles=true; set hive.merge.smallfiles.avgsize=1; -set mapred.input.dir.recursive=true; -- SORT_QUERY_RESULTS -- This is to test the union->selectstar->filesink optimization diff --git a/ql/src/test/queries/clientpositive/union_remove_plan.q b/ql/src/test/queries/clientpositive/union_remove_plan.q index e4ad41f2f3..fbc83bc4c1 100644 --- a/ql/src/test/queries/clientpositive/union_remove_plan.q +++ b/ql/src/test/queries/clientpositive/union_remove_plan.q @@ -4,7 +4,6 @@ create table if not exists test_table(column1 string, column2 int); insert into test_table values('a',1),('b',2); set hive.optimize.union.remove=true; -set mapred.input.dir.recursive=true; explain select column1 from test_table group by column1