diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 56a39df..31711c2 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1307,16 +1307,16 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "we are increasing the number of files possibly by a big margin. So, we merge aggressively."), HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit intra-query correlations."), - HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE("hive.optimize.limitjointranspose", false, - "Whether to push a limit through left/right outer join. If the value is true and the size of the outer\n" + - "input is reduced enough (as specified in hive.optimize.limitjointranspose.reduction), the limit is pushed\n" + - "to the outer input; to remain semantically correct, the limit is kept on top of the join too."), - HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limitjointranspose.reductionpercentage", 1.0f, - "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction of the\n" + - "size of the outer input of the join that we should get in order to apply the rule."), - HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limitjointranspose.reductiontuples", (long) 0, - "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction in the\n" + - "number of tuples of the outer input of the join that you should get in order to apply the rule."), + HIVE_OPTIMIZE_LIMIT_TRANSPOSE("hive.optimize.limittranspose", false, + "Whether to push a limit through a left/right outer join or a union. If the value is true and the size of the outer\n" + + "input is reduced enough (as specified in hive.optimize.limittranspose.reductionpercentage and hive.optimize.limittranspose.reductiontuples),\n" + + "the limit is pushed to the outer input of the join or to the inputs of the union; to remain semantically correct, the limit is kept on top of the join or the union too."), + HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limittranspose.reductionpercentage", 1.0f, + "When hive.optimize.limittranspose is true, this variable specifies the minimal reduction of the\n" + + "size of the outer input of the join or of the union input that we should get in order to apply the rule."), + HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limittranspose.reductiontuples", (long) 0, + "When hive.optimize.limittranspose is true, this variable specifies the minimal reduction in the\n" + + "number of tuples of the outer input of the join or the input of the union that we should get in order to apply the rule."), HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false, "Whether to create a separate plan for skewed keys for the tables in the join.\n" + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java new file mode 100644 index 0000000..cfe072e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +/** + * Planner rule that pushes a + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit} + * past a + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion}. + */ +public class HiveSortUnionReduceRule extends RelOptRule { + + /** + * Rule instance for a Union implementation that does not preserve the ordering + * of its inputs. Thus, it makes no sense to match this rule if the Sort does + * not have a limit, i.e., {@link Sort#fetch} is null. + */ + public static final HiveSortUnionReduceRule INSTANCE = new HiveSortUnionReduceRule(false); + + /** + * Rule instance for a Union implementation that preserves the ordering of its + * inputs. It is still worth applying this rule even if the Sort does not have + * a limit, because the merge of already sorted inputs that the Union can do is + * usually cheap. + */ + public static final HiveSortUnionReduceRule MATCH_NULL_FETCH = new HiveSortUnionReduceRule(true); + + /** + * Whether to match a Sort whose {@link Sort#fetch} is null. Generally this + * only makes sense if the Union preserves order (and merges). + */ + private final boolean matchNullFetch; + + // ~ Constructors ----------------------------------------------------------- + + public HiveSortUnionReduceRule(boolean matchNullFetch) { + this(Sort.class, Union.class, matchNullFetch, RelFactories.LOGICAL_BUILDER, + "HiveSortUnionReduceRule:default"); + } + + /** + * Creates a HiveSortUnionReduceRule. + */ + public HiveSortUnionReduceRule(Class<? extends Sort> sortClass, + Class<? extends Union> unionClass, boolean matchNullFetch, + RelBuilderFactory relBuilderFactory, String description) { + super(operand(sortClass, operand(unionClass, any())), relBuilderFactory, description); + this.matchNullFetch = matchNullFetch; + } + + // ~ Methods ---------------------------------------------------------------- + + @Override + public boolean matches(RelOptRuleCall call) { + final Sort sort = call.rel(0); + final Union union = call.rel(1); + + // We only apply this rule if Union.all is true. + // There is a flag indicating if this rule should be applied when + // Sort.fetch is null.
+ return union.all && (matchNullFetch || sort.fetch != null) + // Calcite bug CALCITE-987: do not match a limit of 0; fetch may be null here when matchNullFetch is true + && (sort.fetch == null || RexLiteral.intValue(sort.fetch) > 0); + } + + public void onMatch(RelOptRuleCall call) { + final Sort sort = call.rel(0); + final Union union = call.rel(1); + List<RelNode> inputs = new ArrayList<>(); + // 'ret' stays true while every union input already satisfies the sort's + // collation and limit; in that case there is nothing to push down. + boolean ret = true; + for (RelNode input : union.getInputs()) { + if (!RelMdUtil.checkInputForCollationAndLimit(input, sort.getCollation(), sort.offset, + sort.fetch)) { + ret = false; + Sort branchSort = sort.copy(sort.getTraitSet(), input, sort.getCollation(), sort.offset, + sort.fetch); + ((HiveSortLimit) branchSort).setRuleCreated(true); + inputs.add(branchSort); + } else { + inputs.add(input); + } + } + // there is nothing to change + if (ret) { + return; + } + // create new union and sort + Union unionCopy = (Union) union.copy(union.getTraitSet(), inputs, union.all); + Sort result = sort.copy(sort.getTraitSet(), unionCopy, sort.getCollation(), sort.offset, + sort.fetch); + call.transformTo(result); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index a0d14dc..beb5ff6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -157,6 +157,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -1058,15 +1059,16 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // NOTE: We run this after PPD to support old style join syntax.
// Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) { + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE)) { // This should be a cost based decision, but till we enable the extended cost // model, we will use the given value for the variable final float reductionProportion = HiveConf.getFloatVar(conf, - HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE); + HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE); final long reductionTuples = HiveConf.getLongVar(conf, - HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES); + HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES); basePlan = hepPlan(basePlan, true, mdProvider, HiveSortMergeRule.INSTANCE, HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE); + basePlan = hepPlan(basePlan, true, mdProvider, HiveSortUnionReduceRule.INSTANCE); basePlan = hepPlan(basePlan, true, mdProvider, HepMatchOrder.BOTTOM_UP, new HiveSortRemoveRule(reductionProportion, reductionTuples), HiveProjectSortTransposeRule.INSTANCE); diff --git a/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q b/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q new file mode 100644 index 0000000..a393c81 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q @@ -0,0 +1,72 @@ +set hive.mapred.mode=nonstrict; + +create table s as select * from src limit 10; + +explain +select key from s a +union all +select key from s b +order by key; + +explain +select key from s a +union all +select key from s b +limit 0; + +explain +select key from s a +union all +select key from s b +limit 5; + +explain +select key from s a +union all +select key from s b +order by key +limit 5; + +set hive.optimize.limittranspose=true; + +explain +select key from s a +union all +select key from s b +order by key; + +explain +select key from s a +union all +select key from s b +limit 0; + +explain +select key from s a +union all +select key from s b +limit 5; + +explain +select key from s a +union all +select key from s b +order by key +limit 5; + +set hive.optimize.limittranspose.reductionpercentage=0.1f; + +explain +select key from s a +union all +select key from s b +limit 5; + +set hive.optimize.limittranspose.reductionpercentage=1f; +set hive.optimize.limittranspose.reductiontuples=8; + +explain +select key from s a +union all +select key from s b +limit 5; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/limit_join_transpose.q b/ql/src/test/queries/clientpositive/limit_join_transpose.q index 80430c6..da34806 100644 --- a/ql/src/test/queries/clientpositive/limit_join_transpose.q +++ b/ql/src/test/queries/clientpositive/limit_join_transpose.q @@ -1,5 +1,5 @@ set hive.mapred.mode=nonstrict; -set hive.optimize.limitjointranspose=false; +set hive.optimize.limittranspose=false; explain select * @@ -13,9 +13,9 @@ on src1.key = src2.key limit 1; -set hive.optimize.limitjointranspose=true; -set hive.optimize.limitjointranspose.reductionpercentage=0.0001f; -set hive.optimize.limitjointranspose.reductiontuples=10; +set hive.optimize.limittranspose=true; +set hive.optimize.limittranspose.reductionpercentage=0.0001f; +set hive.optimize.limittranspose.reductiontuples=10; explain select * @@ -29,8 +29,8 @@ on src1.key = src2.key limit 1; -set 
hive.optimize.limitjointranspose.reductionpercentage=0.1f; -set hive.optimize.limitjointranspose.reductiontuples=10; +set hive.optimize.limittranspose.reductionpercentage=0.1f; +set hive.optimize.limittranspose.reductiontuples=10; explain select * @@ -60,8 +60,8 @@ from src src1 right outer join ( on src1.key = src2.key limit 1; -set hive.optimize.limitjointranspose.reductionpercentage=1f; -set hive.optimize.limitjointranspose.reductiontuples=0; +set hive.optimize.limittranspose.reductionpercentage=1f; +set hive.optimize.limittranspose.reductiontuples=0; explain select * diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q index 059da68..503cd51 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q @@ -4,7 +4,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl; SET hive.optimize.ppd=true; SET hive.optimize.index.filter=true; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_char.q b/ql/src/test/queries/clientpositive/parquet_ppd_char.q index eaddcb4..74da3ec 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_char.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_char.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; set hive.optimize.index.filter=false; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q index 41d0d64..7d0b01e 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_date.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as 
char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; -- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where da='1970-02-20'; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q index dfca486..9641f27 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q @@ -4,7 +4,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where d=0.22; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q index 1b9f6ff..ea04464 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl; -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q index 6449c6d..d8a0048 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" 
as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; set hive.optimize.index.filter=false; diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q index cecbbd7..ff4cde2 100644 --- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q +++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -1,6 +1,6 @@ -set hive.optimize.limitjointranspose=true; -set hive.optimize.limitjointranspose.reductionpercentage=0.1f; -set hive.optimize.limitjointranspose.reductiontuples=100; +set hive.optimize.limittranspose=true; +set hive.optimize.limittranspose.reductionpercentage=0.1f; +set hive.optimize.limittranspose.reductiontuples=100; set hive.explain.user=false; set hive.auto.convert.join=false; set hive.optimize.dynamic.partition.hashjoin=false; diff --git a/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out new file mode 100644 index 0000000..7d7fc11 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out @@ -0,0 +1,694 @@ +PREHOOK: query: create table s as select * from src limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@s +POSTHOOK: query: create table s as select * from src limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@s +PREHOOK: query: explain +select key from s a +union all +select key from s b +order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +limit 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +order by key +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +order by key +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all 
+select key from s b +limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +limit 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + 
TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +order by key +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +order by key +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + TableScan + Union + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + 
TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from s a +union all +select key from s b +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num 
rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: a + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out index a1addb7..b386e74 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out @@ -41,27 +41,27 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 3 <- Union 4 (CONTAINS) -Map 6 <- Union 4 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_17] + File Output Operator [FS_27] compressed:false Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Merge Join Operator [MERGEJOIN_21] + Merge Join Operator [MERGEJOIN_31] | condition map:[{"":"Left Outer Join0 to 1"}] | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"} | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE |<-Map 1 [SIMPLE_EDGE] llap - | Reduce Output Operator [RS_13] + | Reduce Output Operator [RS_23] | key expressions:_col0 (type: int) | Map-reduce partition columns:_col0 (type: int) | sort order:+ @@ -72,45 +72,71 @@ Stage-0 | TableScan [TS_0] | alias:a | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - |<-Reducer 5 [SIMPLE_EDGE] llap - Reduce Output Operator [RS_14] + |<-Reducer 6 [SIMPLE_EDGE] llap + Reduce Output Operator [RS_24] key expressions:_col0 (type: int) Map-reduce partition columns:_col0 (type: int) sort order:+ Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit [LIM_11] + Limit [LIM_21] Number of rows:1 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_10] + Select Operator [SEL_20] | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - |<-Union 4 [SIMPLE_EDGE] - |<-Map 3 [CONTAINS] llap - | Reduce Output Operator [RS_9] + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 4 [CONTAINS] uber + | Reduce Output Operator [RS_19] | sort order: | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE | value expressions:_col0 (type: int) - | Limit [LIM_8] + | Limit [LIM_18] | Number of rows:1 | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - | Select Operator [SEL_3] - | outputColumnNames:["_col0"] + | Limit [LIM_7] + | Number of rows:1 | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - | TableScan [TS_2] - | alias:tb2 - | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - |<-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_9] + | Select Operator [SEL_6] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | |<-Map 3 [SIMPLE_EDGE] llap + | Reduce Output Operator [RS_5] + | sort order: + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | value expressions:_col0 (type: int) + | Limit [LIM_4] + | Number of rows:1 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | Select Operator [SEL_3] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | TableScan [TS_2] + | alias:tb2 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + 
|<-Reducer 8 [CONTAINS] uber + Reduce Output Operator [RS_19] sort order: Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE value expressions:_col0 (type: int) - Limit [LIM_8] + Limit [LIM_18] Number of rows:1 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_5] + Select Operator [SEL_15] outputColumnNames:["_col0"] Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - TableScan [TS_4] - alias:tb2 - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit [LIM_14] + | Number of rows:1 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] llap + Reduce Output Operator [RS_12] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit [LIM_11] + Number of rows:1 + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_10] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan [TS_9] + alias:tb2 + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out index 6e62ee4..084d6ff 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out index c1565f8..6269c4b 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: 
default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out index 55231e9..840e539 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: 
default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out index a30820e..1ca671e 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out index 3b3e5b7..546c9e5 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as 
timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out index 5a62e80..30f087c 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/spark/temp_table.q.out b/ql/src/test/results/clientpositive/spark/temp_table.q.out index 119d26d..174d585 100644 --- a/ql/src/test/results/clientpositive/spark/temp_table.q.out +++ b/ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -229,7 +229,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1), Map 3 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1), Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -244,10 +246,10 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE TopN Hash 
Memory Usage: 0.1 value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: bar @@ -259,7 +261,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 @@ -267,7 +269,22 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -278,6 +295,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator
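
For intuition, here is a minimal, self-contained sketch of the invariant the new rule exploits: under UNION ALL (bag) semantics, copying the limit into each union branch while keeping it on top of the union cannot change the result. The class LimitUnionTransposeSketch and its sample data below are hypothetical, written only to illustrate the transformation; they are not part of this patch, and plain JDK streams stand in for the Hive operators.

// Hypothetical illustration -- not part of the patch. Demonstrates the invariant
// behind HiveSortUnionReduceRule: under UNION ALL (bag) semantics,
//   limit(union(a, b), n)  ==  limit(union(limit(a, n), limit(b, n)), n)
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class LimitUnionTransposeSketch {

  // Plan shape before the rule fires: limit(union(a, b), n).
  static List<Integer> limitOnTop(List<Integer> a, List<Integer> b, int n) {
    return Stream.concat(a.stream(), b.stream())
        .limit(n).collect(Collectors.toList());
  }

  // Plan shape after the rule fires: the limit is pushed into each branch
  // and, for correctness, also kept on top of the union.
  static List<Integer> limitPushed(List<Integer> a, List<Integer> b, int n) {
    return Stream.concat(a.stream().limit(n), b.stream().limit(n))
        .limit(n).collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<Integer> a = Arrays.asList(1, 2, 3, 4);
    List<Integer> b = Arrays.asList(5, 6, 7);
    // Both shapes return the same 5 rows; the pushed variant simply reads
    // at most n rows from each branch.
    System.out.println(limitOnTop(a, b, 5));  // [1, 2, 3, 4, 5]
    System.out.println(limitPushed(a, b, 5)); // [1, 2, 3, 4, 5]
  }
}

When an ORDER BY accompanies the LIMIT, the push-down is only valid if each branch sorts before limiting, which is why the golden plans above show a per-branch TopN, and why onMatch consults RelMdUtil.checkInputForCollationAndLimit to skip inputs that already satisfy the sort's collation and limit.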