diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java new file mode 100644 index 0000000..4926638 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.core.Sort; +import org.apache.calcite.rel.core.Union; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.tools.RelBuilderFactory; + +/** + * Planner rule that pushes a + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit} + * past a + * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion}. + */ +public class HiveSortUnionReduceRule extends RelOptRule { + + /** Rule instance for Union implementation that does not preserve the + * ordering of its inputs. Thus, it makes no sense to match this rule + * if the Sort does not have a limit, i.e., {@link Sort#fetch} is null. */ + public static final HiveSortUnionReduceRule INSTANCE = new HiveSortUnionReduceRule(false); + + /** Rule instance for Union implementation that preserves the ordering + * of its inputs. It is still worth applying this rule even if the Sort + * does not have a limit, for the merge of already sorted inputs that + * the Union can do is usually cheap. */ + public static final HiveSortUnionReduceRule MATCH_NULL_FETCH = new HiveSortUnionReduceRule(true); + + /** Whether to match a Sort whose {@link Sort#fetch} is null. Generally + * this only makes sense if the Union preserves order (and merges). */ + private final boolean matchNullFetch; + + // ~ Constructors ----------------------------------------------------------- + + public HiveSortUnionReduceRule(boolean matchNullFetch) { + this(Sort.class, Union.class, matchNullFetch, RelFactories.LOGICAL_BUILDER, + "HiveSortUnionReduceRule:default"); + } + + /** + * Creates a HiveSortUnionReduceRule. 
+ */
+  public HiveSortUnionReduceRule(
+      Class<? extends Sort> sortClass,
+      Class<? extends Union> unionClass,
+      boolean matchNullFetch,
+      RelBuilderFactory relBuilderFactory,
+      String description) {
+    super(
+        operand(sortClass,
+            operand(unionClass, any())),
+        relBuilderFactory, description);
+    this.matchNullFetch = matchNullFetch;
+  }
+
+  // ~ Methods ----------------------------------------------------------------
+
+  @Override public boolean matches(RelOptRuleCall call) {
+    final Sort sort = call.rel(0);
+    final Union union = call.rel(1);
+
+    // We only apply this rule if Union.all is true and Sort.offset is null.
+    // matchNullFetch indicates whether the rule should also fire when
+    // Sort.fetch is null.
+    return union.all
+        && sort.offset == null
+        && (matchNullFetch || sort.fetch != null);
+  }
+
+  @Override public void onMatch(RelOptRuleCall call) {
+    final Sort sort = call.rel(0);
+    final Union union = call.rel(1);
+
+    List<RelNode> inputs = new ArrayList<>();
+    // 'ret' stays true while every branch already satisfies the sort's
+    // collation and limit; if it is still true at the end, there is
+    // nothing to push past the union.
+    boolean ret = true;
+
+    // Registry that tracks the operators this rule has already visited
+    HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
+    for (RelNode input : union.getInputs()) {
+      // If this operator has been visited already by the rule,
+      // we do not need to apply the optimization
+      if (registry != null && input instanceof HepRelVertex
+          && registry.getVisited(this).contains(((HepRelVertex) input).getCurrentRel())) {
+        return;
+      }
+      if (!RelMdUtil.checkInputForCollationAndLimit(input, sort.getCollation(), sort.offset,
+          sort.fetch)) {
+        ret = false;
+        Sort branchSort = sort.copy(sort.getTraitSet(), input, sort.getCollation(), sort.offset,
+            sort.fetch);
+        inputs.add(branchSort);
+        if (registry != null) {
+          registry.registerVisited(this, branchSort);
+        }
+      } else {
+        inputs.add(input);
+      }
+    }
+    // there is nothing to change
+    if (ret) {
+      return;
+    }
+    // create the new union and the sort on top of it
+    Union unionCopy = (Union) union
+        .copy(union.getTraitSet(), inputs, union.all);
+    Sort result = sort.copy(sort.getTraitSet(), unionCopy, sort.getCollation(),
+        sort.offset, sort.fetch);
+
+    call.transformTo(result);
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c005b1a..0518d35 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -156,6 +156,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -1086,6 +1087,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
       new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,
           HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY));
+    basePlan = hepPlan(basePlan, true, mdProvider, HiveSortUnionReduceRule.INSTANCE);
     return basePlan;
   }
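The q tests below exercise the interesting combinations (ORDER BY only, LIMIT 0, LIMIT only, ORDER BY + LIMIT) with CBO on and off. As a conceptual sketch of the transformation for the ORDER BY + LIMIT case: the rule makes the query behave as if each branch had been given its own copy of the sort and fetch, so the final sort only has to merge at most four rows instead of the full union. The rewritten form below is illustrative only and is not part of the patch:

    select key from (
      select * from (select key from src a order by key limit 2) s1
      union all
      select * from (select key from src b order by key limit 2) s2
    ) t
    order by key
    limit 2;

diff --git a/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q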
b/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q new file mode 100644 index 0000000..8fac56a --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q @@ -0,0 +1,51 @@ +explain +select key from src a +union all +select key from src b +order by key; + +explain +select key from src a +union all +select key from src b +limit 0; + +explain +select key from src a +union all +select key from src b +limit 2; + +explain +select key from src a +union all +select key from src b +order by key +limit 2; + +set hive.cbo.enable=false; + +explain +select key from src a +union all +select key from src b +order by key; + +explain +select key from src a +union all +select key from src b +limit 0; + +explain +select key from src a +union all +select key from src b +limit 2; + +explain +select key from src a +union all +select key from src b +order by key +limit 2; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q index 059da68..503cd51 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q @@ -4,7 +4,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl; SET hive.optimize.ppd=true; SET hive.optimize.index.filter=true; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_char.q b/ql/src/test/queries/clientpositive/parquet_ppd_char.q index eaddcb4..74da3ec 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_char.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_char.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; set hive.optimize.index.filter=false; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q index 41d0d64..7d0b01e 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_date.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" 
as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; -- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where da='1970-02-20'; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q index dfca486..9641f27 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q @@ -4,7 +4,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where d=0.22; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q index 1b9f6ff..ea04464 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl; -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q index 6449c6d..d8a0048 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q @@ -5,7 +5,12 @@ SET 
mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; set hive.optimize.index.filter=false; diff --git a/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out new file mode 100644 index 0000000..c8ab110 --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out @@ -0,0 +1,554 @@ +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +limit 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + 
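The next plan (CBO on, LIMIT 2 without ORDER BY) shows the new rule firing: Stage-1 and Stage-3 apply LIMIT 2 to each branch, and Stage-2 unions the two two-row results and applies the final LIMIT 2. A sketch of the equivalent hand-written query, illustrative only and not part of the patch:

    select * from (
      select * from (select key from src a limit 2) s1
      union all
      select * from (select key from src b limit 2) s2
    ) t
    limit 2;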
+PREHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 
+ Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +limit 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 
depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + 
Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out index a1addb7..b386e74 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_dpp.q.out @@ -41,27 +41,27 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 3 <- Union 4 (CONTAINS) -Map 6 <- Union 4 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_17] + File Output Operator [FS_27] compressed:false Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Merge Join Operator [MERGEJOIN_21] + Merge Join Operator [MERGEJOIN_31] | condition map:[{"":"Left Outer Join0 to 1"}] | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"} | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE |<-Map 1 [SIMPLE_EDGE] llap - | Reduce Output Operator [RS_13] + | Reduce Output Operator [RS_23] | key expressions:_col0 (type: int) | Map-reduce partition columns:_col0 (type: int) | sort order:+ @@ -72,45 +72,71 @@ Stage-0 | TableScan [TS_0] | alias:a | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - |<-Reducer 5 [SIMPLE_EDGE] llap - Reduce Output Operator [RS_14] + |<-Reducer 6 [SIMPLE_EDGE] llap + Reduce Output Operator [RS_24] key expressions:_col0 (type: int) Map-reduce partition columns:_col0 (type: int) sort order:+ Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Limit [LIM_11] + Limit [LIM_21] Number of rows:1 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_10] + Select Operator [SEL_20] | outputColumnNames:["_col0"] | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - |<-Union 4 [SIMPLE_EDGE] - |<-Map 3 [CONTAINS] llap - | Reduce Output Operator [RS_9] + |<-Union 5 [SIMPLE_EDGE] + |<-Reducer 4 [CONTAINS] uber + | Reduce Output Operator [RS_19] | sort order: | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE | value expressions:_col0 (type: int) - | Limit [LIM_8] + | Limit [LIM_18] | Number of rows:1 | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - | Select Operator [SEL_3] - | outputColumnNames:["_col0"] + | Limit [LIM_7] + | Number of rows:1 | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - | TableScan [TS_2] - | alias:tb2 - | 
Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - |<-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_9] + | Select Operator [SEL_6] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | |<-Map 3 [SIMPLE_EDGE] llap + | Reduce Output Operator [RS_5] + | sort order: + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | value expressions:_col0 (type: int) + | Limit [LIM_4] + | Number of rows:1 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | Select Operator [SEL_3] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | TableScan [TS_2] + | alias:tb2 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + |<-Reducer 8 [CONTAINS] uber + Reduce Output Operator [RS_19] sort order: Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE value expressions:_col0 (type: int) - Limit [LIM_8] + Limit [LIM_18] Number of rows:1 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_5] + Select Operator [SEL_15] outputColumnNames:["_col0"] Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - TableScan [TS_4] - alias:tb2 - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit [LIM_14] + | Number of rows:1 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + |<-Map 7 [SIMPLE_EDGE] llap + Reduce Output Operator [RS_12] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Limit [LIM_11] + Number of rows:1 + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_10] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan [TS_9] + alias:tb2 + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/merge4.q.out b/ql/src/test/results/clientpositive/merge4.q.out index 08e4455..afe360f 100644 --- a/ql/src/test/results/clientpositive/merge4.q.out +++ b/ql/src/test/results/clientpositive/merge4.q.out @@ -2763,13 +2763,15 @@ select * from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1, Stage-9 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-9 is a root stage STAGE PLANS: Stage: Stage-1 @@ -2782,54 +2784,75 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 1500 Data size: 140124 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: 
COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '1' (type: string), '1' (type: string), 'file,' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 129500 Basic stats: COMPLETE Column stats: COMPLETE - Union - Statistics: Num rows: 1500 Data size: 140124 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Union + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + TableScan + Union + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 0 Basic stats: 
PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -2848,10 +2871,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2863,7 +2886,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2875,12 +2898,42 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 129500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: '1' (type: string), '1' (type: string), 'file,' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + PREHOOK: query: insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select * from ( select key, value, hr from srcpart where ds='2008-04-08' diff --git a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out index 6e62ee4..084d6ff 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl 
-POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out index defaa9d..561fa52 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out index 55231e9..840e539 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table 
newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out index a30820e..1ca671e 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out index 3b3e5b7..546c9e5 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: 
create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out index 5a62e80..30f087c 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, 
cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/spark/temp_table.q.out b/ql/src/test/results/clientpositive/spark/temp_table.q.out index 119d26d..174d585 100644 --- a/ql/src/test/results/clientpositive/spark/temp_table.q.out +++ b/ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -229,7 +229,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1), Map 3 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1), Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -244,10 +246,10 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: bar @@ -259,7 +261,7 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 @@ -267,7 +269,22 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -278,6 +295,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git 
diff --git a/ql/src/test/results/clientpositive/temp_table.q.out b/ql/src/test/results/clientpositive/temp_table.q.out
index f3bef65..7b8ef37 100644
--- a/ql/src/test/results/clientpositive/temp_table.q.out
+++ b/ql/src/test/results/clientpositive/temp_table.q.out
@@ -299,7 +299,9 @@ POSTHOOK: query: explain select * from (select * from foo union all select * fro
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -312,14 +314,67 @@ STAGE PLANS:
             expressions: key (type: string), value (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE
-            Union
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                TopN Hash Memory Usage: 0.1
-                value expressions: _col1 (type: string)
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Union
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col1 (type: string)
+          TableScan
+            Union
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                TopN Hash Memory Usage: 0.1
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
           TableScan
             alias: bar
             Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE
@@ -327,29 +382,26 @@ STAGE PLANS:
             expressions: key (type: string), value (type: string)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE
-            Union
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: _col0 (type: string)
-                sort order: +
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                TopN Hash Memory Usage: 0.1
-                value expressions: _col1 (type: string)
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 10
             Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
               table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
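On plain MapReduce the same rewrite surfaces as extra stages: Stage-1 and Stage-3 each compute a branch-local top 10 and spill it through the intermediate SequenceFile/LazyBinarySerDe sink, and Stage-2 unions the two 10-row files and applies the final top 10. Expressed as manual staging (the temporary table names below are illustrative only, not part of the patch):

    CREATE TEMPORARY TABLE stage_foo AS
    SELECT * FROM foo ORDER BY key LIMIT 10;   -- what Stage-1 materializes
    CREATE TEMPORARY TABLE stage_bar AS
    SELECT * FROM bar ORDER BY key LIMIT 10;   -- what Stage-3 materializes
    SELECT * FROM (SELECT * FROM stage_foo
                   UNION ALL
                   SELECT * FROM stage_bar) u
    ORDER BY key LIMIT 10;                     -- what Stage-2 computes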
diff --git a/ql/src/test/results/clientpositive/tez/constprog_dpp.q.out b/ql/src/test/results/clientpositive/tez/constprog_dpp.q.out
index acebd05..bfe771d 100644
--- a/ql/src/test/results/clientpositive/tez/constprog_dpp.q.out
+++ b/ql/src/test/results/clientpositive/tez/constprog_dpp.q.out
@@ -41,27 +41,27 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Map 3 <- Union 4 (CONTAINS)
-Map 6 <- Union 4 (CONTAINS)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 5 <- Union 4 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE), Union 5 (CONTAINS)
+Reducer 6 <- Union 5 (SIMPLE_EDGE)
+Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 5 (CONTAINS)
 
 Stage-0
    Fetch Operator
       limit:-1
       Stage-1
          Reducer 2
-         File Output Operator [FS_17]
+         File Output Operator [FS_27]
            compressed:false
            Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
            table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
-         Merge Join Operator [MERGEJOIN_21]
+         Merge Join Operator [MERGEJOIN_31]
          |  condition map:[{"":"Left Outer Join0 to 1"}]
          |  keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
          |  outputColumnNames:["_col0"]
          |  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
          |<-Map 1 [SIMPLE_EDGE]
-         |  Reduce Output Operator [RS_13]
+         |  Reduce Output Operator [RS_23]
          |     key expressions:_col0 (type: int)
          |     Map-reduce partition columns:_col0 (type: int)
          |     sort order:+
@@ -72,45 +72,71 @@ Stage-0
          |     TableScan [TS_0]
          |        alias:a
          |        Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-         |<-Reducer 5 [SIMPLE_EDGE]
-            Reduce Output Operator [RS_14]
+         |<-Reducer 6 [SIMPLE_EDGE]
+            Reduce Output Operator [RS_24]
               key expressions:_col0 (type: int)
               Map-reduce partition columns:_col0 (type: int)
               sort order:+
               Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-              Limit [LIM_11]
+              Limit [LIM_21]
                  Number of rows:1
                  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                 Select Operator [SEL_10]
+                 Select Operator [SEL_20]
                  |  outputColumnNames:["_col0"]
                  |  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                 |<-Union 4 [SIMPLE_EDGE]
-                 |<-Map 3 [CONTAINS]
-                 |  Reduce Output Operator [RS_9]
+                 |<-Union 5 [SIMPLE_EDGE]
+                 |<-Reducer 4 [CONTAINS]
+                 |  Reduce Output Operator [RS_19]
                  |     sort order:
                  |     Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
                  |     value expressions:_col0 (type: int)
-                 |     Limit [LIM_8]
+                 |     Limit [LIM_18]
                  |        Number of rows:1
                  |        Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                 |        Select Operator [SEL_3]
-                 |           outputColumnNames:["_col0"]
+                 |        Limit [LIM_7]
+                 |           Number of rows:1
                  |           Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                 |           TableScan [TS_2]
-                 |              alias:tb2
-                 |              Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                 |<-Map 6 [CONTAINS]
-                    Reduce Output Operator [RS_9]
+                 |           Select Operator [SEL_6]
+                 |           |  outputColumnNames:["_col0"]
+                 |           |  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                 |           |<-Map 3 [SIMPLE_EDGE]
+                 |              Reduce Output Operator [RS_5]
+                 |                 sort order:
+                 |                 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                 |                 value expressions:_col0 (type: int)
+                 |                 Limit [LIM_4]
+                 |                    Number of rows:1
+                 |                    Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                 |                    Select Operator [SEL_3]
+                 |                       outputColumnNames:["_col0"]
+                 |                       Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                 |                       TableScan [TS_2]
+                 |                          alias:tb2
+                 |                          Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                 |<-Reducer 8 [CONTAINS]
+                    Reduce Output Operator [RS_19]
                        sort order:
                        Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
                        value expressions:_col0 (type: int)
-                      Limit [LIM_8]
+                      Limit [LIM_18]
                          Number of rows:1
                          Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                         Select Operator [SEL_5]
+                         Select Operator [SEL_15]
                             outputColumnNames:["_col0"]
                             Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                            TableScan [TS_4]
-                               alias:tb2
-                               Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                            Limit [LIM_14]
+                            |  Number of rows:1
+                            |  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                            |<-Map 7 [SIMPLE_EDGE]
+                               Reduce Output Operator [RS_12]
+                                  sort order:
+                                  Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                                  Limit [LIM_11]
+                                     Number of rows:1
+                                     Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                                     Select Operator [SEL_10]
+                                        Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                                        TableScan [TS_9]
+                                           alias:tb2
+                                           Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
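constprog_dpp exercises the bare-LIMIT case: no ordering is involved, yet the one-row fetch is still duplicated into each union branch (LIM_4 and LIM_11) ahead of the union's own limit (LIM_18), so each tb2 scan can stop after a single row. A hedged reconstruction of the query shape from the operator tree above (the outer table name is a guess from the alias a; only tb2 is visible in the plan):

    SELECT a.id
    FROM tb a
    LEFT OUTER JOIN (SELECT id
                     FROM (SELECT id FROM tb2
                           UNION ALL
                           SELECT id FROM tb2) u
                     LIMIT 1) b
    ON a.id = b.id;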
diff --git a/ql/src/test/results/clientpositive/tez/temp_table.q.out b/ql/src/test/results/clientpositive/tez/temp_table.q.out
index ec3f810..62bce88 100644
--- a/ql/src/test/results/clientpositive/tez/temp_table.q.out
+++ b/ql/src/test/results/clientpositive/tez/temp_table.q.out
@@ -237,9 +237,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
       Edges:
-        Map 1 <- Union 2 (CONTAINS)
-        Map 4 <- Union 2 (CONTAINS)
-        Reducer 3 <- Union 2 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 4 <- Union 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -254,10 +254,10 @@ STAGE PLANS:
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.1
                       value expressions: _col1 (type: string)
-        Map 4 
+        Map 5 
            Map Operator Tree:
                 TableScan
                   alias: bar
@@ -269,15 +269,30 @@ STAGE PLANS:
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.1
                       value expressions: _col1 (type: string)
-        Reducer 3 
+        Reducer 2 
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE
+               Limit
+                 Number of rows: 10
+                 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                   TopN Hash Memory Usage: 0.1
+                   value expressions: _col1 (type: string)
+        Reducer 4 
+           Reduce Operator Tree:
+             Select Operator
+               expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
                Limit
                  Number of rows: 10
                  Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -288,8 +303,23 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Union 2 
-            Vertex: Union 2
+        Reducer 6 
+           Reduce Operator Tree:
+             Select Operator
+               expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE
+               Limit
+                 Number of rows: 10
+                 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+                   TopN Hash Memory Usage: 0.1
+                   value expressions: _col1 (type: string)
+        Union 3 
+            Vertex: Union 3
 
   Stage: Stage-0
     Fetch Operator
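To reproduce any of the plans above, only the CBO path needs to be enabled; hive.cbo.enable is the standard switch. The query below is the temp_table test's shape, repeated here for convenience (the u alias is illustrative):

    set hive.cbo.enable=true;
    explain
    select * from (select * from foo union all select * from bar) u
    order by key limit 10;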