diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
new file mode 100644
index 0000000..d002d47
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortUnionReduceRule.java
@@ -0,0 +1,146 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.hep.HepRelVertex;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rel.core.Union;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+
+/**
+ * Planner rule that pushes a
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit}
+ * past a
+ * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion}.
+ */
+public class HiveSortUnionReduceRule extends RelOptRule {
+
+  /** Rule instance for a Union implementation that does not preserve the
+   * ordering of its inputs. Thus, it makes no sense to match this rule
+   * if the Sort does not have a limit, i.e., {@link Sort#fetch} is null. */
+  public static final HiveSortUnionReduceRule INSTANCE = new HiveSortUnionReduceRule(false);
+
+  /** Rule instance for a Union implementation that preserves the ordering
+   * of its inputs. It is still worth applying this rule even if the Sort
+   * does not have a limit, because the merge of already sorted inputs that
+   * the Union performs is usually cheap. */
+  public static final HiveSortUnionReduceRule MATCH_NULL_FETCH = new HiveSortUnionReduceRule(true);
+
+  /** Whether to match a Sort whose {@link Sort#fetch} is null. Generally
+   * this only makes sense if the Union preserves order (and merges). */
+  private final boolean matchNullFetch;
+
+  // ~ Constructors -----------------------------------------------------------
+
+  public HiveSortUnionReduceRule(boolean matchNullFetch) {
+    this(HiveSortLimit.class, HiveUnion.class, matchNullFetch, RelFactories.LOGICAL_BUILDER,
+        "HiveSortUnionReduceRule:default");
+  }
+
+  /**
+   * Creates a HiveSortUnionReduceRule.
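+   *
+   * @param sortClass sort operator class to match (a subclass of {@link Sort})
+   * @param unionClass union operator class to match (a subclass of {@link Union})
+   * @param matchNullFetch whether to match a Sort whose {@link Sort#fetch} is null
+   * @param relBuilderFactory factory used to build relational expressions
+   * @param description description of the rule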
+   */
+  public HiveSortUnionReduceRule(
+      Class<? extends Sort> sortClass,
+      Class<? extends Union> unionClass,
+      boolean matchNullFetch,
+      RelBuilderFactory relBuilderFactory,
+      String description) {
+    super(
+        operand(sortClass,
+            operand(unionClass, any())),
+        relBuilderFactory, description);
+    this.matchNullFetch = matchNullFetch;
+  }
+
+  // ~ Methods ----------------------------------------------------------------
+
+  @Override public boolean matches(RelOptRuleCall call) {
+    final Sort sort = call.rel(0);
+    final Union union = call.rel(1);
+    // We only apply this rule if Union.all is true and Sort.offset is null.
+    // The matchNullFetch flag indicates whether the rule should also be
+    // applied when Sort.fetch is null.
+    return union.all
+        && sort.offset == null
+        && (matchNullFetch || sort.fetch != null);
+  }
+
+  @Override public void onMatch(RelOptRuleCall call) {
+    final Sort sort = call.rel(0);
+    final Union union = call.rel(1);
+    List<RelNode> inputs = new ArrayList<>();
+    // 'ret' tracks whether the sort has effectively been pushed past the union
+    // already: it stays true only if every union branch is a HiveSortLimit with
+    // the same fetch, in which case there is nothing to change.
+    boolean ret = true;
+    int needed = RexLiteral.intValue(sort.fetch);
+    for (RelNode input : union.getInputs()) {
+      // This is a temporary solution. The correct solution is to use Calcite's
+      // SortUnionTransposeRule once the patch from CALCITE-987 is available.
+      if ((input instanceof HepRelVertex)
+          && ((HepRelVertex) input).getCurrentRel() instanceof HiveSortLimit) {
+        // change the fetch number if it differs from the one being pushed
+        HiveSortLimit inputAsSort = (HiveSortLimit) ((HepRelVertex) input).getCurrentRel();
+        if (RexLiteral.intValue(inputAsSort.fetch) != needed) {
+          ret = false;
+          RexNode fetch = inputAsSort
+              .getCluster()
+              .getRexBuilder()
+              .makeExactLiteral(
+                  BigDecimal.valueOf(Math.min(RexLiteral.intValue(inputAsSort.fetch), needed)));
+          Sort newInputAsSort = inputAsSort.copy(inputAsSort.getTraitSet(), inputAsSort.getInput(),
+              inputAsSort.getCollation(), inputAsSort.offset, fetch);
+          inputs.add(newInputAsSort);
+        } else {
+          inputs.add(input);
+        }
+      } else {
+        ret = false;
+        // add a sort with the same fetch below the union for this branch
+        Sort branchSort = sort.copy(sort.getTraitSet(), input, sort.getCollation(), sort.offset,
+            sort.fetch);
+        inputs.add(branchSort);
+      }
+    }
+    // there is nothing to change
+    if (ret) {
+      return;
+    }
+    // create the new union and the sort on top of it
+    Union unionCopy = (Union) union
+        .copy(union.getTraitSet(), inputs, union.all);
+    Sort result = sort.copy(sort.getTraitSet(), unionCopy, sort.getCollation(),
+        sort.offset, sort.fetch);
+    call.transformTo(result);
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d7e2bdb..bce2631 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -156,6 +156,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -1102,6 +1103,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider
         new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class,
             HiveProject.DEFAULT_PROJECT_FACTORY));
+    basePlan = hepPlan(basePlan, true, mdProvider, new HiveSortUnionReduceRule(false));
 
     return basePlan;
   }
diff --git a/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q b/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q
new file mode 100644
index 0000000..e175bfd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_SortUnionTransposeRule.q
@@ -0,0 +1,39 @@
+explain
+select key from src a
+union all
+select key from src b
+order by key;
+
+explain
+select key from src a
+union all
+select key from src b
+limit 2;
+
+explain
+select key from src a
+union all
+select key from src b
+order by key
+limit 2;
+
+set hive.cbo.enable=false;
+
+explain
+select key from src a
+union all
+select key from src b
+order by key;
+
+explain
+select key from src a
+union all
+select key from src b
+limit 2;
+
+explain
+select key from src a
+union all
+select key from src b
+order by key
+limit 2;
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
index 059da68..503cd51 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_boolean.q
@@ -4,7 +4,12 @@ SET mapred.max.split.size=5000;
 
 create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as parquet;
 
-insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl;
+insert overwrite table newtypestbl
+select * from (
+select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1
+union all
+select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2
+) uniontbl;
 
 SET hive.optimize.ppd=true;
 SET hive.optimize.index.filter=true;
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_char.q b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
index eaddcb4..74da3ec 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_char.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_char.q
@@ -5,7 +5,12 @@ SET mapred.max.split.size=5000;
 
 create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
 
-insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl;
+insert overwrite table newtypestbl
+select * from (
+select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1
+union all
+select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2
+) uniontbl;
 
 set hive.optimize.index.filter=false;
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_date.q b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
index 41d0d64..7d0b01e 100644
--- a/ql/src/test/queries/clientpositive/parquet_ppd_date.q
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_date.q
@@ -5,7 +5,12 @@ SET mapred.max.split.size=5000;
 
 create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet;
 
-insert 
overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; -- date data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where da='1970-02-20'; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q index dfca486..9641f27 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_decimal.q @@ -4,7 +4,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where d=0.22; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q index 1b9f6ff..ea04464 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q +++ b/ql/src/test/queries/clientpositive/parquet_ppd_timestamp.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), ts timestamp) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl; -- timestamp data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) select * from newtypestbl where cast(ts as string)='2011-01-01 01:01:01'; diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q index 6449c6d..d8a0048 100644 --- a/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q +++ 
b/ql/src/test/queries/clientpositive/parquet_ppd_varchar.q @@ -5,7 +5,12 @@ SET mapred.max.split.size=5000; create table newtypestbl(c char(10), v varchar(10), d decimal(5,3), da date) stored as parquet; -insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl; +insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl; set hive.optimize.index.filter=false; diff --git a/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out new file mode 100644 index 0000000..dd791ea --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_SortUnionTransposeRule.q.out @@ -0,0 +1,502 @@ +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 
depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + 
Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan + Union + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from src a +union all +select key from src b +order by key +limit 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/merge4.q.out b/ql/src/test/results/clientpositive/merge4.q.out index 031376f..9ccaf1f 100644 --- a/ql/src/test/results/clientpositive/merge4.q.out +++ b/ql/src/test/results/clientpositive/merge4.q.out @@ -2763,13 +2763,15 @@ select * from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - 
Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 - Stage-4 - Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 - Stage-3 + Stage-2 depends on stages: Stage-1, Stage-9 + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 - Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-9 is a root stage STAGE PLANS: Stage: Stage-1 @@ -2782,52 +2784,72 @@ STAGE PLANS: expressions: key (type: string), value (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 1500 Data size: 140124 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '1' (type: string), '1' (type: string), 'file,' (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 129500 Basic stats: COMPLETE Column stats: COMPLETE - Union - Statistics: Num rows: 1500 Data size: 140124 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Union + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + TableScan + Union + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-4 + Stage: Stage-5 Move Operator files: hdfs directory: true @@ -2846,10 +2868,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-2 + Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2861,7 +2883,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2873,12 +2895,41 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part - Stage: Stage-6 + Stage: Stage-7 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 129500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: '1' (type: string), '1' (type: string), 'file,' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + PREHOOK: query: insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select * from ( select key, value, hr from srcpart where ds='2008-04-08' diff --git a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out index 6e62ee4..084d6ff 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out +++ 
b/ql/src/test/results/clientpositive/parquet_ppd_boolean.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out index defaa9d..561fa52 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) 
uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out index 55231e9..840e539 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_date.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_date.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out index a30820e..1ca671e 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_decimal.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from 
src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out index 3b3e5b7..546c9e5 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_timestamp.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("2011-01-01 01:01:01" as timestamp) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("2011-01-20 01:01:01" as timestamp) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out index 5a62e80..30f087c 100644 --- a/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out +++ b/ql/src/test/results/clientpositive/parquet_ppd_varchar.q.out @@ -6,11 +6,21 @@ POSTHOOK: query: create table newtypestbl(c char(10), v varchar(10), d decimal(5 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@newtypestbl -PREHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 
limit 10) uniontbl +PREHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@newtypestbl -POSTHOOK: query: insert overwrite table newtypestbl select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src src2 limit 10) uniontbl +POSTHOOK: query: insert overwrite table newtypestbl +select * from ( +select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, cast("1970-02-20" as date) from src limit 5)src1 +union all +select * from (select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, cast("1970-02-27" as date) from src limit 5)src2 +) uniontbl POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@newtypestbl diff --git a/ql/src/test/results/clientpositive/spark/temp_table.q.out b/ql/src/test/results/clientpositive/spark/temp_table.q.out index 718a8a4..ec0e807 100644 --- a/ql/src/test/results/clientpositive/spark/temp_table.q.out +++ b/ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -228,7 +228,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1), Map 3 (SORT, 1) + Reducer 2 <- Map 1 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1), Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -243,9 +245,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: bar @@ -257,14 +259,28 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -275,6 +291,20 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/temp_table.q.out b/ql/src/test/results/clientpositive/temp_table.q.out index a9f2bae..a4d0456 100644 --- a/ql/src/test/results/clientpositive/temp_table.q.out +++ b/ql/src/test/results/clientpositive/temp_table.q.out @@ -298,7 +298,9 @@ POSTHOOK: query: explain select * from (select * from foo union all select * fro POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -311,13 +313,64 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Union + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data 
size: 200 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan alias: bar Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE @@ -325,28 +378,25 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE - Union - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/temp_table.q.out b/ql/src/test/results/clientpositive/tez/temp_table.q.out index 200ccdd..b4418d1 100644 --- a/ql/src/test/results/clientpositive/tez/temp_table.q.out +++ b/ql/src/test/results/clientpositive/tez/temp_table.q.out @@ -236,9 +236,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 2 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -253,9 +253,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: bar @@ -267,14 +267,28 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 247 Data size: 2609 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE @@ -285,8 +299,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 253 Data size: 2703 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator