diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index c1ab64c90f..653a3c1170 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -61,7 +61,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdParallelism; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates; -import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMaxRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRuntimeRowCount; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity; @@ -84,7 +83,6 @@ new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, HiveRelMdRuntimeRowCount.SOURCE, - HiveRelMdMaxRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, @@ -156,7 +154,6 @@ private RelMetadataProvider init(HiveConf hiveConf) { new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(), HiveRelMdSelectivity.SOURCE, HiveRelMdRowCount.SOURCE, - HiveRelMdMaxRowCount.SOURCE, HiveRelMdUniqueKeys.SOURCE, HiveRelMdColumnUniqueness.SOURCE, HiveRelMdSize.SOURCE, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java index 858aa1a9ec..290eb1e4ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitRemoveRule.java @@ -19,6 +19,7 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rex.RexLiteral; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -41,8 +42,17 @@ public boolean matches(RelOptRuleCall call) { final HiveSortLimit sortLimit = call.rel(0); Double maxRowCount = call.getMetadataQuery().getMaxRowCount(sortLimit.getInput()); - if (maxRowCount != null &&(maxRowCount <= 1)) { - return true; + if (maxRowCount != null) { + if (sortLimit.getFetchExpr() != null) { + // we have LIMIT + int limit = RexLiteral.intValue(sortLimit.getFetchExpr()); + if (maxRowCount <= limit) { + return true; + } + } else if (maxRowCount <= 1) { + // No LIMIT only ORDER BY + return true; + } } return false; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java deleted file mode 100644 index b45d7652a6..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdMaxRowCount.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* TODO: CALC-2991 created some optimizations. This file bypasses - the change for now (see HIVE-22408) -*/ -package org.apache.hadoop.hive.ql.optimizer.calcite.stats; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.calcite.plan.RelOptCost; -import org.apache.calcite.plan.RelOptUtil; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Aggregate; -import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; -import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMdDistinctRowCount; -import org.apache.calcite.rel.metadata.RelMdUtil; -import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.metadata.RelMdMaxRowCount; -import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.util.BuiltInMethod; -import org.apache.calcite.util.ImmutableBitSet; -import org.apache.calcite.util.NumberUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; -import org.apache.hadoop.hive.ql.plan.ColStatistics; - -import com.google.common.collect.ImmutableList; - -public class HiveRelMdMaxRowCount extends RelMdMaxRowCount { - - private static final HiveRelMdMaxRowCount INSTANCE = - new HiveRelMdMaxRowCount(); - - public static final RelMetadataProvider SOURCE = - ChainedRelMetadataProvider.of( - ImmutableList.of( - ReflectiveRelMetadataProvider.reflectiveSource( - BuiltInMethod.MAX_ROW_COUNT.method, new HiveRelMdMaxRowCount()), - RelMdMaxRowCount.SOURCE)); - - private HiveRelMdMaxRowCount() { - super(); - } - - @Override - public Double getMaxRowCount(Aggregate rel, RelMetadataQuery mq) { - if (rel.getGroupSet().isEmpty()) { - // Aggregate with no GROUP BY always returns 1 row (even on empty table). - return 1D; - } - - final Double rowCount = mq.getMaxRowCount(rel.getInput()); - if (rowCount == null) { - return null; - } - return rowCount * rel.getGroupSets().size(); - } - -} diff --git a/ql/src/test/queries/clientpositive/cbo_limit.q b/ql/src/test/queries/clientpositive/cbo_limit.q index d0b1bc9351..336f9e246e 100644 --- a/ql/src/test/queries/clientpositive/cbo_limit.q +++ b/ql/src/test/queries/clientpositive/cbo_limit.q @@ -34,6 +34,9 @@ select count(*) cs from cbo_t1 where c_int > 1 LIMIT 100; explain cbo select c_int from (select c_int from cbo_t1 where c_float > 1.0 limit 1) subq where c_int > 1 order by c_int; select c_int from (select c_int from cbo_t1 where c_float > 1.0 limit 1) subq where c_int > 1 order by c_int; +explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0; +select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0; + -- prune un-necessary aggregates explain cbo select count(*) from cbo_t1 order by sum(c_int), count(*); select count(*) from cbo_t1 order by sum(c_int), count(*); diff --git a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out index ee62b31ba4..ab53e94df3 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_limit.q.out @@ -192,6 +192,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +PREHOOK: query: explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(fetch=[0]) + HiveProject(_o__c0=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject($f0=[true]) + HiveFilter(condition=[>($3, 1.0E0)]) + HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) + +PREHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### PREHOOK: query: explain cbo select count(*) from cbo_t1 order by sum(c_int), count(*) PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 319bba6a13..539456d451 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1582,36 +1582,27 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator - limit:5 + limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_13] - Limit [LIM_12] (rows=5 width=89) - Number of rows:5 - Select Operator [SEL_11] (rows=5 width=89) - Output:["_col0","_col1"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - Limit [LIM_8] (rows=5 width=89) - Number of rows:5 - Select Operator [SEL_7] (rows=5 width=89) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_6] - Limit [LIM_4] (rows=5 width=89) - Number of rows:5 - Select Operator [SEL_3] (rows=20 width=84) - Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - Select Operator [SEL_1] (rows=20 width=84) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=20 width=84) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + Reducer 3 llap + File Output Operator [FS_11] + Select Operator [SEL_10] (rows=5 width=89) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] + Limit [LIM_4] (rows=5 width=89) + Number of rows:5 + Select Operator [SEL_3] (rows=20 width=84) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=20 width=84) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=20 width=84) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a limit 5) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc limit 5) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c limit 5 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out index 41c6e686f6..e05f8fbed6 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out @@ -449,8 +449,8 @@ POSTHOOK: Lineage: part_null_n1.p_partkey SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_retailprice SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_size SCRIPT [] POSTHOOK: Lineage: part_null_n1.p_type SCRIPT [] -Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -547,10 +547,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -580,9 +580,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -590,8 +593,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -636,7 +639,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 input vertices: 1 Reducer 3 - Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -710,9 +713,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 8d68a68892..b768854e72 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -2818,7 +2818,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -2851,17 +2851,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3050,7 +3044,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3083,17 +3077,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 9c2f946a5e..052f33fdf6 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -3577,7 +3577,7 @@ almond aquamarine sandy cyan gainsboro almond aquamarine yellow dodger mint almond azure aquamarine papaya violet almond azure blanched chiffon midnight -Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -3747,7 +3747,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3810,17 +3810,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3828,7 +3822,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -4046,7 +4040,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -4112,17 +4106,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index b9c674fdf7..d0bac2a593 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -370,8 +370,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n0 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -468,10 +468,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -501,9 +501,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -511,8 +514,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_name = (select p_name from part_null_n0 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2457,9 +2460,8 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[58][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product -Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -2477,54 +2479,29 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (XPROD_EDGE), Reducer 11 (XPROD_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - filterExpr: (key = '90') (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: true (type: boolean) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 10 + Map 5 Map Operator Tree: TableScan alias: src @@ -2548,66 +2525,74 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + alias: s1 + filterExpr: (key = '90') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: true (type: boolean) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 11 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.0 + keys: _col0 (type: string) + minReductionHashAggr: 0.5 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2617,26 +2602,11 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col3 - residual filter predicates: {(_col3 > _col1)} + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2647,6 +2617,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2670,51 +2652,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col1 <> _col2)} - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2722,9 +2659,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[58][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product -Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -7020,8 +6956,7 @@ POSTHOOK: query: drop table tempty_n0 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tempty_n0 POSTHOOK: Output: default@tempty_n0 -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 group by 4) PREHOOK: type: QUERY @@ -7042,122 +6977,73 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - keys: true (type: boolean) - minReductionHashAggr: 0.99 + keys: key (type: string) + minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: boolean) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 4 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 + keys: true (type: boolean) + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: boolean) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 5 + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -7166,11 +7052,11 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2, _col3 - residual filter predicates: {(_col3 > _col1)} + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -7180,7 +7066,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -7204,22 +7090,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -7227,8 +7097,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY @@ -7249,11 +7118,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 7 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -7278,22 +7144,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = '90') (type: boolean) Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE @@ -7339,11 +7189,11 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2, _col3 - residual filter predicates: {(_col3 > _col1)} + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -7354,57 +7204,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query44.q.out b/ql/src/test/results/clientpositive/perf/spark/query44.q.out index 38d9f77bde..3e7ff7ffe3 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query44.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join JOIN[35][tables = [$hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 3' is a cross product -Warning: Shuffle Join JOIN[79][tables = [$hdt$_3, $hdt$_4, $hdt$_5]] in Work 'Reducer 16' is a cross product +Warning: Shuffle Join JOIN[19][tables = [$hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[47][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 13' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -82,19 +82,17 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 18 (GROUP, 100) - Reducer 12 <- Map 11 (GROUP, 100) - Reducer 13 <- Reducer 12 (GROUP, 1) - Reducer 15 <- Map 1 (GROUP, 199) - Reducer 16 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 15 (PARTITION-LEVEL SORT, 1), Reducer 19 (PARTITION-LEVEL SORT, 1) - Reducer 17 <- Reducer 16 (PARTITION-LEVEL SORT, 1009) - Reducer 19 <- Map 18 (GROUP, 100) + Reducer 10 <- Map 15 (GROUP, 100) + Reducer 12 <- Map 1 (GROUP, 199) + Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 1), Reducer 16 (PARTITION-LEVEL SORT, 1) + Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1009) + Reducer 16 <- Map 15 (GROUP, 100) Reducer 2 <- Map 1 (GROUP, 199) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 1009), Reducer 4 (PARTITION-LEVEL SORT, 1009) - Reducer 6 <- Map 23 (PARTITION-LEVEL SORT, 1009), Reducer 5 (PARTITION-LEVEL SORT, 1009) - Reducer 7 <- Map 24 (PARTITION-LEVEL SORT, 1009), Reducer 6 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 4 (PARTITION-LEVEL SORT, 1009) + Reducer 6 <- Map 17 (PARTITION-LEVEL SORT, 1009), Reducer 5 (PARTITION-LEVEL SORT, 1009) + Reducer 7 <- Map 18 (PARTITION-LEVEL SORT, 1009), Reducer 6 (PARTITION-LEVEL SORT, 1009) Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: @@ -125,30 +123,7 @@ STAGE PLANS: Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_hdemo_sk is null and (ss_store_sk = 410)) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_hdemo_sk is null and (ss_store_sk = 410)) (type: boolean) - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 18 + Map 15 Map Operator Tree: TableScan alias: store_sales @@ -175,7 +150,7 @@ STAGE PLANS: Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 23 + Map 17 Map Operator Tree: TableScan alias: i1 @@ -195,7 +170,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 24 + Map 18 Map Operator Tree: TableScan alias: i2 @@ -232,50 +207,14 @@ STAGE PLANS: predicate: (_col1 is not null and _col2 is not null) (type: boolean) Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (0.9 * (_col1 / _col2)) (type: decimal(38,22)) + expressions: (_col1 / _col2) (type: decimal(37,22)) outputColumnNames: _col0 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(38,22)) + value expressions: _col0 (type: decimal(37,22)) Reducer 12 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 13 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -295,35 +234,33 @@ STAGE PLANS: sort order: Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) - Reducer 16 + Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10367842752596232 Data size: 1839676035841599918 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col2) (type: boolean) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > (0.9 * _col2)) (type: boolean) + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), _col1 (type: decimal(37,22)) sort order: +- Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) - Reducer 17 + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -344,21 +281,21 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((rank_window_0 < 11) and _col0 is not null) (type: boolean) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 19 + Reducer 16 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -375,13 +312,13 @@ STAGE PLANS: predicate: (_col1 is not null and _col2 is not null) (type: boolean) Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (0.9 * (_col1 / _col2)) (type: decimal(38,22)) + expressions: (_col1 / _col2) (type: decimal(37,22)) outputColumnNames: _col0 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(38,22)) + value expressions: _col0 (type: decimal(37,22)) Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -407,21 +344,19 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10367842752596232 Data size: 1922618777862369774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10367842752596232 Data size: 1839676035841599918 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col2) (type: boolean) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > (0.9 * _col2)) (type: boolean) + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), _col1 (type: decimal(37,22)) sort order: ++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Reducer 4 @@ -430,7 +365,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -451,19 +386,19 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((rank_window_0 < 11) and _col0 is not null) (type: boolean) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1151982528066248 Data size: 213624308651374400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reducer 5 Reduce Operator Tree: @@ -474,12 +409,12 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1267180808338276 Data size: 234986744609712256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Reducer 6 Reduce Operator Tree: @@ -490,12 +425,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1393898919384048 Data size: 258485424673204064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col5 (type: string) Reducer 7 Reduce Operator Tree: @@ -506,15 +441,15 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col5, _col7 - Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col5 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: string) Reducer 8 @@ -523,13 +458,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1533288844555592 Data size: 284333973303297248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 18500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out index 61c01c5c60..a546472908 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out @@ -19,8 +19,7 @@ POSTHOOK: type: CREATE_MATERIALIZED_VIEW POSTHOOK: Input: default@store_sales POSTHOOK: Output: database:default POSTHOOK: Output: default@mv_store_sales_item_customer -Warning: Shuffle Join MERGEJOIN[153][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[109][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -98,155 +97,124 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 5 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Map 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_204] - Limit [LIM_203] (rows=100 width=218) + Reducer 8 vectorized + File Output Operator [FS_146] + Limit [LIM_145] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_202] (rows=6951 width=218) + Select Operator [SEL_144] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] - Select Operator [SEL_105] (rows=6951 width=218) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_70] + Select Operator [SEL_69] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_159] (rows=6951 width=218) - Conds:RS_102._col2=RS_201._col0(Inner),Output:["_col1","_col5","_col7"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Merge Join Operator [MERGEJOIN_113] (rows=6951 width=218) + Conds:RS_66._col2=RS_143._col0(Inner),Output:["_col1","_col5","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_199] (rows=462000 width=111) + Select Operator [SEL_141] (rows=462000 width=111) Output:["_col0","_col1"] - TableScan [TS_92] (rows=462000 width=111) + TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_102] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_158] (rows=6951 width=115) - Conds:RS_99._col0=RS_200._col0(Inner),Output:["_col1","_col2","_col5"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_112] (rows=6951 width=115) + Conds:RS_63._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_199] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_99] + Please refer to the previous Select Operator [SEL_141] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_157] (rows=6951 width=12) - Conds:RS_193._col1=RS_198._col1(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_111] (rows=6951 width=12) + Conds:RS_135._col1=RS_140._col1(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] PartitionCols:_col1 - Select Operator [SEL_197] (rows=6951 width=8) + Select Operator [SEL_134] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_196] (rows=6951 width=116) + Filter Operator [FIL_133] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_195] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_194] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_85] + PTF Operator [PTF_132] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_131] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_38] (rows=20854 width=228) - predicate:(_col3 > _col1) - Merge Join Operator [MERGEJOIN_154] (rows=62562 width=228) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] - Select Operator [SEL_187] (rows=62562 width=116) + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_109] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_130] + Select Operator [SEL_129] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_128] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_127] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_126] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col0 + Group By Operator [GBY_124] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_123] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_122] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_121] + Select Operator [SEL_120] (rows=62562 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=62562 width=124) + Filter Operator [FIL_119] (rows=62562 width=124) predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_185] (rows=62562 width=124) + Group By Operator [GBY_118] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_183] (rows=3199976 width=124) + Group By Operator [GBY_116] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_182] (rows=6399952 width=114) + Select Operator [SEL_115] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_181] (rows=6399952 width=114) + Filter Operator [FIL_114] (rows=6399952 width=114) predicate:(ss_store_sk = 410) - TableScan [TS_24] (rows=575995635 width=114) + TableScan [TS_0] (rows=575995635 width=114) default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_35] - Merge Join Operator [MERGEJOIN_153] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_180] - Select Operator [SEL_179] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_178] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_177] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_176] (rows=1 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_175] - PartitionCols:_col0 - Group By Operator [GBY_174] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_173] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_172] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_15] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_171] - Select Operator [SEL_170] (rows=1 width=8) - Filter Operator [FIL_169] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_168] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_167] - Group By Operator [GBY_166] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_165] (rows=1 width=4) - Group By Operator [GBY_164] (rows=1 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] - PartitionCols:_col0 - Group By Operator [GBY_162] (rows=18 width=4) - Output:["_col0"],keys:true - Select Operator [SEL_161] (rows=287946 width=7) - Filter Operator [FIL_160] (rows=287946 width=7) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_0] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] PartitionCols:_col1 - Select Operator [SEL_192] (rows=6951 width=8) + Select Operator [SEL_139] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_191] (rows=6951 width=116) + Filter Operator [FIL_138] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_190] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_189] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_39] + PTF Operator [PTF_137] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_136] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_38] + Please refer to the previous Filter Operator [FIL_20] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out index a243417159..7644f5ec61 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out @@ -1,5 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[153][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[109][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -77,155 +76,124 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 5 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Map 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_204] - Limit [LIM_203] (rows=100 width=218) + Reducer 8 vectorized + File Output Operator [FS_146] + Limit [LIM_145] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_202] (rows=6951 width=218) + Select Operator [SEL_144] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] - Select Operator [SEL_105] (rows=6951 width=218) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_70] + Select Operator [SEL_69] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_159] (rows=6951 width=218) - Conds:RS_102._col2=RS_201._col0(Inner),Output:["_col1","_col5","_col7"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Merge Join Operator [MERGEJOIN_113] (rows=6951 width=218) + Conds:RS_66._col2=RS_143._col0(Inner),Output:["_col1","_col5","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_199] (rows=462000 width=111) + Select Operator [SEL_141] (rows=462000 width=111) Output:["_col0","_col1"] - TableScan [TS_92] (rows=462000 width=111) + TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_102] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_158] (rows=6951 width=115) - Conds:RS_99._col0=RS_200._col0(Inner),Output:["_col1","_col2","_col5"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_112] (rows=6951 width=115) + Conds:RS_63._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_199] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_99] + Please refer to the previous Select Operator [SEL_141] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_157] (rows=6951 width=12) - Conds:RS_193._col1=RS_198._col1(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_111] (rows=6951 width=12) + Conds:RS_135._col1=RS_140._col1(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] PartitionCols:_col1 - Select Operator [SEL_197] (rows=6951 width=8) + Select Operator [SEL_134] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_196] (rows=6951 width=116) + Filter Operator [FIL_133] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_195] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_194] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_85] + PTF Operator [PTF_132] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_131] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_38] (rows=20854 width=228) - predicate:(_col3 > _col1) - Merge Join Operator [MERGEJOIN_154] (rows=62562 width=228) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] - Select Operator [SEL_187] (rows=62562 width=116) + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_109] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_130] + Select Operator [SEL_129] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_128] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_127] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_126] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col0 + Group By Operator [GBY_124] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_123] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_122] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_121] + Select Operator [SEL_120] (rows=62562 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=62562 width=124) + Filter Operator [FIL_119] (rows=62562 width=124) predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_185] (rows=62562 width=124) + Group By Operator [GBY_118] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_183] (rows=3199976 width=124) + Group By Operator [GBY_116] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_182] (rows=6399952 width=114) + Select Operator [SEL_115] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_181] (rows=6399952 width=114) + Filter Operator [FIL_114] (rows=6399952 width=114) predicate:(ss_store_sk = 410) - TableScan [TS_24] (rows=575995635 width=114) + TableScan [TS_0] (rows=575995635 width=114) default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_35] - Merge Join Operator [MERGEJOIN_153] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_180] - Select Operator [SEL_179] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_178] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_177] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_176] (rows=1 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_175] - PartitionCols:_col0 - Group By Operator [GBY_174] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_173] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_172] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_15] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_171] - Select Operator [SEL_170] (rows=1 width=8) - Filter Operator [FIL_169] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_168] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_167] - Group By Operator [GBY_166] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_165] (rows=1 width=4) - Group By Operator [GBY_164] (rows=1 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] - PartitionCols:_col0 - Group By Operator [GBY_162] (rows=18 width=4) - Output:["_col0"],keys:true - Select Operator [SEL_161] (rows=287946 width=7) - Filter Operator [FIL_160] (rows=287946 width=7) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_0] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] PartitionCols:_col1 - Select Operator [SEL_192] (rows=6951 width=8) + Select Operator [SEL_139] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_191] (rows=6951 width=116) + Filter Operator [FIL_138] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_190] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_189] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_39] + PTF Operator [PTF_137] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_136] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_38] + Please refer to the previous Filter Operator [FIL_20] diff --git a/ql/src/test/results/clientpositive/perf/tez/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/query44.q.out index 38a281c8f9..adaea8c46e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query44.q.out @@ -1,5 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[151][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 9' is a cross product -Warning: Shuffle Join MERGEJOIN[152][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[109][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -77,157 +76,126 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 10 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 10 <- Reducer 8 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_203] - Limit [LIM_202] (rows=100 width=218) + File Output Operator [FS_147] + Limit [LIM_146] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_201] (rows=6951 width=218) + Select Operator [SEL_145] (rows=6951 width=218) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_109] - Select Operator [SEL_108] (rows=6951 width=218) + SHUFFLE [RS_73] + Select Operator [SEL_72] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_157] (rows=6951 width=218) - Conds:RS_105._col3=RS_106._col3(Inner),Output:["_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_113] (rows=6951 width=218) + Conds:RS_69._col3=RS_70._col3(Inner),Output:["_col1","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_105] + SHUFFLE [RS_69] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_153] (rows=6951 width=111) - Conds:RS_160._col0=RS_195._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_110] (rows=6951 width=111) + Conds:RS_116._col0=RS_139._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + SHUFFLE [RS_116] PartitionCols:_col0 - Select Operator [SEL_159] (rows=462000 width=111) + Select Operator [SEL_115] (rows=462000 width=111) Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=462000 width=111) + Filter Operator [FIL_114] (rows=462000 width=111) predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] PartitionCols:_col0 - Select Operator [SEL_194] (rows=6951 width=8) + Select Operator [SEL_138] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_193] (rows=6951 width=116) - predicate:((rank_window_0 < 11) and _col2 is not null) - PTF Operator [PTF_192] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_191] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_42] + Filter Operator [FIL_137] (rows=6951 width=116) + predicate:((rank_window_0 < 11) and _col0 is not null) + PTF Operator [PTF_136] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_135] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:0 - Filter Operator [FIL_41] (rows=20854 width=228) - predicate:(_col3 > _col1) - Merge Join Operator [MERGEJOIN_152] (rows=62562 width=228) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_190] - Select Operator [SEL_189] (rows=62562 width=116) + Filter Operator [FIL_23] (rows=20854 width=228) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_109] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_134] + Select Operator [SEL_133] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_132] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_131] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_130] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Group By Operator [GBY_128] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_127] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_126] (rows=287946 width=114) + predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) + TableScan [TS_11] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_125] + Select Operator [SEL_124] (rows=62562 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_188] (rows=62562 width=124) + Filter Operator [FIL_123] (rows=62562 width=124) predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_187] (rows=62562 width=124) + Group By Operator [GBY_122] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] PartitionCols:_col0 - Group By Operator [GBY_185] (rows=3199976 width=124) + Group By Operator [GBY_120] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_184] (rows=6399952 width=114) + Select Operator [SEL_119] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_183] (rows=6399952 width=114) + Filter Operator [FIL_118] (rows=6399952 width=114) predicate:(ss_store_sk = 410) - TableScan [TS_27] (rows=575995635 width=114) + TableScan [TS_3] (rows=575995635 width=114) default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 9 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_38] - Merge Join Operator [MERGEJOIN_151] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_182] - Select Operator [SEL_181] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_180] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_179] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_178] (rows=1 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_177] - PartitionCols:_col0 - Group By Operator [GBY_176] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_175] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_174] (rows=287946 width=114) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_18] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_173] - Select Operator [SEL_172] (rows=1 width=8) - Filter Operator [FIL_171] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_170] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_169] - Group By Operator [GBY_168] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_167] (rows=1 width=4) - Group By Operator [GBY_166] (rows=1 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] - PartitionCols:_col0 - Group By Operator [GBY_164] (rows=18 width=4) - Output:["_col0"],keys:true - Select Operator [SEL_163] (rows=287946 width=7) - Filter Operator [FIL_162] (rows=287946 width=7) - predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_3] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_106] + SHUFFLE [RS_70] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_156] (rows=6951 width=111) - Conds:RS_161._col0=RS_200._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_112] (rows=6951 width=111) + Conds:RS_117._col0=RS_144._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] + SHUFFLE [RS_117] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_159] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Please refer to the previous Select Operator [SEL_115] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] PartitionCols:_col0 - Select Operator [SEL_199] (rows=6951 width=8) + Select Operator [SEL_143] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_198] (rows=6951 width=116) - predicate:((rank_window_0 < 11) and _col2 is not null) - PTF Operator [PTF_197] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_196] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_91] + Filter Operator [FIL_142] (rows=6951 width=116) + predicate:((rank_window_0 < 11) and _col0 is not null) + PTF Operator [PTF_141] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_140] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_41] + Please refer to the previous Filter Operator [FIL_23] diff --git a/ql/src/test/results/clientpositive/spark/cbo_limit.q.out b/ql/src/test/results/clientpositive/spark/cbo_limit.q.out index a7455d9d8f..2c9be5a178 100644 --- a/ql/src/test/results/clientpositive/spark/cbo_limit.q.out +++ b/ql/src/test/results/clientpositive/spark/cbo_limit.q.out @@ -192,6 +192,30 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +PREHOOK: query: explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(fetch=[0]) + HiveProject(_o__c0=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveProject($f0=[true]) + HiveFilter(condition=[>($3, 1.0E0)]) + HiveTableScan(table=[[default, cbo_t1]], table:alias=[cbo_t1]) + +PREHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from cbo_t1 where c_float > 1.0 group by true limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t1 +#### A masked pattern was here #### PREHOOK: query: explain cbo select count(*) from cbo_t1 order by sum(c_int), count(*) PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index c30080194f..13ffd59a69 100644 --- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -1476,36 +1476,27 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SORT) Reducer 3 <- Reducer 2 (SORT) -Reducer 4 <- Reducer 3 (SORT) Stage-0 Fetch Operator - limit:5 + limit:-1 Stage-1 - Reducer 4 - File Output Operator [FS_13] - Limit [LIM_12] (rows=5 width=89) - Number of rows:5 - Select Operator [SEL_11] (rows=5 width=89) - Output:["_col0","_col1"] - <-Reducer 3 [SORT] - SORT [RS_10] - Limit [LIM_8] (rows=5 width=89) - Number of rows:5 - Select Operator [SEL_7] (rows=5 width=89) - Output:["_col0","_col1"] - <-Reducer 2 [SORT] - SORT [RS_6] - Limit [LIM_4] (rows=5 width=89) - Number of rows:5 - Select Operator [SEL_3] (rows=20 width=84) - Output:["_col0","_col1"] - <-Map 1 [SORT] - SORT [RS_2] - Select Operator [SEL_1] (rows=20 width=84) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=20 width=84) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + Reducer 3 + File Output Operator [FS_11] + Select Operator [SEL_10] (rows=5 width=89) + Output:["_col0","_col1"] + <-Reducer 2 [SORT] + SORT [RS_9] + Limit [LIM_4] (rows=5 width=89) + Number of rows:5 + Select Operator [SEL_3] (rows=20 width=84) + Output:["_col0","_col1"] + <-Map 1 [SORT] + SORT [RS_2] + Select Operator [SEL_1] (rows=20 width=84) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=20 width=84) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key order by a limit 5) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key order by q/10 desc, r asc limit 5) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c order by cbo_t3.c_int+c desc, c limit 5 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 016e97ffdd..5be59584fc 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -2745,7 +2745,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -2778,17 +2778,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2971,7 +2965,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3004,17 +2998,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out index 38284482de..7c13e3a43f 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -3739,17 +3739,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -3820,7 +3814,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -4048,17 +4042,11 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -4127,7 +4115,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index 850b4f161e..e443145d3a 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -349,7 +349,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n0 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -460,9 +460,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 0 + Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -470,7 +473,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_name = (select p_name from part_null_n0 where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2374,8 +2377,8 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 8' is a cross product -Warning: Shuffle Join JOIN[45][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -2392,39 +2395,28 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 1) - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 11 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Reducer 8 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) + Reducer 8 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - filterExpr: (key = '90') (type: boolean) + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized - Map 10 + Map 5 Map Operator Tree: TableScan alias: src @@ -2444,7 +2436,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 5 + Map 7 Map Operator Tree: TableScan alias: s1 @@ -2469,95 +2461,89 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized - Reducer 11 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Reducer 2 - Execution mode: vectorized Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 <> _col2) (type: boolean) + Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 48906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 48906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 48906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 31250 Data size: 6726500 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31250 Data size: 6476500 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > _col1) (type: boolean) - Statistics: Num rows: 10416 Data size: 2242023 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > _col2) (type: boolean) + Statistics: Num rows: 10416 Data size: 2158695 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10416 Data size: 2242023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10416 Data size: 2158695 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10416 Data size: 2242023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10416 Data size: 2158695 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -2581,52 +2567,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 <> _col2) (type: boolean) - Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 97812 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 9 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 48906 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 48906 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 48906 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2634,8 +2574,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 8' is a cross product -Warning: Shuffle Join JOIN[45][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 4' is a cross product PREHOOK: query: select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -6836,7 +6776,7 @@ POSTHOOK: query: drop table tempty_n0 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tempty_n0 POSTHOOK: Output: default@tempty_n0 -Warning: Shuffle Join JOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 group by 4) PREHOOK: type: QUERY @@ -6856,32 +6796,34 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: true (type: boolean) + aggregations: count() + keys: key (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: boolean) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: s1 @@ -6902,92 +6844,47 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Execution mode: vectorized Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 6906 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 4906 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > _col1) (type: boolean) - Statistics: Num rows: 83 Data size: 2292 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > _col2) (type: boolean) + Statistics: Num rows: 83 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 2292 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 1628 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 83 Data size: 2292 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 1628 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -7011,22 +6908,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -7034,7 +6915,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY @@ -7054,36 +6935,34 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - filterExpr: (key = '90') (type: boolean) + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 5 + Map 4 Map Operator Tree: TableScan alias: s1 @@ -7108,92 +6987,47 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Execution mode: vectorized Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 31250 Data size: 945250 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > _col1) (type: boolean) - Statistics: Num rows: 10416 Data size: 315063 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > _col2) (type: boolean) + Statistics: Num rows: 10416 Data size: 231735 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10416 Data size: 315063 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10416 Data size: 231735 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10416 Data size: 315063 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10416 Data size: 231735 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -7217,22 +7051,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 8 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/timestamp.q.out b/ql/src/test/results/clientpositive/timestamp.q.out index aaa9998788..f06b0da823 100644 --- a/ql/src/test/results/clientpositive/timestamp.q.out +++ b/ql/src/test/results/clientpositive/timestamp.q.out @@ -34,7 +34,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -55,7 +54,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: boolean) @@ -66,20 +64,17 @@ STAGE PLANS: expressions: TIMESTAMP'2011-01-01 01:01:01' (type: timestamp) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 5 + limit: -1 Processor Tree: ListSink @@ -98,7 +93,6 @@ UNION ALL SELECT CAST(TIMESTAMP '2011-01-01 01:01:01.000000000' AS TIMESTAMP) AS `$f0` FROM `default`.`src`) AS `t1` GROUP BY TRUE -LIMIT 5 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -130,8 +124,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - TopN: 5 - TopN Hash Memory Usage: 0.1 auto parallelism: false TableScan alias: src @@ -156,8 +148,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 - TopN: 5 - TopN Hash Memory Usage: 0.1 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -223,35 +213,32 @@ STAGE PLANS: expressions: TIMESTAMP'2011-01-01 01:01:01' (type: timestamp) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types timestamp - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types timestamp + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator - limit: 5 + limit: -1 Processor Tree: ListSink @@ -300,7 +287,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -321,7 +307,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: boolean) @@ -332,20 +317,17 @@ STAGE PLANS: expressions: TIMESTAMP'2011-01-01 01:01:01.123' (type: timestamp) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 5 + limit: -1 Processor Tree: ListSink