diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveSqCountCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveSqCountCheck.java index f0f7094e35..ba2e38d255 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveSqCountCheck.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveSqCountCheck.java @@ -32,8 +32,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlKind; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.*; import java.util.List; import java.util.NavigableMap; @@ -54,24 +53,20 @@ //match if there is filter (sq_count_check) as right input of a join which is left // input of another join public HiveRemoveSqCountCheck() { - super(operand(Join.class, - some( + super( operand(Project.class, operand(Join.class, some( operand(RelNode.class, any()), - operand(Filter.class, any()))) + operand(Filter.class, + operand(Aggregate.class, any())))) ), - operand(Project.class, - operand(Aggregate.class, - any())) - ) - ), HiveRelFactories.HIVE_BUILDER, "HiveRemoveSqCountCheck"); + HiveRelFactories.HIVE_BUILDER, "HiveRemoveSqCountCheck"); } @Override public boolean matches(RelOptRuleCall call) { - final RelNode filter = call.rel(4); + final RelNode filter = call.rel(3); if(filter instanceof HiveFilter) { HiveFilter hiveFilter = (HiveFilter)filter; // check if it has sq_count_check @@ -133,9 +128,10 @@ private boolean isAggWithConstantGbyKeys(final Aggregate aggregate, RelOptRuleCa } @Override public void onMatch(RelOptRuleCall call) { - final Join topJoin= call.rel(0); - final Join join = call.rel(2); - final Aggregate aggregate = call.rel(6); + final Project project = call.rel(0); + final Join join = call.rel(1); + final Aggregate aggregate = (Aggregate)call.rel(4); + // in presence of grouping sets we can't remove sq_count_check if(aggregate.indicator) { @@ -143,9 +139,8 @@ private boolean isAggWithConstantGbyKeys(final Aggregate aggregate, RelOptRuleCa } if(isAggregateWithoutGbyKeys(aggregate) || isAggWithConstantGbyKeys(aggregate, call)) { // join(left, join.getRight) - RelNode newJoin = HiveJoin.getJoin(topJoin.getCluster(), join.getLeft(), topJoin.getRight(), - topJoin.getCondition(), topJoin.getJoinType()); - call.transformTo(newJoin); + RelNode newProject = call.builder().push(join.getLeft()).project(project.getProjects()).build(); + call.transformTo(newProject); } } } diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 0d8ff14b46..df719b5e41 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -214,8 +214,7 @@ POSTHOOK: Input: default@part_null_n0 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -226,8 +225,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@tempty_n0 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -247,31 +245,29 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Map 5 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) - Reducer 4 <- Map 6 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: tempty_n0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE + alias: part + filterExpr: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: tempty_n0 @@ -290,57 +286,7 @@ STAGE PLANS: value expressions: _col0 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: part - filterExpr: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 95 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 95 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: double) - Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -349,16 +295,16 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - residual filter predicates: {(_col11 > _col1)} - Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + residual filter predicates: {(_col9 > _col10)} + Statistics: Num rows: 8 Data size: 5712 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 8 Data size: 5712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 8 Data size: 5712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2438,9 +2384,8 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[58][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product -Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -2458,54 +2403,29 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (XPROD_EDGE), Reducer 11 (XPROD_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - filterExpr: (key = '90') (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: true (type: boolean) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 10 + Map 5 Map Operator Tree: TableScan alias: src @@ -2529,66 +2449,74 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + alias: s1 + filterExpr: (key = '90') (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: true (type: boolean) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 11 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + keys: _col0 (type: string) minReductionHashAggr: 0.0 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2598,26 +2526,11 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col3 - residual filter predicates: {(_col3 > _col1)} + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2628,6 +2541,18 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2651,51 +2576,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col1 <> _col2)} - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2703,9 +2583,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[58][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product -Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -5765,8 +5644,7 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5784,14 +5662,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 4 <- Map 8 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + filterExpr: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: part @@ -5815,72 +5709,7 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string) - outputColumnNames: p_type - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 8 - Map Operator Tree: - TableScan - alias: part - filterExpr: p_size is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_size is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -5889,26 +5718,11 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - residual filter predicates: {(_col7 > _col0)} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + residual filter predicates: {(_col5 > _col9)} Statistics: Num rows: 112 Data size: 69776 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 112 Data size: 69328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -5918,42 +5732,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator + aggregations: max(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator @@ -7001,8 +6803,7 @@ POSTHOOK: query: drop table tempty_n0 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tempty_n0 POSTHOOK: Output: default@tempty_n0 -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 group by 4) PREHOOK: type: QUERY @@ -7023,122 +6824,73 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) - Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - keys: true (type: boolean) + keys: key (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: boolean) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 4 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - keys: key (type: string) + keys: true (type: boolean) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: boolean) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 5 + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -7147,11 +6899,11 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2, _col3 - residual filter predicates: {(_col3 > _col1)} + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -7161,7 +6913,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -7185,22 +6937,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -7208,8 +6944,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY @@ -7230,11 +6965,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 7 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -7259,22 +6991,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: true (type: boolean) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = '90') (type: boolean) Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE @@ -7320,11 +7036,11 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2, _col3 - residual filter predicates: {(_col3 > _col1)} + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} Statistics: Num rows: 83 Data size: 8549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col3 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -7335,57 +7051,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: boolean) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -7488,8 +7153,7 @@ HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Shuffle Join MERGEJOIN[77][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain cbo with avg_sales as (select avg(quantity*list_price) over( partition by list_price) average_sales from (select ss_quantity quantity @@ -7518,32 +7182,20 @@ POSTHOOK: Input: default@store_sales #### A masked pattern was here #### CBO PLAN: HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(avg_window_0=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveProject(avg_window_0=[avg(*(CAST($1):DECIMAL(10, 0), $2)) OVER (PARTITION BY $2 ORDER BY $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(avg_window_0=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveProject(avg_window_0=[avg(*(CAST($1):DECIMAL(10, 0), $2)) OVER (PARTITION BY $2 ORDER BY $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) PREHOOK: query: DROP TABLE store_sales PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out index 66915cc267..12c1d999a6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out @@ -19,8 +19,7 @@ POSTHOOK: type: CREATE_MATERIALIZED_VIEW POSTHOOK: Input: default@store_sales POSTHOOK: Output: database:default POSTHOOK: Output: default@mv_store_sales_item_customer -Warning: Shuffle Join MERGEJOIN[153][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[109][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -98,155 +97,124 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 5 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Map 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_204] - Limit [LIM_203] (rows=100 width=218) + Reducer 8 vectorized + File Output Operator [FS_146] + Limit [LIM_145] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_202] (rows=6951 width=218) + Select Operator [SEL_144] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] - Select Operator [SEL_105] (rows=6951 width=218) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_70] + Select Operator [SEL_69] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_159] (rows=6951 width=218) - Conds:RS_102._col2=RS_201._col0(Inner),Output:["_col1","_col5","_col7"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Merge Join Operator [MERGEJOIN_113] (rows=6951 width=218) + Conds:RS_66._col2=RS_143._col0(Inner),Output:["_col1","_col5","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_199] (rows=462000 width=111) + Select Operator [SEL_141] (rows=462000 width=111) Output:["_col0","_col1"] - TableScan [TS_92] (rows=462000 width=111) + TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_102] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_158] (rows=6951 width=115) - Conds:RS_99._col0=RS_200._col0(Inner),Output:["_col1","_col2","_col5"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_112] (rows=6951 width=115) + Conds:RS_63._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_199] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_99] + Please refer to the previous Select Operator [SEL_141] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_157] (rows=6951 width=12) - Conds:RS_193._col1=RS_198._col1(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_111] (rows=6951 width=12) + Conds:RS_135._col1=RS_140._col1(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] PartitionCols:_col1 - Select Operator [SEL_197] (rows=6951 width=8) + Select Operator [SEL_134] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_196] (rows=6951 width=116) + Filter Operator [FIL_133] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_195] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_194] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_85] + PTF Operator [PTF_132] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"0"}] + Select Operator [SEL_131] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_38] (rows=20854 width=228) - predicate:(_col3 > _col1) - Merge Join Operator [MERGEJOIN_154] (rows=62562 width=228) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] - Select Operator [SEL_187] (rows=62562 width=116) + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > _col2) + Merge Join Operator [MERGEJOIN_109] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_130] + Select Operator [SEL_129] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_128] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_127] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_126] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col0 + Group By Operator [GBY_124] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_123] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_122] (rows=287946 width=114) + predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_121] + Select Operator [SEL_120] (rows=62562 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=62562 width=124) + Filter Operator [FIL_119] (rows=62562 width=124) predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_185] (rows=62562 width=124) + Group By Operator [GBY_118] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_183] (rows=3199976 width=124) + Group By Operator [GBY_116] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_182] (rows=6399952 width=114) + Select Operator [SEL_115] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_181] (rows=6399952 width=114) + Filter Operator [FIL_114] (rows=6399952 width=114) predicate:(ss_store_sk = 410) - TableScan [TS_24] (rows=575995635 width=114) + TableScan [TS_0] (rows=575995635 width=114) default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_35] - Merge Join Operator [MERGEJOIN_153] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_180] - Select Operator [SEL_179] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_178] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_177] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_176] (rows=1 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_175] - PartitionCols:_col0 - Group By Operator [GBY_174] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_173] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_172] (rows=287946 width=114) - predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_15] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_171] - Select Operator [SEL_170] (rows=1 width=8) - Filter Operator [FIL_169] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_168] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_167] - Group By Operator [GBY_166] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_165] (rows=1 width=4) - Group By Operator [GBY_164] (rows=1 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] - PartitionCols:_col0 - Group By Operator [GBY_162] (rows=18 width=4) - Output:["_col0"],keys:true - Select Operator [SEL_161] (rows=287946 width=7) - Filter Operator [FIL_160] (rows=287946 width=7) - predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_0] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] PartitionCols:_col1 - Select Operator [SEL_192] (rows=6951 width=8) + Select Operator [SEL_139] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_191] (rows=6951 width=116) + Filter Operator [FIL_138] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_190] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST","partition by:":"0"}] - Select Operator [SEL_189] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_39] + PTF Operator [PTF_137] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_136] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_38] + Please refer to the previous Filter Operator [FIL_20] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out index cfd99fbaed..46db123a37 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out @@ -1,5 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[153][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[109][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -77,155 +76,124 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 5 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 16 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Map 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_204] - Limit [LIM_203] (rows=100 width=218) + Reducer 8 vectorized + File Output Operator [FS_146] + Limit [LIM_145] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_202] (rows=6951 width=218) + Select Operator [SEL_144] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] - Select Operator [SEL_105] (rows=6951 width=218) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_70] + Select Operator [SEL_69] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_159] (rows=6951 width=218) - Conds:RS_102._col2=RS_201._col0(Inner),Output:["_col1","_col5","_col7"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Merge Join Operator [MERGEJOIN_113] (rows=6951 width=218) + Conds:RS_66._col2=RS_143._col0(Inner),Output:["_col1","_col5","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_199] (rows=462000 width=111) + Select Operator [SEL_141] (rows=462000 width=111) Output:["_col0","_col1"] - TableScan [TS_92] (rows=462000 width=111) + TableScan [TS_56] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_102] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_66] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_158] (rows=6951 width=115) - Conds:RS_99._col0=RS_200._col0(Inner),Output:["_col1","_col2","_col5"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_112] (rows=6951 width=115) + Conds:RS_63._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_199] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_99] + Please refer to the previous Select Operator [SEL_141] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_157] (rows=6951 width=12) - Conds:RS_193._col1=RS_198._col1(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_111] (rows=6951 width=12) + Conds:RS_135._col1=RS_140._col1(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] PartitionCols:_col1 - Select Operator [SEL_197] (rows=6951 width=8) + Select Operator [SEL_134] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_196] (rows=6951 width=116) + Filter Operator [FIL_133] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_195] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_194] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_85] + PTF Operator [PTF_132] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"0"}] + Select Operator [SEL_131] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] PartitionCols:0 - Filter Operator [FIL_38] (rows=20854 width=228) - predicate:(_col3 > _col1) - Merge Join Operator [MERGEJOIN_154] (rows=62562 width=228) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] - Select Operator [SEL_187] (rows=62562 width=116) + Filter Operator [FIL_20] (rows=20854 width=228) + predicate:(_col1 > _col2) + Merge Join Operator [MERGEJOIN_109] (rows=62562 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_130] + Select Operator [SEL_129] (rows=1 width=112) + Output:["_col0"] + Filter Operator [FIL_128] (rows=1 width=120) + predicate:(_col1 is not null and _col2 is not null) + Select Operator [SEL_127] (rows=1 width=120) + Output:["_col1","_col2"] + Group By Operator [GBY_126] (rows=1 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col0 + Group By Operator [GBY_124] (rows=258 width=124) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true + Select Operator [SEL_123] (rows=287946 width=114) + Output:["_col1"] + Filter Operator [FIL_122] (rows=287946 width=114) + predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) + TableScan [TS_8] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_121] + Select Operator [SEL_120] (rows=62562 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=62562 width=124) + Filter Operator [FIL_119] (rows=62562 width=124) predicate:(_col1 is not null and _col2 is not null) - Group By Operator [GBY_185] (rows=62562 width=124) + Group By Operator [GBY_118] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_183] (rows=3199976 width=124) + Group By Operator [GBY_116] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_182] (rows=6399952 width=114) + Select Operator [SEL_115] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_181] (rows=6399952 width=114) + Filter Operator [FIL_114] (rows=6399952 width=114) predicate:(ss_store_sk = 410) - TableScan [TS_24] (rows=575995635 width=114) + TableScan [TS_0] (rows=575995635 width=114) default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_35] - Merge Join Operator [MERGEJOIN_153] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_180] - Select Operator [SEL_179] (rows=1 width=112) - Output:["_col0"] - Filter Operator [FIL_178] (rows=1 width=120) - predicate:(_col1 is not null and _col2 is not null) - Select Operator [SEL_177] (rows=1 width=120) - Output:["_col1","_col2"] - Group By Operator [GBY_176] (rows=1 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_175] - PartitionCols:_col0 - Group By Operator [GBY_174] (rows=258 width=124) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_173] (rows=287946 width=114) - Output:["_col1"] - Filter Operator [FIL_172] (rows=287946 width=114) - predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_15] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_171] - Select Operator [SEL_170] (rows=1 width=8) - Filter Operator [FIL_169] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_168] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_167] - Group By Operator [GBY_166] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_165] (rows=1 width=4) - Group By Operator [GBY_164] (rows=1 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] - PartitionCols:_col0 - Group By Operator [GBY_162] (rows=18 width=4) - Output:["_col0"],keys:true - Select Operator [SEL_161] (rows=287946 width=7) - Filter Operator [FIL_160] (rows=287946 width=7) - predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_0] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] PartitionCols:_col1 - Select Operator [SEL_192] (rows=6951 width=8) + Select Operator [SEL_139] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_191] (rows=6951 width=116) + Filter Operator [FIL_138] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_190] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST","partition by:":"0"}] - Select Operator [SEL_189] (rows=20854 width=116) - Output:["_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_39] + PTF Operator [PTF_137] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_136] (rows=20854 width=116) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:0 - Please refer to the previous Filter Operator [FIL_38] + Please refer to the previous Filter Operator [FIL_20]