diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveSqCountCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveSqCountCheck.java new file mode 100644 index 0000000000..0357a7f644 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRemoveSqCountCheck.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; + +import java.util.List; +import java.util.NavigableMap; +import java.util.TreeMap; + +public class HiveRemoveSqCountCheck extends RelOptRule { + + public static final HiveRemoveSqCountCheck INSTANCE = + new HiveRemoveSqCountCheck(); + + // Matches Join(Project(Join(any, Filter)), Project(Aggregate)): a filter (sq_count_check) as right input of a join whose projection is the left input of the top join. + // Operand ordinals used with call.rel(): 0=top Join, 1=Project, 2=inner Join, 3=any RelNode, 4=Filter, 5=Project, 6=Aggregate + public HiveRemoveSqCountCheck() { + super(operand(Join.class, + some( + operand(Project.class, + operand(Join.class, + some( + operand(RelNode.class, any()), + operand(Filter.class, any()))) + ), + operand(Project.class, + operand(Aggregate.class, + any()))) + ), HiveRelFactories.HIVE_BUILDER, "HiveRemoveSqCountCheck"); + } + + @Override + public boolean matches(RelOptRuleCall call) { + final RelNode filter = call.rel(4); + if(filter instanceof HiveFilter) { + HiveFilter hiveFilter = (HiveFilter)filter; + // check if it has sq_count_check + if(isSqlCountCheck(hiveFilter)) { + return true; + } + } + // The filter is not a sq_count_check filter, so the rule does not apply (grouping sets are checked in onMatch) + return false; + } + + private boolean isSqlCountCheck(final HiveFilter 
filter) { + // Expects a condition of the form sq_count_check(...) <= ...; see HiveSubQueryRemoveRule for how this filter is created + if(filter.getCondition() instanceof RexCall) { + final RexCall condition = (RexCall)filter.getCondition(); + if(condition.getKind() == SqlKind.LESS_THAN_OR_EQUAL) { + final List operands = condition.getOperands(); + if(operands.get(0) instanceof RexCall) { + final RexCall op = (RexCall)operands.get(0); + if(op.getOperator().getName().equals("sq_count_check")) { + return true; + } + } + } + } + return false; + } + + + @Override public void onMatch(RelOptRuleCall call) { + final Join topJoin= call.rel(0); + final Join join = call.rel(2); + final Aggregate aggregate = call.rel(6); + + // in presence of grouping sets we can't remove sq_count_check + if(aggregate.indicator) return ; + + final int groupCount = aggregate.getGroupCount(); + + final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); + final RelMetadataQuery mq = RelMetadataQuery.instance(); + final RelOptPredicateList predicates = + mq.getPulledUpPredicates(aggregate.getInput()); + if (predicates == null) { + return; + } + final NavigableMap map = new TreeMap<>(); + for (int key : aggregate.getGroupSet()) { + final RexInputRef ref = + rexBuilder.makeInputRef(aggregate.getInput(), key); + if (predicates.constantMap.containsKey(ref)) { + map.put(key, predicates.constantMap.get(ref)); + } + } + + // None of the group expressions are constant. Nothing to do. 
+ if (map.isEmpty()) { + return; + } + + if (groupCount == map.size()) { + // Every group-by key is constant: rebuild the top join as join(join.getLeft(), topJoin.getRight()), bypassing the sq_count_check filter branch + RelNode newJoin = HiveJoin.getJoin(topJoin.getCluster(), join.getLeft(), topJoin.getRight(), + topJoin.getCondition(), topJoin.getJoinType()); + call.transformTo(newJoin); + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 83d3f7436d..425df28816 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -175,6 +175,11 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check // since it is guaranteed to produce at most one row if(!hasNoWindowingAndNoGby) { + // we want to have project after join since sq_count_check's count() expression wouldn't + // be needed further up + final List parentQueryFields = new ArrayList<>(); + parentQueryFields.addAll(builder.fields()); + builder.push(e.rel); // returns single row/column builder.aggregate(builder.groupKey(), builder.count(false, "cnt")); @@ -192,7 +197,9 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, } else builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); - offset++; + builder.project(parentQueryFields); + + //offset++; } if(isCorrScalarAgg) { // Transformation : diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 348331e052..dc2ac49fab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -200,6 +200,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule; 
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRemoveSqCountCheck; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; @@ -1526,6 +1527,12 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) { perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Semijoin conversion"); } + // 8. Get rid of sq_count_check if group by key is constant (HIVE-16793) + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, HiveRemoveSqCountCheck.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removing sq_count_check UDF "); + + // 8. 
Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) diff --git a/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out b/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out index 0a780db7ef..e40e2fe808 100644 --- a/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out +++ b/ql/src/test/results/clientnegative/subquery_scalar_multi_rows.q.out @@ -1,4 +1,5 @@ -Warning: Shuffle Join JOIN[15][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select p_name from part where p_size > (select p_size from part) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index e94edff262..a43c6870f8 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -194,7 +194,8 @@ POSTHOOK: Input: default@part_null 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -Warning: Shuffle Join MERGEJOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size > 
(select * from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -205,7 +206,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@tempty #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size > (select * from tempty) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size > (select * from tempty) @@ -219,8 +221,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -238,7 +241,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: tempty @@ -256,7 +259,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: tempty @@ -277,28 +280,41 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 16328 Basic stats: COMPLETE Column stats: 
NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16120 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (UDFToDouble(_col5) > UDFToDouble(_col10)) (type: boolean) - Statistics: Num rows: 8 Data size: 5024 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(_col5) > UDFToDouble(_col9)) (type: boolean) + Statistics: Num rows: 8 Data size: 4960 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 4960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 5024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 4960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -321,7 +337,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null where p_name is null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_name = (select p_name from part_null where p_name is null) @@ -335,8 +352,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 4 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -357,7 +375,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: part_null @@ -392,25 +410,42 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 591 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string), _col3 
(type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 591 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 591 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 16 Data size: 11100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), null (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 11100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 11100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 
Execution mode: llap Reduce Operator Tree: Group By Operator @@ -433,7 +468,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_name = (select p_name from part_null where p_name is null) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -444,8 +480,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where (select i from tnull limit 1) is null PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where (select i from tnull limit 1) is null @@ -539,10 +575,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 26 Data size: 16250 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col10 is null (type: boolean) + predicate: _col9 is null (type: boolean) Statistics: Num rows: 13 Data size: 8125 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -607,8 +643,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where (select i from tnull limit 1) is null PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -941,7 +977,8 @@ POSTHOOK: Input: default@part 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -959,12 +996,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 3 (SIMPLE_EDGE) - Reducer 
8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 4 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -982,7 +1020,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: part @@ -1005,15 +1043,28 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col1, _col2, _col4 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col2 > _col4) (type: boolean) + predicate: (_col2 > _col3) (type: boolean) Statistics: Num rows: 8 Data size: 1816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) @@ -1026,7 +1077,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: 
Select Operator @@ -1061,7 +1112,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1078,7 +1129,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1094,7 +1145,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1130,7 +1181,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1151,7 +1202,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -2419,8 +2471,8 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 
-Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) @@ -2434,13 +2486,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 7 (SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2489,21 +2539,6 @@ STAGE PLANS: Select Operator Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: '90' (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key = 
'90') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator aggregations: count() keys: '90' (type: string) mode: hash @@ -2517,23 +2552,6 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2583,15 +2601,13 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 205 Data size: 21115 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col1 > _col3) (type: boolean) + predicate: (_col1 > _col2) (type: boolean) Statistics: Num rows: 68 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) @@ -2620,37 +2636,19 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data 
size: 94 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) + expressions: _col1 (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator @@ -2658,8 +2656,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: 
Input: default@src @@ -2812,7 +2810,8 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### 2346.3 3461.37 -Warning: Shuffle Join MERGEJOIN[77][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[80][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[81][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) @@ -2826,18 +2825,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 11 (CONTAINS) - Reducer 12 <- Union 11 (SIMPLE_EDGE) - Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) - Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 16 <- Map 13 (CUSTOM_SIMPLE_EDGE) - Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Union 11 (CONTAINS) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 7 <- Union 6 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 10 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 12 (CONTAINS) + Reducer 13 <- Union 12 (SIMPLE_EDGE) + Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) + Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE) + Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 12 (CONTAINS) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 
(CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 8 <- Union 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2855,7 +2855,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 13 + Map 14 Map Operator Tree: TableScan alias: part @@ -2888,7 +2888,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: part @@ -2926,6 +2926,26 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) keys: KEY._col0 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 @@ -2942,7 +2962,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 12 + Reducer 13 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2962,7 +2982,7 @@ STAGE PLANS: sort order: Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 14 + Reducer 15 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2982,7 +3002,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 15 + Reducer 16 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3003,7 +3023,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 16 + Reducer 17 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3023,7 +3043,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 17 + Reducer 18 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3050,15 +3070,28 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToLong(_col5) > _col10) (type: boolean) + predicate: (UDFToLong(_col5) > _col9) (type: boolean) Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -3071,7 +3104,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3091,7 +3124,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3112,7 +3145,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3139,7 +3172,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3155,30 +3188,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial 
- outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Union 11 - Vertex: Union 11 - Union 6 - Vertex: Union 6 + Union 12 + Vertex: Union 12 + Union 7 + Vertex: Union 7 Stage: Stage-0 Fetch Operator @@ -3186,7 +3199,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[77][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[80][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[81][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5687,7 +5701,8 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size > (select max(p_size) from 
part group by p_type) @@ -5701,10 +5716,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5722,7 +5738,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: part @@ -5743,7 +5759,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: part @@ -5772,15 +5788,28 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + 
Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 338 Data size: 210574 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col5 > _col10) (type: boolean) + predicate: (_col5 > _col9) (type: boolean) Statistics: Num rows: 112 Data size: 69776 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -5793,7 +5822,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5812,7 +5841,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5828,7 +5857,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5865,9 +5894,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5887,7 +5917,7 @@ STAGE PLANS: value 
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: p @@ -5907,7 +5937,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: p @@ -5935,26 +5965,43 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col4 (type: string) 1 _col0 (type: string) - 2 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 - residual filter predicates: {(_col5 > _col11)} - Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 
16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 12 Data size: 7476 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 > _col9) (type: boolean) + Statistics: Num rows: 4 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5980,7 +6027,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 202980e975..6fc6d070e4 100644 --- 
a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -2863,8 +2863,8 @@ POSTHOOK: Input: default@part 6 false 18 false 45 false -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part PREHOOK: type: QUERY POSTHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part @@ -2948,10 +2948,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), (_col2 = 1) (type: boolean) + expressions: _col0 (type: string), (_col1 = 1) (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -3015,8 +3015,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part PREHOOK: type: QUERY PREHOOK: Input: default@part @@ 
-4825,7 +4825,7 @@ POSTHOOK: Input: default@part 2 28 46 28 23 28 -Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[64][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type and (select sum(p_size) from part a1 where a1.p_partkey = p.p_partkey @@ -4847,12 +4847,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4870,7 +4871,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 10 + Map 11 Map Operator Tree: TableScan alias: a1 @@ -4912,7 +4913,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: pp @@ -4931,7 +4932,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 8 + Map 9 Map Operator Tree: TableScan alias: a1 @@ -4951,7 +4952,33 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: 
COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 11 + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5016,11 +5043,25 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -5031,7 +5072,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5043,32 +5084,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -5076,7 +5091,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[60][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[64][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select t1.p_size, (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type and (select sum(p_size) from part a1 where a1.p_partkey = p.p_partkey @@ -5119,7 +5134,7 @@ POSTHOOK: Input: default@part 2 28 46 28 23 28 -Warning: Shuffle Join MERGEJOIN[90][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 
'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[98][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type @@ -5143,15 +5158,17 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 7 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) + Reducer 10 <- Map 8 (SIMPLE_EDGE) + Reducer 11 <- Map 8 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Reducer 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5186,7 +5203,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 11 + Map 13 Map Operator Tree: TableScan alias: p @@ -5206,7 +5223,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 13 + Map 15 Map Operator Tree: TableScan alias: pp @@ -5225,7 +5242,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 2808 
Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: t2 @@ -5277,6 +5294,25 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 @@ -5304,6 +5340,22 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 14 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) @@ -5321,17 +5373,17 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col1 (type: int) - outputColumnNames: _col1, _col4 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column 
stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col4 (type: bigint) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -5339,22 +5391,37 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 26 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: 0 1 - outputColumnNames: _col1, _col4, _col6 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int), _col4 (type: bigint), _col6 (type: bigint) + expressions: _col1 (type: int), _col2 (type: bigint), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column 
stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5379,17 +5446,15 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 - Inner Join 0 to 2 + Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -5400,7 +5465,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5412,7 +5477,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 8 + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5438,25 +5503,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column 
stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator @@ -5464,7 +5510,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[90][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[98][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out index 1fd8cb4f25..7998560217 100644 --- a/ql/src/test/results/clientpositive/perf/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/query23.q.out @@ -1,5 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[373][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[374][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[376][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 27' is a cross product +Warning: Shuffle Join MERGEJOIN[377][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 28' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -105,7 +107,7 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Reducer 30 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 10 <- Reducer 28 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) Reducer 13 <- Map 19 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) @@ -113,32 +115,34 @@ Reducer 15 <- Map 11 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) Reducer 16 <- Map 19 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 22 <- Map 40 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Reducer 24 (CUSTOM_SIMPLE_EDGE), Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 42 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 20 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 27 <- Map 40 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE), Reducer 35 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 20 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 38 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 41 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 39 (CUSTOM_SIMPLE_EDGE), Reducer 44 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) -Reducer 33 <- Map 40 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 30 <- Map 29 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE) 
+Reducer 31 <- Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 29 (SIMPLE_EDGE), Reducer 46 (SIMPLE_EDGE) Reducer 34 <- Reducer 33 (SIMPLE_EDGE) Reducer 35 <- Reducer 34 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 31 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) -Reducer 37 <- Map 40 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) -Reducer 38 <- Reducer 37 (SIMPLE_EDGE) -Reducer 39 <- Reducer 38 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 46 (SIMPLE_EDGE) -Reducer 42 <- Reducer 41 (SIMPLE_EDGE) -Reducer 43 <- Map 40 (SIMPLE_EDGE), Map 46 (SIMPLE_EDGE) -Reducer 44 <- Reducer 43 (SIMPLE_EDGE) +Reducer 36 <- Map 29 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) +Reducer 37 <- Reducer 36 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (CUSTOM_SIMPLE_EDGE) +Reducer 39 <- Map 29 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE) +Reducer 4 <- Reducer 24 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 40 <- Reducer 39 (SIMPLE_EDGE) +Reducer 41 <- Reducer 40 (CUSTOM_SIMPLE_EDGE) +Reducer 43 <- Map 42 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) +Reducer 44 <- Map 42 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) +Reducer 46 <- Map 45 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) +Reducer 47 <- Map 45 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 47 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 8 <- Map 49 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 17 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 @@ -146,402 +150,410 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File Output Operator [FS_246] - Limit [LIM_245] (rows=1 width=112) + File Output Operator [FS_252] + Limit [LIM_251] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_243] (rows=1 width=112) + Group By Operator [GBY_249] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 10 [CONTAINS] - Reduce Output 
Operator [RS_242] - Group By Operator [GBY_241] (rows=1 width=112) + Reduce Output Operator [RS_248] + Group By Operator [GBY_247] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_237] (rows=191667562 width=135) + Select Operator [SEL_243] (rows=191667562 width=135) Output:["_col0"] - Merge Join Operator [MERGEJOIN_370] (rows=191667562 width=135) - Conds:RS_234._col2=RS_235._col0(Inner),Output:["_col3","_col4"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_235] + Merge Join Operator [MERGEJOIN_378] (rows=191667562 width=135) + Conds:RS_240._col2=RS_241._col0(Inner),Output:["_col3","_col4"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_241] PartitionCols:_col0 - Select Operator [SEL_227] (rows=105599202 width=321) + Select Operator [SEL_233] (rows=105599202 width=322) Output:["_col0"] - Filter Operator [FIL_226] (rows=105599202 width=321) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_369] (rows=316797606 width=321) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_222] - Select Operator [SEL_180] (rows=1 width=8) - Filter Operator [FIL_179] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_177] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_176] - Group By Operator [GBY_175] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_172] (rows=348477374 width=88) - Group By Operator [GBY_171] (rows=348477374 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_170] + Filter Operator [FIL_232] (rows=105599202 width=322) + predicate:(_col1 > (0.95 * _col2)) + Merge Join Operator [MERGEJOIN_377] (rows=316797606 width=322) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_229] + Merge Join Operator 
[MERGEJOIN_376] (rows=316797606 width=97) + Conds:(Inner),Output:["_col0","_col1"] + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_199] + Group By Operator [GBY_168] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_167] + PartitionCols:_col0 + Group By Operator [GBY_166] (rows=633595212 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_164] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_366] (rows=633595212 width=88) + Conds:RS_161._col0=RS_162._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_162] PartitionCols:_col0 - Group By Operator [GBY_169] (rows=696954748 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_167] (rows=696954748 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_361] (rows=696954748 width=88) - Conds:RS_164._col1=RS_165._col0(Inner),Output:["_col6"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_165] - PartitionCols:_col0 - Select Operator [SEL_160] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_341] (rows=80000000 width=860) - predicate:c_customer_sk is not null - TableScan [TS_39] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_164] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_360] (rows=633595212 width=88) - Conds:RS_161._col0=RS_162._col0(Inner),Output:["_col1"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_162] + Select Operator [SEL_160] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_346] (rows=80000000 width=860) + predicate:c_customer_sk is not null + TableScan [TS_36] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator 
[SEL_157] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_345] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + TableScan [TS_33] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 38 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_200] + Select Operator [SEL_198] (rows=1 width=8) + Filter Operator [FIL_197] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_195] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 37 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_194] + Group By Operator [GBY_193] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_190] (rows=348477374 width=88) + Group By Operator [GBY_189] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_188] + PartitionCols:_col0 + Group By Operator [GBY_187] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_185] (rows=696954748 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_368] (rows=696954748 width=88) + Conds:RS_182._col1=RS_183._col0(Inner),Output:["_col6"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_183] PartitionCols:_col0 - Select Operator [SEL_157] (rows=36525 width=1119) + Select Operator [SEL_178] (rows=80000000 width=860) Output:["_col0"] - Filter Operator [FIL_340] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_36] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 20 [SIMPLE_EDGE] - SHUFFLE [RS_161] - PartitionCols:_col0 - Select Operator [SEL_154] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_339] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - 
TableScan [TS_33] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 39 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_223] - Group By Operator [GBY_205] (rows=1 width=224) + Filter Operator [FIL_349] (rows=80000000 width=860) + predicate:c_customer_sk is not null + Please refer to the previous TableScan [TS_36] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_182] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_367] (rows=633595212 width=88) + Conds:RS_179._col0=RS_180._col0(Inner),Output:["_col1"] + <-Map 45 [SIMPLE_EDGE] + SHUFFLE [RS_180] + PartitionCols:_col0 + Select Operator [SEL_175] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_348] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_51] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 42 [SIMPLE_EDGE] + SHUFFLE [RS_179] + PartitionCols:_col0 + Select Operator [SEL_172] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_347] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_48] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 41 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_230] + Group By Operator [GBY_227] (rows=1 width=224) Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 38 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_204] - Group By Operator [GBY_203] (rows=1 width=224) + <-Reducer 40 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_226] + Group By Operator [GBY_225] (rows=1 width=224) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_201] (rows=348477374 width=88) + Select Operator [SEL_223] (rows=348477374 width=88) Output:["_col1"] - Group By 
Operator [GBY_200] (rows=348477374 width=88) + Group By Operator [GBY_222] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 37 [SIMPLE_EDGE] - SHUFFLE [RS_199] + <-Reducer 39 [SIMPLE_EDGE] + SHUFFLE [RS_221] PartitionCols:_col0 - Group By Operator [GBY_198] (rows=696954748 width=88) + Group By Operator [GBY_220] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_196] (rows=696954748 width=88) + Select Operator [SEL_218] (rows=696954748 width=88) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_363] (rows=696954748 width=88) - Conds:RS_193._col1=RS_194._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_194] + Merge Join Operator [MERGEJOIN_370] (rows=696954748 width=88) + Conds:RS_215._col1=RS_216._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_216] PartitionCols:_col0 - Select Operator [SEL_189] (rows=80000000 width=860) + Select Operator [SEL_211] (rows=80000000 width=860) Output:["_col0"] - Filter Operator [FIL_344] (rows=80000000 width=860) + Filter Operator [FIL_352] (rows=80000000 width=860) predicate:c_customer_sk is not null - Please refer to the previous TableScan [TS_39] - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_193] + Please refer to the previous TableScan [TS_36] + <-Reducer 47 [SIMPLE_EDGE] + SHUFFLE [RS_215] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_362] (rows=633595212 width=88) - Conds:RS_190._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_191] + Merge Join Operator [MERGEJOIN_369] (rows=633595212 width=88) + Conds:RS_212._col0=RS_213._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 45 [SIMPLE_EDGE] + SHUFFLE [RS_213] PartitionCols:_col0 - Select Operator [SEL_186] (rows=36525 width=1119) + Select Operator [SEL_208] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_343] 
(rows=36525 width=1119) + Filter Operator [FIL_351] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_36] - <-Map 45 [SIMPLE_EDGE] - SHUFFLE [RS_190] + Please refer to the previous TableScan [TS_51] + <-Map 48 [SIMPLE_EDGE] + SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_183] (rows=575995635 width=88) + Select Operator [SEL_205] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_342] (rows=575995635 width=88) + Filter Operator [FIL_350] (rows=575995635 width=88) predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_62] (rows=575995635 width=88) + TableScan [TS_81] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 44 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_224] - Group By Operator [GBY_220] (rows=316797606 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 43 [SIMPLE_EDGE] - SHUFFLE [RS_219] - PartitionCols:_col0 - Group By Operator [GBY_218] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_216] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88) - Conds:RS_213._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_214] - PartitionCols:_col0 - Select Operator [SEL_212] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_346] (rows=80000000 width=860) - predicate:c_customer_sk is not null - Please refer to the previous TableScan [TS_39] - <-Map 46 [SIMPLE_EDGE] - SHUFFLE [RS_213] - PartitionCols:_col0 - Select Operator [SEL_209] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_345] (rows=575995635 
width=88) - predicate:ss_customer_sk is not null - TableScan [TS_88] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_234] + SHUFFLE [RS_240] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_366] (rows=174243235 width=135) - Conds:RS_231._col1=RS_232._col0(Inner),Output:["_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_372] (rows=174243235 width=135) + Conds:RS_237._col1=RS_238._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_232] + SHUFFLE [RS_238] PartitionCols:_col0 - Group By Operator [GBY_150] (rows=58079562 width=88) + Group By Operator [GBY_153] (rows=58079562 width=88) Output:["_col0"],keys:_col1 - Select Operator [SEL_146] (rows=116159124 width=88) + Select Operator [SEL_149] (rows=116159124 width=88) Output:["_col1"] - Filter Operator [FIL_145] (rows=116159124 width=88) + Filter Operator [FIL_148] (rows=116159124 width=88) predicate:(_col3 > 4) - Select Operator [SEL_348] (rows=348477374 width=88) + Select Operator [SEL_354] (rows=348477374 width=88) Output:["_col0","_col3"] - Group By Operator [GBY_144] (rows=348477374 width=88) + Group By Operator [GBY_147] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_143] + SHUFFLE [RS_146] PartitionCols:_col0 - Group By Operator [GBY_142] (rows=696954748 width=88) + Group By Operator [GBY_145] (rows=696954748 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_140] (rows=696954748 width=88) + Select Operator [SEL_143] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_359] (rows=696954748 width=88) - Conds:RS_137._col1=RS_138._col0(Inner),Output:["_col3","_col5","_col6"] + Merge Join Operator 
[MERGEJOIN_365] (rows=696954748 width=88) + Conds:RS_140._col1=RS_141._col0(Inner),Output:["_col3","_col5","_col6"] <-Map 19 [SIMPLE_EDGE] - SHUFFLE [RS_138] + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_133] (rows=462000 width=1436) + Select Operator [SEL_136] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_338] (rows=462000 width=1436) + Filter Operator [FIL_344] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_137] + SHUFFLE [RS_140] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_358] (rows=633595212 width=88) - Conds:RS_134._col0=RS_135._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_364] (rows=633595212 width=88) + Conds:RS_137._col0=RS_138._col0(Inner),Output:["_col1","_col3"] <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_134] + SHUFFLE [RS_137] PartitionCols:_col0 - Select Operator [SEL_127] (rows=575995635 width=88) + Select Operator [SEL_130] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_336] (rows=575995635 width=88) + Filter Operator [FIL_342] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_135] + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_130] (rows=36525 width=1119) + Select Operator [SEL_133] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_337] (rows=36525 width=1119) + Filter Operator [FIL_343] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] <-Reducer 8 [SIMPLE_EDGE] - 
SHUFFLE [RS_231] + SHUFFLE [RS_237] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_357] (rows=158402938 width=135) - Conds:RS_228._col0=RS_229._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_363] (rows=158402938 width=135) + Conds:RS_234._col0=RS_235._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_229] + SHUFFLE [RS_235] PartitionCols:_col0 - Select Operator [SEL_124] (rows=18262 width=1119) + Select Operator [SEL_127] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_335] (rows=18262 width=1119) + Filter Operator [FIL_341] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 47 [SIMPLE_EDGE] - SHUFFLE [RS_228] + <-Map 49 [SIMPLE_EDGE] + SHUFFLE [RS_234] PartitionCols:_col0 - Select Operator [SEL_121] (rows=144002668 width=135) + Select Operator [SEL_124] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_334] (rows=144002668 width=135) + Filter Operator [FIL_340] (rows=144002668 width=135) predicate:(ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_119] (rows=144002668 width=135) + TableScan [TS_122] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_242] - Group By Operator [GBY_241] (rows=1 width=112) + Reduce Output Operator [RS_248] + Group By Operator [GBY_247] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_118] (rows=383314495 width=135) + Select Operator [SEL_121] (rows=383314495 width=135) Output:["_col0"] - Merge Join Operator [MERGEJOIN_368] (rows=383314495 width=135) - 
Conds:RS_115._col1=RS_116._col0(Inner),Output:["_col3","_col4"] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_116] + Merge Join Operator [MERGEJOIN_375] (rows=383314495 width=135) + Conds:RS_118._col1=RS_119._col0(Inner),Output:["_col3","_col4"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_108] (rows=105599202 width=321) + Select Operator [SEL_111] (rows=105599202 width=322) Output:["_col0"] - Filter Operator [FIL_107] (rows=105599202 width=321) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_367] (rows=316797606 width=321) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 24 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_103] - Select Operator [SEL_61] (rows=1 width=8) - Filter Operator [FIL_60] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_58] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_57] - Group By Operator [GBY_56] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_53] (rows=348477374 width=88) - Group By Operator [GBY_52] (rows=348477374 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_51] + Filter Operator [FIL_110] (rows=105599202 width=322) + predicate:(_col1 > (0.95 * _col2)) + Merge Join Operator [MERGEJOIN_374] (rows=316797606 width=322) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 23 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_373] (rows=316797606 width=97) + Conds:(Inner),Output:["_col0","_col1"] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_77] + Group By Operator [GBY_46] (rows=316797606 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Group By Operator [GBY_44] (rows=633595212 
width=88) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_42] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_358] (rows=633595212 width=88) + Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col0 - Group By Operator [GBY_50] (rows=696954748 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_48] (rows=696954748 width=88) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_353] (rows=696954748 width=88) - Conds:RS_45._col1=RS_46._col0(Inner),Output:["_col6"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0 - Select Operator [SEL_41] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_328] (rows=80000000 width=860) - predicate:c_customer_sk is not null - Please refer to the previous TableScan [TS_39] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_352] (rows=633595212 width=88) - Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col1"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_43] + Select Operator [SEL_38] (rows=80000000 width=860) + Output:["_col0"] + Filter Operator [FIL_333] (rows=80000000 width=860) + predicate:c_customer_sk is not null + Please refer to the previous TableScan [TS_36] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_35] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_332] (rows=575995635 width=88) + predicate:ss_customer_sk is not null + Please refer to the previous TableScan [TS_33] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_78] + Select Operator [SEL_76] (rows=1 width=8) + Filter Operator [FIL_75] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_73] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + 
PARTITION_ONLY_SHUFFLE [RS_72] + Group By Operator [GBY_71] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_68] (rows=348477374 width=88) + Group By Operator [GBY_67] (rows=348477374 width=88) + Output:["_col0"],keys:KEY._col0 + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Group By Operator [GBY_65] (rows=696954748 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_63] (rows=696954748 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_360] (rows=696954748 width=88) + Conds:RS_60._col1=RS_61._col0(Inner),Output:["_col6"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_38] (rows=36525 width=1119) + Select Operator [SEL_56] (rows=80000000 width=860) Output:["_col0"] - Filter Operator [FIL_327] (rows=36525 width=1119) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + Filter Operator [FIL_336] (rows=80000000 width=860) + predicate:c_customer_sk is not null Please refer to the previous TableScan [TS_36] - <-Map 20 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_35] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_326] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - Please refer to the previous TableScan [TS_33] + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_359] (rows=633595212 width=88) + Conds:RS_57._col0=RS_58._col0(Inner),Output:["_col1"] + <-Map 45 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Select Operator [SEL_53] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_335] (rows=36525 width=1119) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + Please refer to the previous TableScan [TS_51] + <-Map 42 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col0 + Select Operator [SEL_50] (rows=575995635 
width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_334] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + Please refer to the previous TableScan [TS_48] <-Reducer 35 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_104] - Group By Operator [GBY_86] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_108] + Group By Operator [GBY_105] (rows=1 width=224) Output:["_col0"],aggregations:["max(VALUE._col0)"] <-Reducer 34 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_85] - Group By Operator [GBY_84] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_104] + Group By Operator [GBY_103] (rows=1 width=224) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_82] (rows=348477374 width=88) + Select Operator [SEL_101] (rows=348477374 width=88) Output:["_col1"] - Group By Operator [GBY_81] (rows=348477374 width=88) + Group By Operator [GBY_100] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_80] + SHUFFLE [RS_99] PartitionCols:_col0 - Group By Operator [GBY_79] (rows=696954748 width=88) + Group By Operator [GBY_98] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_77] (rows=696954748 width=88) + Select Operator [SEL_96] (rows=696954748 width=88) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_355] (rows=696954748 width=88) - Conds:RS_74._col1=RS_75._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_75] + Merge Join Operator [MERGEJOIN_362] (rows=696954748 width=88) + Conds:RS_93._col1=RS_94._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 29 [SIMPLE_EDGE] + SHUFFLE [RS_94] PartitionCols:_col0 - Select Operator [SEL_70] (rows=80000000 width=860) + Select Operator [SEL_89] (rows=80000000 width=860) Output:["_col0"] - Filter Operator [FIL_331] (rows=80000000 width=860) + Filter Operator [FIL_339] 
(rows=80000000 width=860) predicate:c_customer_sk is not null - Please refer to the previous TableScan [TS_39] - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_74] + Please refer to the previous TableScan [TS_36] + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_93] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_354] (rows=633595212 width=88) - Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_72] + Merge Join Operator [MERGEJOIN_361] (rows=633595212 width=88) + Conds:RS_90._col0=RS_91._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 45 [SIMPLE_EDGE] + SHUFFLE [RS_91] PartitionCols:_col0 - Select Operator [SEL_67] (rows=36525 width=1119) + Select Operator [SEL_86] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_330] (rows=36525 width=1119) + Filter Operator [FIL_338] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_36] - <-Map 45 [SIMPLE_EDGE] - SHUFFLE [RS_71] + Please refer to the previous TableScan [TS_51] + <-Map 48 [SIMPLE_EDGE] + SHUFFLE [RS_90] PartitionCols:_col0 - Select Operator [SEL_64] (rows=575995635 width=88) + Select Operator [SEL_83] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_329] (rows=575995635 width=88) + Filter Operator [FIL_337] (rows=575995635 width=88) predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - Please refer to the previous TableScan [TS_62] - <-Reducer 42 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_105] - Group By Operator [GBY_101] (rows=316797606 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col0 - Group By Operator [GBY_99] (rows=633595212 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_97] (rows=633595212 width=88) - 
Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_356] (rows=633595212 width=88) - Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 40 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col0 - Select Operator [SEL_93] (rows=80000000 width=860) - Output:["_col0"] - Filter Operator [FIL_333] (rows=80000000 width=860) - predicate:c_customer_sk is not null - Please refer to the previous TableScan [TS_39] - <-Map 46 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col0 - Select Operator [SEL_90] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_332] (rows=575995635 width=88) - predicate:ss_customer_sk is not null - Please refer to the previous TableScan [TS_88] + Please refer to the previous TableScan [TS_81] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_115] + SHUFFLE [RS_118] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_365] (rows=348467716 width=135) - Conds:RS_112._col2=RS_113._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_371] (rows=348467716 width=135) + Conds:RS_115._col2=RS_116._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_113] + SHUFFLE [RS_116] PartitionCols:_col0 Group By Operator [GBY_31] (rows=58079562 width=88) Output:["_col0"],keys:_col1 @@ -549,7 +561,7 @@ Stage-0 Output:["_col1"] Filter Operator [FIL_26] (rows=116159124 width=88) predicate:(_col3 > 4) - Select Operator [SEL_347] (rows=348477374 width=88) + Select Operator [SEL_353] (rows=348477374 width=88) Output:["_col0","_col3"] Group By Operator [GBY_25] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 @@ -560,27 +572,27 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 Select Operator [SEL_21] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_351] (rows=696954748 width=88) + 
Merge Join Operator [MERGEJOIN_357] (rows=696954748 width=88) Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col5","_col6"] <-Map 19 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_14] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_325] (rows=462000 width=1436) + Filter Operator [FIL_331] (rows=462000 width=1436) predicate:i_item_sk is not null Please refer to the previous TableScan [TS_12] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_350] (rows=633595212 width=88) + Merge Join Operator [MERGEJOIN_356] (rows=633595212 width=88) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col3"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 Select Operator [SEL_8] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_323] (rows=575995635 width=88) + Filter Operator [FIL_329] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null) Please refer to the previous TableScan [TS_6] <-Map 18 [SIMPLE_EDGE] @@ -588,28 +600,28 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_11] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_324] (rows=36525 width=1119) + Filter Operator [FIL_330] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) Please refer to the previous TableScan [TS_9] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_112] + SHUFFLE [RS_115] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_349] (rows=316788826 width=135) - Conds:RS_109._col0=RS_110._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_355] (rows=316788826 width=135) + Conds:RS_112._col0=RS_113._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_110] + SHUFFLE [RS_113] PartitionCols:_col0 Select Operator [SEL_5] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_322] (rows=18262 
width=1119) + Filter Operator [FIL_328] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_109] + SHUFFLE [RS_112] PartitionCols:_col0 Select Operator [SEL_2] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_321] (rows=287989836 width=135) + Filter Operator [FIL_327] (rows=287989836 width=135) predicate:(cs_item_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] diff --git a/ql/src/test/results/clientpositive/perf/query44.q.out b/ql/src/test/results/clientpositive/perf/query44.q.out index 566548089c..a1e0036999 100644 --- a/ql/src/test/results/clientpositive/perf/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/query44.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[131][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 9' is a cross product -Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_4, $hdt$_5, $hdt$_3]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[93][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[95][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 11' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -71,209 +71,158 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 6 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE), Reducer 20 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 17 <- Map 15 (SIMPLE_EDGE) -Reducer 19 <- Map 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 20 <- Map 18 (SIMPLE_EDGE) +Reducer 10 <- Map 6 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 - File Output Operator [FS_104] - Limit [LIM_103] (rows=100 width=185) + File Output Operator [FS_72] + Limit [LIM_71] (rows=100 width=177) Number of rows:100 - Select Operator [SEL_102] (rows=1393898919384048 width=185) + Select Operator [SEL_70] (rows=1393898919384048 width=177) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_101] - Select Operator [SEL_100] (rows=1393898919384048 width=185) + SHUFFLE [RS_69] + Select Operator [SEL_68] (rows=1393898919384048 width=177) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_135] (rows=1393898919384048 width=185) - 
Conds:RS_97._col3=RS_98._col3(Inner),Output:["_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_97] (rows=1393898919384048 width=177) + Conds:RS_65._col3=RS_66._col3(Inner),Output:["_col1","_col3","_col5"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_97] + SHUFFLE [RS_65] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_132] (rows=1267180808338276 width=185) - Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_94] (rows=1267180808338276 width=177) + Conds:RS_62._col0=RS_63._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_94] + SHUFFLE [RS_62] PartitionCols:_col0 Select Operator [SEL_2] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_121] (rows=462000 width=1436) + Filter Operator [FIL_85] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=1436) default@item,i1,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_product_name"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_95] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_41] (rows=1151982528066248 width=185) + Select Operator [SEL_25] (rows=1151982528066248 width=177) Output:["_col0","_col1"] - Filter Operator [FIL_122] (rows=1151982528066248 width=185) - predicate:((rank_window_0 < 11) and _col2 is not null) - PTF Operator [PTF_40] (rows=3455947584198744 width=185) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST","partition by:":"0"}] - Select Operator [SEL_39] (rows=3455947584198744 width=185) - Output:["_col2","_col3"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_38] + Filter Operator [FIL_86] (rows=1151982528066248 width=177) + predicate:((rank_window_0 < 11) and _col0 is not null) + PTF Operator [PTF_24] (rows=3455947584198744 width=177) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"0"}] + Select Operator [SEL_23] 
(rows=3455947584198744 width=177) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_22] PartitionCols:0 - Filter Operator [FIL_37] (rows=3455947584198744 width=185) - predicate:(_col3 > (0.9 * _col1)) - Merge Join Operator [MERGEJOIN_131] (rows=10367842752596232 width=185) - Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 16 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_34] - Select Operator [SEL_25] (rows=71999454 width=88) + Filter Operator [FIL_21] (rows=3455947584198744 width=177) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_93] (rows=10367842752596232 width=177) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_19] + Select Operator [SEL_17] (rows=71999454 width=88) Output:["_col0"] - Group By Operator [GBY_24] (rows=71999454 width=88) + Group By Operator [GBY_16] (rows=71999454 width=88) Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_15] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=143998908 width=88) + Group By Operator [GBY_14] (rows=143998908 width=88) Output:["_col0","_col1"],aggregations:["avg(_col1)"],keys:410 - Select Operator [SEL_20] (rows=143998908 width=88) + Select Operator [SEL_12] (rows=143998908 width=88) Output:["_col1"] - Filter Operator [FIL_124] (rows=143998908 width=88) + Filter Operator [FIL_88] (rows=143998908 width=88) predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_18] (rows=575995635 width=88) + TableScan [TS_10] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_35] - Group By Operator [GBY_31] (rows=143998908 width=88) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_8] 
(rows=143998908 width=88) Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_30] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_7] PartitionCols:_col0 - Group By Operator [GBY_29] (rows=287997817 width=88) + Group By Operator [GBY_6] (rows=287997817 width=88) Output:["_col0","_col1"],aggregations:["avg(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_28] (rows=287997817 width=88) + Select Operator [SEL_5] (rows=287997817 width=88) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_125] (rows=287997817 width=88) + Filter Operator [FIL_87] (rows=287997817 width=88) predicate:(ss_store_sk = 410) - TableScan [TS_26] (rows=575995635 width=88) + TableScan [TS_3] (rows=575995635 width=88) default@store_sales,ss1,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_33] - Select Operator [SEL_17] (rows=1 width=8) - Filter Operator [FIL_16] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_14] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_13] - Group By Operator [GBY_12] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_10] (rows=71999454 width=88) - Group By Operator [GBY_9] (rows=71999454 width=88) - Output:["_col0"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_8] - PartitionCols:_col0 - Group By Operator [GBY_7] (rows=143998908 width=88) - Output:["_col0"],keys:410 - Select Operator [SEL_5] (rows=143998908 width=88) - Filter Operator [FIL_123] (rows=143998908 width=88) - predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_hdemo_sk","ss_store_sk"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_98] + SHUFFLE [RS_66] PartitionCols:_col3 - Merge Join 
Operator [MERGEJOIN_134] (rows=1267180808338276 width=185) - Conds:RS_90._col0=RS_91._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_96] (rows=1267180808338276 width=177) + Conds:RS_58._col0=RS_59._col0(Inner),Output:["_col1","_col3"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_90] + SHUFFLE [RS_58] PartitionCols:_col0 - Select Operator [SEL_47] (rows=462000 width=1436) + Select Operator [SEL_31] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_126] (rows=462000 width=1436) + Filter Operator [FIL_89] (rows=462000 width=1436) predicate:i_item_sk is not null Please refer to the previous TableScan [TS_0] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_91] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_59] PartitionCols:_col0 - Select Operator [SEL_86] (rows=1151982528066248 width=185) + Select Operator [SEL_54] (rows=1151982528066248 width=177) Output:["_col0","_col1"] - Filter Operator [FIL_127] (rows=1151982528066248 width=185) - predicate:((rank_window_0 < 11) and _col2 is not null) - PTF Operator [PTF_85] (rows=3455947584198744 width=185) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_84] (rows=3455947584198744 width=185) - Output:["_col2","_col3"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_83] + Filter Operator [FIL_90] (rows=1151982528066248 width=177) + predicate:((rank_window_0 < 11) and _col0 is not null) + PTF Operator [PTF_53] (rows=3455947584198744 width=177) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_52] (rows=3455947584198744 width=177) + Output:["_col0","_col1"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_51] PartitionCols:0 - Filter Operator [FIL_82] (rows=3455947584198744 width=185) - predicate:(_col3 > (0.9 * _col1)) - Merge Join Operator [MERGEJOIN_133] (rows=10367842752596232 width=185) - 
Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_78] - Select Operator [SEL_62] (rows=1 width=8) - Filter Operator [FIL_61] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_59] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_58] - Group By Operator [GBY_57] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_55] (rows=71999454 width=88) - Group By Operator [GBY_54] (rows=71999454 width=88) - Output:["_col0"],keys:KEY._col0 - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col0 - Group By Operator [GBY_52] (rows=143998908 width=88) - Output:["_col0"],keys:410 - Select Operator [SEL_50] (rows=143998908 width=88) - Filter Operator [FIL_128] (rows=143998908 width=88) - predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - Please refer to the previous TableScan [TS_3] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_79] - Select Operator [SEL_70] (rows=71999454 width=88) + Filter Operator [FIL_50] (rows=3455947584198744 width=177) + predicate:(_col1 > (0.9 * _col2)) + Merge Join Operator [MERGEJOIN_95] (rows=10367842752596232 width=177) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_47] + Group By Operator [GBY_37] (rows=143998908 width=88) + Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Group By Operator [GBY_35] (rows=287997817 width=88) + Output:["_col0","_col1"],aggregations:["avg(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_34] (rows=287997817 width=88) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_91] (rows=287997817 width=88) + predicate:(ss_store_sk = 410) + Please refer to the previous TableScan [TS_3] + <-Reducer 15 
[CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_48] + Select Operator [SEL_46] (rows=71999454 width=88) Output:["_col0"] - Group By Operator [GBY_69] (rows=71999454 width=88) + Group By Operator [GBY_45] (rows=71999454 width=88) Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_68] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0 - Group By Operator [GBY_67] (rows=143998908 width=88) + Group By Operator [GBY_43] (rows=143998908 width=88) Output:["_col0","_col1"],aggregations:["avg(_col1)"],keys:410 - Select Operator [SEL_65] (rows=143998908 width=88) + Select Operator [SEL_41] (rows=143998908 width=88) Output:["_col1"] - Filter Operator [FIL_129] (rows=143998908 width=88) + Filter Operator [FIL_92] (rows=143998908 width=88) predicate:((ss_store_sk = 410) and ss_hdemo_sk is null) - Please refer to the previous TableScan [TS_18] - <-Reducer 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_80] - Group By Operator [GBY_76] (rows=143998908 width=88) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_75] - PartitionCols:_col0 - Group By Operator [GBY_74] (rows=287997817 width=88) - Output:["_col0","_col1"],aggregations:["avg(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_73] (rows=287997817 width=88) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_130] (rows=287997817 width=88) - predicate:(ss_store_sk = 410) - Please refer to the previous TableScan [TS_26] + Please refer to the previous TableScan [TS_10] diff --git a/ql/src/test/results/clientpositive/perf/query54.q.out b/ql/src/test/results/clientpositive/perf/query54.q.out index 3cbcbe33f9..2b6283afd1 100644 --- a/ql/src/test/results/clientpositive/perf/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[191][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product 
-Warning: Shuffle Join MERGEJOIN[188][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4]] in Stage 'Reducer 12' is a cross product -Warning: Shuffle Join MERGEJOIN[190][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 14' is a cross product -Warning: Shuffle Join MERGEJOIN[192][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[189][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[191][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[192][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[193][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -115,282 +115,280 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 21 <- Union 22 (CONTAINS) -Map 27 <- Union 22 (CONTAINS) -Reducer 10 <- Map 1 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 19 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 32 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 1 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 19 <- Map 31 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 23 <- Map 28 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE) -Reducer 24 <- Map 29 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 25 <- Map 30 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 3 
(CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Map 13 <- Union 14 (CONTAINS) +Map 19 <- Union 14 (CONTAINS) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Union 14 (SIMPLE_EDGE) +Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 22 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE), Reducer 28 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 26 (SIMPLE_EDGE) +Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 26 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 26 (SIMPLE_EDGE) +Reducer 4 <- Map 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 25 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 - File Output Operator [FS_129] - Limit [LIM_128] (rows=100 width=158) + Reducer 11 + File Output Operator [FS_130] + Limit [LIM_129] (rows=100 width=158) Number of rows:100 - Select Operator [SEL_127] (rows=1614130953450400 width=158) + Select Operator [SEL_128] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_126] - Select Operator [SEL_125] (rows=1614130953450400 width=158) + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_127] + Select Operator [SEL_126] 
(rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_124] (rows=1614130953450400 width=158) + Group By Operator [GBY_125] (rows=1614130953450400 width=158) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_123] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_124] PartitionCols:_col0 - Group By Operator [GBY_122] (rows=3228261906900801 width=158) + Group By Operator [GBY_123] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_120] (rows=3228261906900801 width=158) + Select Operator [SEL_121] (rows=3228261906900801 width=158) Output:["_col0"] - Group By Operator [GBY_119] (rows=3228261906900801 width=158) + Group By Operator [GBY_120] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_118] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_119] PartitionCols:_col0 - Group By Operator [GBY_117] (rows=6456523813801603 width=158) - Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col0 - Select Operator [SEL_116] (rows=6456523813801603 width=158) - Output:["_col0","_col4"] - Filter Operator [FIL_115] (rows=6456523813801603 width=158) - predicate:_col11 BETWEEN _col13 AND _col15 - Select Operator [SEL_114] (rows=58108714324214428 width=158) - Output:["_col0","_col4","_col11","_col13","_col15"] - Merge Join Operator [MERGEJOIN_192] (rows=58108714324214428 width=158) - Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Select Operator [SEL_107] (rows=6363893803988 width=1217) - Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_190] (rows=6363893803988 width=1217) - Conds:(Left Outer),Output:["_col5","_col9","_col12","_col13"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_104] - 
Merge Join Operator [MERGEJOIN_189] (rows=696954748 width=97) - Conds:RS_101._col7=RS_102._col0(Inner),Output:["_col5","_col9","_col12"] - <-Map 32 [SIMPLE_EDGE] - SHUFFLE [RS_102] - PartitionCols:_col0 - Select Operator [SEL_80] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_180] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_78] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_101] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_188] (rows=633595212 width=97) - Conds:(Inner),Output:["_col5","_col7","_col9"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_99] - Select Operator [SEL_77] (rows=1 width=8) - Filter Operator [FIL_76] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_74] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_73] - Group By Operator [GBY_72] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_70] (rows=9131 width=1119) - Group By Operator [GBY_69] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_68] - PartitionCols:_col0 - Group By Operator [GBY_67] (rows=18262 width=1119) - Output:["_col0"],keys:_col0 - Select Operator [SEL_65] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_179] (rows=18262 width=1119) - predicate:((d_year = 1999) and (d_moy = 3)) - TableScan [TS_0] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_98] - Merge Join Operator [MERGEJOIN_187] (rows=633595212 width=88) - Conds:RS_95._col5=RS_96._col1(Inner),Output:["_col5","_col7","_col9"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_96] - PartitionCols:_col1 - Select 
Operator [SEL_62] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_178] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_60] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_186] (rows=316240138 width=135) - Conds:RS_92._col0=RS_93._col1(Inner),Output:["_col5"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_92] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_182] (rows=44000000 width=1014) - Conds:RS_89._col1, _col2=RS_90._col0, _col1(Inner),Output:["_col0"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:_col1, _col2 - Select Operator [SEL_25] (rows=40000000 width=1014) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_171] (rows=40000000 width=1014) - predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) - TableScan [TS_23] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 20 [SIMPLE_EDGE] - SHUFFLE [RS_90] - PartitionCols:_col0, _col1 - Select Operator [SEL_28] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_172] (rows=1704 width=1910) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_26] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col1 - Select Operator [SEL_59] (rows=287491029 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_58] (rows=287491029 width=135) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col0, _col1 - Group By Operator [GBY_56] (rows=574982058 
width=135) - Output:["_col0","_col1"],keys:_col10, _col9 - Merge Join Operator [MERGEJOIN_185] (rows=574982058 width=135) - Conds:RS_52._col1=RS_53._col0(Inner),Output:["_col9","_col10"] - <-Map 30 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col0 - Select Operator [SEL_45] (rows=80000000 width=860) - Output:["_col0","_col1"] - Filter Operator [FIL_177] (rows=80000000 width=860) - predicate:(c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_43] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_184] (rows=522710951 width=135) - Conds:RS_49._col2=RS_50._col0(Inner),Output:["_col1"] - <-Map 29 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col0 - Select Operator [SEL_42] (rows=115500 width=1436) - Output:["_col0"] - Filter Operator [FIL_176] (rows=115500 width=1436) - predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) - TableScan [TS_40] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_183] (rows=475191764 width=135) - Conds:Union 22._col0=RS_47._col0(Inner),Output:["_col1","_col2"] - <-Map 28 [SIMPLE_EDGE] - SHUFFLE [RS_47] - PartitionCols:_col0 - Select Operator [SEL_39] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_175] (rows=18262 width=1119) - predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_37] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Union 22 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0 - Select Operator [SEL_31] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter 
Operator [FIL_173] (rows=287989836 width=135) - predicate:(cs_item_sk is not null and cs_sold_date_sk is not null and cs_bill_customer_sk is not null) - TableScan [TS_29] (rows=287989836 width=135) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Map 27 [CONTAINS] - Reduce Output Operator [RS_46] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_174] (rows=144002668 width=135) - predicate:(ws_item_sk is not null and ws_sold_date_sk is not null and ws_bill_customer_sk is not null) - TableScan [TS_32] (rows=144002668 width=135) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_105] - Group By Operator [GBY_87] (rows=9131 width=1119) + Group By Operator [GBY_118] (rows=6456523813801603 width=158) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_117] (rows=6456523813801603 width=158) + Output:["_col0","_col1"] + Filter Operator [FIL_116] (rows=6456523813801603 width=158) + predicate:_col2 BETWEEN _col3 AND _col4 + Merge Join Operator [MERGEJOIN_193] (rows=58108714324214428 width=158) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_114] + Group By Operator [GBY_111] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_110] + PartitionCols:_col0 + Group By Operator [GBY_109] (rows=18262 width=1119) + Output:["_col0"],keys:_col0 + Select Operator [SEL_107] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_182] (rows=18262 width=1119) + predicate:((d_year = 1999) and (d_moy = 3)) + TableScan [TS_43] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Merge Join Operator 
[MERGEJOIN_192] (rows=6363893803988 width=1217) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_102] + Select Operator [SEL_97] (rows=1 width=8) + Filter Operator [FIL_96] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_94] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_93] + Group By Operator [GBY_92] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_90] (rows=9131 width=1119) + Group By Operator [GBY_89] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col0 + Group By Operator [GBY_87] (rows=18262 width=1119) + Output:["_col0"],keys:_col0 + Select Operator [SEL_85] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_181] (rows=18262 width=1119) + predicate:((d_year = 1999) and (d_moy = 3)) + Please refer to the previous TableScan [TS_43] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_101] + Merge Join Operator [MERGEJOIN_191] (rows=6363893803988 width=1208) + Conds:(Left Outer),Output:["_col0","_col1","_col2","_col3"] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_99] + Group By Operator [GBY_81] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_86] + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_80] PartitionCols:_col0 - Group By Operator [GBY_85] (rows=18262 width=1119) + Group By Operator [GBY_79] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_83] (rows=18262 width=1119) + Select Operator [SEL_77] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_181] (rows=18262 width=1119) + Filter Operator [FIL_180] (rows=18262 width=1119) predicate:((d_year = 1999) and (d_moy = 3)) - Please refer to the previous TableScan [TS_0] - <-Reducer 3 
[CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Merge Join Operator [MERGEJOIN_191] (rows=9131 width=1128) - Conds:(Right Outer),Output:["_col0"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_108] - Group By Operator [GBY_6] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_5] - PartitionCols:_col0 - Group By Operator [GBY_4] (rows=18262 width=1119) - Output:["_col0"],keys:_col0 - Select Operator [SEL_2] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_169] (rows=18262 width=1119) - predicate:((d_year = 1999) and (d_moy = 3)) - Please refer to the previous TableScan [TS_0] - <-Reducer 9 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_109] - Select Operator [SEL_22] (rows=1 width=8) - Filter Operator [FIL_21] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_19] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_18] - Group By Operator [GBY_17] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_15] (rows=9131 width=1119) - Group By Operator [GBY_14] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_13] + Please refer to the previous TableScan [TS_43] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_98] + Select Operator [SEL_74] (rows=696954748 width=88) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_190] (rows=696954748 width=88) + Conds:RS_71._col7=RS_72._col0(Inner),Output:["_col5","_col9","_col11"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_189] (rows=73049 width=1128) + Conds:(Inner),Output:["_col0","_col1"] + <-Map 24 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_58] + Select Operator [SEL_42] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_178] 
(rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_40] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_59] + Select Operator [SEL_57] (rows=1 width=8) + Filter Operator [FIL_56] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_54] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_53] + Group By Operator [GBY_52] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_50] (rows=9131 width=1119) + Group By Operator [GBY_49] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0 + Group By Operator [GBY_47] (rows=18262 width=1119) + Output:["_col0"],keys:_col0 + Select Operator [SEL_45] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_179] (rows=18262 width=1119) + predicate:((d_year = 1999) and (d_moy = 3)) + Please refer to the previous TableScan [TS_43] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_188] (rows=633595212 width=88) + Conds:RS_68._col5=RS_69._col1(Inner),Output:["_col5","_col7","_col9"] + <-Map 23 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col1 + Select Operator [SEL_39] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_177] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_37] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_68] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_187] (rows=316240138 width=135) + Conds:RS_65._col0=RS_66._col1(Inner),Output:["_col5"] + <-Reducer 
18 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col1 + Select Operator [SEL_36] (rows=287491029 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_35] (rows=287491029 width=135) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0, _col1 + Group By Operator [GBY_33] (rows=574982058 width=135) + Output:["_col0","_col1"],keys:_col10, _col9 + Merge Join Operator [MERGEJOIN_186] (rows=574982058 width=135) + Conds:RS_29._col1=RS_30._col0(Inner),Output:["_col9","_col10"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_22] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_176] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_20] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_185] (rows=522710951 width=135) + Conds:RS_26._col2=RS_27._col0(Inner),Output:["_col1"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=115500 width=1436) + Output:["_col0"] + Filter Operator [FIL_175] (rows=115500 width=1436) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) + TableScan [TS_17] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_184] (rows=475191764 width=135) + Conds:Union 14._col0=RS_24._col0(Inner),Output:["_col1","_col2"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_174] (rows=18262 width=1119) + predicate:((d_moy = 3) and (d_year 
= 1999) and d_date_sk is not null) + TableScan [TS_14] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] + Reduce Output Operator [RS_23] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_172] (rows=287989836 width=135) + predicate:(cs_item_sk is not null and cs_sold_date_sk is not null and cs_bill_customer_sk is not null) + TableScan [TS_6] (rows=287989836 width=135) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Map 19 [CONTAINS] + Reduce Output Operator [RS_23] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_173] (rows=144002668 width=135) + predicate:(ws_item_sk is not null and ws_sold_date_sk is not null and ws_bill_customer_sk is not null) + TableScan [TS_9] (rows=144002668 width=135) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_65] PartitionCols:_col0 - Group By Operator [GBY_12] (rows=18262 width=1119) - Output:["_col0"],keys:_col0 - Select Operator [SEL_10] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_170] (rows=18262 width=1119) - predicate:((d_year = 1999) and (d_moy = 3)) - Please refer to the previous TableScan [TS_0] + Merge Join Operator [MERGEJOIN_183] (rows=44000000 width=1014) + Conds:RS_62._col1, _col2=RS_63._col0, _col1(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col1, _col2 + Select Operator [SEL_2] (rows=40000000 width=1014) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_170] (rows=40000000 width=1014) + predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) + TableScan [TS_0] (rows=40000000 width=1014) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col0, _col1 + Select Operator [SEL_5] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_171] (rows=1704 width=1910) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_3] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] diff --git a/ql/src/test/results/clientpositive/perf/query58.q.out b/ql/src/test/results/clientpositive/perf/query58.q.out index acdfc07718..f34045d5ad 100644 --- a/ql/src/test/results/clientpositive/perf/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/query58.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[265][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 20' is a cross product -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_5, $hdt$_6]] in Stage 'Reducer 24' is a cross product -Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_6, $hdt$_7]] in Stage 'Reducer 28' is a cross product +Warning: Shuffle Join MERGEJOIN[268][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 19' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_5, $hdt$_6]] in Stage 'Reducer 25' is a cross product PREHOOK: query: explain with ss_items as (select i_item_id item_id @@ -136,23 +136,23 @@ Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Map 33 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 21 
(SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Map 31 (CUSTOM_SIMPLE_EDGE), Reducer 19 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 31 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 31 (CUSTOM_SIMPLE_EDGE), Reducer 23 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 31 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 31 (CUSTOM_SIMPLE_EDGE), Reducer 27 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 31 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 20 <- Map 18 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 18 (CUSTOM_SIMPLE_EDGE), Reducer 30 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 18 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 18 (CUSTOM_SIMPLE_EDGE), Reducer 31 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 18 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE) +Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (SIMPLE_EDGE) +Reducer 30 <- Map 28 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 28 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) @@ -164,302 +164,302 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File Output Operator [FS_164] - Limit [LIM_163] (rows=100 width=88) + File Output Operator [FS_167] + Limit [LIM_166] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_162] (rows=1442 width=88) + Select Operator [SEL_165] 
(rows=1442 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_161] - Select Operator [SEL_160] (rows=1442 width=88) + SHUFFLE [RS_164] + Select Operator [SEL_163] (rows=1442 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_154] (rows=1442 width=88) + Filter Operator [FIL_157] (rows=1442 width=88) predicate:(_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col3 BETWEEN (0.9 * _col5) AND (1.1 * _col5) and _col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) - Merge Join Operator [MERGEJOIN_279] (rows=766650239 width=88) - Conds:RS_150._col0=RS_151._col0(Inner),RS_150._col0=RS_152._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_282] (rows=766650239 width=88) + Conds:RS_153._col0=RS_154._col0(Inner),RS_153._col0=RS_155._col0(Inner),Output:["_col0","_col1","_col3","_col5"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_151] + SHUFFLE [RS_154] PartitionCols:_col0 - Group By Operator [GBY_98] (rows=348477374 width=88) + Group By Operator [GBY_100] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_97] + SHUFFLE [RS_99] PartitionCols:_col0 - Group By Operator [GBY_96] (rows=696954748 width=88) + Group By Operator [GBY_98] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_277] (rows=696954748 width=88) - Conds:RS_92._col0=RS_93._col0(Inner),Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_280] (rows=696954748 width=88) + Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_93] + SHUFFLE [RS_95] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_271] 
(rows=80353 width=1119) - Conds:RS_85._col1=RS_86._col0(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_274] (rows=80353 width=1119) + Conds:RS_87._col1=RS_88._col0(Inner),Output:["_col0"] <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_85] + SHUFFLE [RS_87] PartitionCols:_col1 - Select Operator [SEL_58] (rows=73049 width=1119) + Select Operator [SEL_59] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_254] (rows=73049 width=1119) + Filter Operator [FIL_257] (rows=73049 width=1119) predicate:(d_date is not null and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_86] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_88] PartitionCols:_col0 - Group By Operator [GBY_83] (rows=40176 width=1119) + Group By Operator [GBY_85] (rows=40176 width=1128) Output:["_col0"],keys:KEY._col0 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_82] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_84] PartitionCols:_col0 - Group By Operator [GBY_81] (rows=80353 width=1119) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_270] (rows=80353 width=1119) - Conds:RS_77._col1=RS_78._col1(Inner),Output:["_col2"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_78] - PartitionCols:_col1 - Select Operator [SEL_73] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_257] (rows=73049 width=1119) - predicate:(d_week_seq is not null and d_date is not null) - TableScan [TS_18] (rows=73049 width=1119) + Group By Operator [GBY_83] (rows=80353 width=1128) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_273] (rows=80353 width=1128) + Conds:RS_79._col1=RS_80._col0(Inner),Output:["_col0"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col0 + Select Operator [SEL_78] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_260] (rows=36524 width=1119) + predicate:((d_date = '1998-02-19') and d_week_seq is not 
null) + TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date","d_week_seq"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_77] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_79] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_269] (rows=36524 width=1128) - Conds:(Inner),Output:["_col1"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_75] - Select Operator [SEL_70] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_256] (rows=36524 width=1119) - predicate:((d_date = '1998-02-19') and d_week_seq is not null) - Please refer to the previous TableScan [TS_18] - <-Reducer 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_74] - Select Operator [SEL_67] (rows=1 width=8) - Filter Operator [FIL_66] (rows=1 width=8) + Merge Join Operator [MERGEJOIN_272] (rows=73049 width=1128) + Conds:(Inner),Output:["_col0","_col1"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_72] + Select Operator [SEL_62] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_258] (rows=73049 width=1119) + predicate:(d_week_seq is not null and d_date is not null) + Please refer to the previous TableScan [TS_9] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_73] + Select Operator [SEL_71] (rows=1 width=8) + Filter Operator [FIL_70] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_64] (rows=1 width=8) + Group By Operator [GBY_68] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_63] - Group By Operator [GBY_62] (rows=1 width=8) + <-Map 28 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_67] + Group By Operator [GBY_66] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_61] (rows=36524 width=1119) - Filter Operator [FIL_255] (rows=36524 width=1119) + Select Operator [SEL_65] (rows=36524 width=1119) + Filter Operator [FIL_259] (rows=36524 width=1119) predicate:(d_date 
= '1998-02-19') - TableScan [TS_9] (rows=73049 width=1119) + TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date"] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_92] + SHUFFLE [RS_94] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_268] (rows=633595212 width=88) - Conds:RS_89._col1=RS_90._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_271] (rows=633595212 width=88) + Conds:RS_91._col1=RS_92._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_90] + SHUFFLE [RS_92] PartitionCols:_col0 - Select Operator [SEL_55] (rows=462000 width=1436) + Select Operator [SEL_56] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_253] (rows=462000 width=1436) + Filter Operator [FIL_256] (rows=462000 width=1436) predicate:(i_item_sk is not null and i_item_id is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Map 32 [SIMPLE_EDGE] - SHUFFLE [RS_89] + SHUFFLE [RS_91] PartitionCols:_col1 - Select Operator [SEL_52] (rows=575995635 width=88) + Select Operator [SEL_53] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_252] (rows=575995635 width=88) + Filter Operator [FIL_255] (rows=575995635 width=88) predicate:(ss_item_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_50] (rows=575995635 width=88) + TableScan [TS_51] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_152] + SHUFFLE [RS_155] PartitionCols:_col0 - Group By Operator [GBY_148] (rows=87121617 width=135) + Group By Operator [GBY_151] (rows=87121617 width=135) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_147] + SHUFFLE [RS_150] PartitionCols:_col0 - Group By Operator [GBY_146] 
(rows=174243235 width=135) + Group By Operator [GBY_149] (rows=174243235 width=135) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_278] (rows=174243235 width=135) - Conds:RS_142._col0=RS_143._col0(Inner),Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_281] (rows=174243235 width=135) + Conds:RS_145._col0=RS_146._col0(Inner),Output:["_col2","_col4"] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_142] + SHUFFLE [RS_145] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_272] (rows=158402938 width=135) - Conds:RS_139._col1=RS_140._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_275] (rows=158402938 width=135) + Conds:RS_142._col1=RS_143._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_140] + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_105] (rows=462000 width=1436) + Select Operator [SEL_107] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_259] (rows=462000 width=1436) + Filter Operator [FIL_262] (rows=462000 width=1436) predicate:(i_item_sk is not null and i_item_id is not null) Please refer to the previous TableScan [TS_3] <-Map 33 [SIMPLE_EDGE] - SHUFFLE [RS_139] + SHUFFLE [RS_142] PartitionCols:_col1 - Select Operator [SEL_102] (rows=144002668 width=135) + Select Operator [SEL_104] (rows=144002668 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_258] (rows=144002668 width=135) + Filter Operator [FIL_261] (rows=144002668 width=135) predicate:(ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_100] (rows=144002668 width=135) + TableScan [TS_102] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_143] + SHUFFLE [RS_146] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_275] (rows=80353 width=1119) - 
Conds:RS_135._col1=RS_136._col0(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_278] (rows=80353 width=1119) + Conds:RS_138._col1=RS_139._col0(Inner),Output:["_col0"] <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_135] + SHUFFLE [RS_138] PartitionCols:_col1 - Select Operator [SEL_108] (rows=73049 width=1119) + Select Operator [SEL_110] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_260] (rows=73049 width=1119) + Filter Operator [FIL_263] (rows=73049 width=1119) predicate:(d_date is not null and d_date_sk is not null) Please refer to the previous TableScan [TS_6] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_136] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_139] PartitionCols:_col0 - Group By Operator [GBY_133] (rows=40176 width=1119) + Group By Operator [GBY_136] (rows=40176 width=1128) Output:["_col0"],keys:KEY._col0 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_132] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_135] PartitionCols:_col0 - Group By Operator [GBY_131] (rows=80353 width=1119) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_274] (rows=80353 width=1119) - Conds:RS_127._col1=RS_128._col1(Inner),Output:["_col2"] - <-Map 31 [SIMPLE_EDGE] - SHUFFLE [RS_128] + Group By Operator [GBY_134] (rows=80353 width=1128) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_277] (rows=80353 width=1128) + Conds:RS_130._col1=RS_131._col0(Inner),Output:["_col0"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_131] + PartitionCols:_col0 + Select Operator [SEL_129] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_266] (rows=36524 width=1119) + predicate:((d_date = '1998-02-19') and d_week_seq is not null) + Please refer to the previous TableScan [TS_9] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_130] PartitionCols:_col1 - Select Operator [SEL_123] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_263] (rows=73049 width=1119) - predicate:(d_week_seq is not null and d_date is not null) - Please refer to the 
previous TableScan [TS_18] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_127] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_273] (rows=36524 width=1128) - Conds:(Inner),Output:["_col1"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_125] - Select Operator [SEL_120] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_262] (rows=36524 width=1119) - predicate:((d_date = '1998-02-19') and d_week_seq is not null) - Please refer to the previous TableScan [TS_18] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_276] (rows=73049 width=1128) + Conds:(Inner),Output:["_col0","_col1"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_123] + Select Operator [SEL_113] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_264] (rows=73049 width=1119) + predicate:(d_week_seq is not null and d_date is not null) + Please refer to the previous TableScan [TS_9] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_124] - Select Operator [SEL_117] (rows=1 width=8) - Filter Operator [FIL_116] (rows=1 width=8) + Select Operator [SEL_122] (rows=1 width=8) + Filter Operator [FIL_121] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_114] (rows=1 width=8) + Group By Operator [GBY_119] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_113] - Group By Operator [GBY_112] (rows=1 width=8) + <-Map 28 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_118] + Group By Operator [GBY_117] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_111] (rows=36524 width=1119) - Filter Operator [FIL_261] (rows=36524 width=1119) + Select Operator [SEL_116] (rows=36524 width=1119) + Filter Operator [FIL_265] (rows=36524 width=1119) predicate:(d_date = '1998-02-19') - Please refer to the previous TableScan [TS_9] + Please refer to the previous TableScan [TS_12] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_150] + 
SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=174233858 width=135) + Group By Operator [GBY_49] (rows=174233858 width=135) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] + SHUFFLE [RS_48] PartitionCols:_col0 - Group By Operator [GBY_46] (rows=348467716 width=135) + Group By Operator [GBY_47] (rows=348467716 width=135) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_276] (rows=348467716 width=135) - Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_279] (rows=348467716 width=135) + Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col2","_col4"] <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_44] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_267] (rows=80353 width=1119) - Conds:RS_35._col1=RS_36._col0(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_270] (rows=80353 width=1119) + Conds:RS_36._col1=RS_37._col0(Inner),Output:["_col0"] <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_35] + SHUFFLE [RS_36] PartitionCols:_col1 Select Operator [SEL_8] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_248] (rows=73049 width=1119) + Filter Operator [FIL_251] (rows=73049 width=1119) predicate:(d_date is not null and d_date_sk is not null) Please refer to the previous TableScan [TS_6] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_36] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_37] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=40176 width=1119) + Group By Operator [GBY_34] (rows=40176 width=1128) Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_32] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0 - Group By Operator [GBY_31] (rows=80353 width=1119) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_266] (rows=80353 width=1119) - Conds:RS_27._col1=RS_28._col1(Inner),Output:["_col2"] - <-Map 31 
[SIMPLE_EDGE] + Group By Operator [GBY_32] (rows=80353 width=1128) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_269] (rows=80353 width=1128) + Conds:RS_28._col1=RS_29._col0(Inner),Output:["_col0"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Select Operator [SEL_27] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_254] (rows=36524 width=1119) + predicate:((d_date = '1998-02-19') and d_week_seq is not null) + Please refer to the previous TableScan [TS_9] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col1 - Select Operator [SEL_23] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_251] (rows=73049 width=1119) - predicate:(d_week_seq is not null and d_date is not null) - Please refer to the previous TableScan [TS_18] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_265] (rows=36524 width=1128) - Conds:(Inner),Output:["_col1"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_25] - Select Operator [SEL_20] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_250] (rows=36524 width=1119) - predicate:((d_date = '1998-02-19') and d_week_seq is not null) - Please refer to the previous TableScan [TS_18] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_24] - Select Operator [SEL_17] (rows=1 width=8) - Filter Operator [FIL_16] (rows=1 width=8) + Merge Join Operator [MERGEJOIN_268] (rows=73049 width=1128) + Conds:(Inner),Output:["_col0","_col1"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_21] + Select Operator [SEL_11] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_252] (rows=73049 width=1119) + predicate:(d_week_seq is not null and d_date is not null) + Please refer to the previous TableScan [TS_9] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_22] + Select Operator [SEL_20] (rows=1 width=8) + Filter Operator [FIL_19] (rows=1 width=8) 
predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_14] (rows=1 width=8) + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_13] - Group By Operator [GBY_12] (rows=1 width=8) + <-Map 28 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_16] + Group By Operator [GBY_15] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_11] (rows=36524 width=1119) - Filter Operator [FIL_249] (rows=36524 width=1119) + Select Operator [SEL_14] (rows=36524 width=1119) + Filter Operator [FIL_253] (rows=36524 width=1119) predicate:(d_date = '1998-02-19') - Please refer to the previous TableScan [TS_9] + Please refer to the previous TableScan [TS_12] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_43] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_264] (rows=316788826 width=135) - Conds:RS_39._col1=RS_40._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_267] (rows=316788826 width=135) + Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_41] PartitionCols:_col0 Select Operator [SEL_5] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_247] (rows=462000 width=1436) + Filter Operator [FIL_250] (rows=462000 width=1436) predicate:(i_item_sk is not null and i_item_id is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_40] PartitionCols:_col1 Select Operator [SEL_2] (rows=287989836 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_246] (rows=287989836 width=135) + Filter Operator [FIL_249] (rows=287989836 width=135) predicate:(cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] diff --git a/ql/src/test/results/clientpositive/perf/query6.q.out b/ql/src/test/results/clientpositive/perf/query6.q.out index 03fff79951..fd35f7ed90 100644 --- a/ql/src/test/results/clientpositive/perf/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/query6.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[111][tables = [$hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[110][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 14' is a cross product PREHOOK: query: explain select a.ca_state state, count(*) cnt from customer_address a @@ -52,179 +52,177 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 19 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 20 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 14 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 15 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 20 (SIMPLE_EDGE), Reducer 
5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 + Reducer 8 File Output Operator [FS_77] Limit [LIM_76] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_75] (rows=42591679 width=88) + Select Operator [SEL_75] (rows=51535934 width=88) Output:["_col0","_col1"] - <-Reducer 6 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_74] - Filter Operator [FIL_72] (rows=42591679 width=88) + Filter Operator [FIL_72] (rows=51535934 width=88) predicate:(_col1 >= 10) - Group By Operator [GBY_71] (rows=127775039 width=88) + Group By Operator [GBY_71] (rows=154607804 width=88) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col0 - Group By Operator [GBY_69] (rows=255550079 width=88) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 - Select Operator [SEL_68] (rows=255550079 width=88) - Output:["_col1"] - Filter Operator [FIL_67] (rows=255550079 width=88) - predicate:(_col10 > (1.2 * CASE WHEN (_col15 is null) THEN (null) ELSE (_col14) END)) - Select Operator [SEL_66] (rows=766650239 width=88) - Output:["_col1","_col10","_col14","_col15"] - Merge Join Operator [MERGEJOIN_114] (rows=766650239 width=88) - Conds:RS_63._col6=RS_64._col0(Inner),Output:["_col1","_col3","_col4","_col12"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_64] - PartitionCols:_col0 - Select Operator [SEL_53] (rows=80353 width=1119) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_112] (rows=80353 width=1119) - Conds:RS_50._col0=RS_51._col1(Inner),Output:["_col2"] - <-Map 20 [SIMPLE_EDGE] + Group By Operator [GBY_69] (rows=309215609 width=88) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 + Select Operator [SEL_68] (rows=309215609 width=88) + Output:["_col0"] + Filter Operator [FIL_67] (rows=309215609 width=88) + predicate:(_col2 
> (1.2 * CASE WHEN (_col6 is null) THEN (null) ELSE (_col5) END)) + Merge Join Operator [MERGEJOIN_114] (rows=927646829 width=88) + Conds:RS_64._col3=RS_65._col2(Left Outer),Output:["_col0","_col2","_col5","_col6"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col2 + Select Operator [SEL_60] (rows=231000 width=1436) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_59] (rows=231000 width=1436) + Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Group By Operator [GBY_57] (rows=462000 width=1436) + Output:["_col0","_col1"],aggregations:["avg(i_current_price)"],keys:i_category + Filter Operator [FIL_107] (rows=462000 width=1436) + predicate:i_category is not null + TableScan [TS_54] (rows=462000 width=1436) + default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_113] (rows=843315281 width=88) + Conds:RS_61._col1=RS_62._col0(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col0 + Group By Operator [GBY_52] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_51] - PartitionCols:_col1 - Select Operator [SEL_46] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_107] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_month_seq is not null) - TableScan [TS_44] (rows=73049 width=1119) - default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_50] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_111] (rows=9131 width=1128) - Conds:(Inner),Output:["_col0"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_47] - Group By Operator [GBY_28] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] - 
SHUFFLE [RS_27] - PartitionCols:_col0 - Group By Operator [GBY_26] (rows=18262 width=1119) - Output:["_col0"],keys:d_month_seq - Select Operator [SEL_25] (rows=18262 width=1119) - Output:["d_month_seq"] - Filter Operator [FIL_105] (rows=18262 width=1119) - predicate:((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) - TableScan [TS_23] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_48] - Select Operator [SEL_43] (rows=1 width=8) - Filter Operator [FIL_42] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_40] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - Group By Operator [GBY_38] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_36] (rows=9131 width=1119) - Group By Operator [GBY_35] (rows=9131 width=1119) - Output:["_col0"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Group By Operator [GBY_33] (rows=18262 width=1119) - Output:["_col0"],keys:d_month_seq - Select Operator [SEL_32] (rows=18262 width=1119) - Output:["d_month_seq"] - Filter Operator [FIL_106] (rows=18262 width=1119) - predicate:((d_year = 2000) and (d_moy = 2)) - Please refer to the previous TableScan [TS_23] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_63] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_113] (rows=696954748 width=88) - Conds:RS_60._col8=RS_61._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col12"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_110] (rows=88000001 width=860) - Conds:RS_19._col1=RS_20._col0(Inner),Output:["_col0","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=80000000 width=860) - Output:["_col0","_col1"] - 
Filter Operator [FIL_103] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_13] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_104] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_16] (rows=40000000 width=1014) - default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_109] (rows=633595212 width=88) - Conds:RS_57._col0=RS_58._col1(Inner),Output:["_col1","_col3","_col4","_col6","_col8"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col1 - Select Operator [SEL_12] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_102] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) - TableScan [TS_10] (rows=575995635 width=88) - default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_57] + Group By Operator [GBY_50] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_49] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_106] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) + TableScan [TS_19] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col1 + Select Operator [SEL_46] (rows=766650239 width=88) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_112] (rows=766650239 
width=88) + Conds:RS_43._col1=RS_44._col0(Inner),Output:["_col4","_col8","_col10","_col11"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_108] (rows=508200 width=1436) - Conds:RS_54._col2=RS_55._col2(Left Outer),Output:["_col0","_col1","_col3","_col4"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=462000 width=1436) + Merge Join Operator [MERGEJOIN_110] (rows=462000 width=1445) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_33] + Select Operator [SEL_18] (rows=462000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_100] (rows=462000 width=1436) + Filter Operator [FIL_104] (rows=462000 width=1436) predicate:i_item_sk is not null - TableScan [TS_0] (rows=462000 width=1436) + TableScan [TS_16] (rows=462000 width=1436) default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_category"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_55] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_34] + Select Operator [SEL_32] (rows=1 width=8) + Filter Operator [FIL_31] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_29] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_28] + Group By Operator [GBY_27] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_25] (rows=9131 width=1119) + Group By Operator [GBY_24] (rows=9131 width=1119) + Output:["_col0"],keys:KEY._col0 + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0 + Group By Operator [GBY_22] (rows=18262 width=1119) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_21] (rows=18262 width=1119) + Output:["d_month_seq"] + Filter Operator [FIL_105] (rows=18262 width=1119) + predicate:((d_year = 2000) and (d_moy = 2)) + Please refer to the previous 
TableScan [TS_19] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_111] (rows=696954748 width=88) + Conds:RS_40._col2=RS_41._col0(Inner),Output:["_col1","_col4","_col8"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_109] (rows=88000001 width=860) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col3"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=80000000 width=860) + Output:["_col0","_col1"] + Filter Operator [FIL_102] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_6] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_103] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_9] (rows=40000000 width=1014) + default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col2 - Select Operator [SEL_9] (rows=231000 width=1436) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=231000 width=1436) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_7] - PartitionCols:_col0 - Group By Operator [GBY_6] (rows=462000 width=1436) - Output:["_col0","_col1"],aggregations:["avg(i_current_price)"],keys:i_category - Filter Operator [FIL_101] (rows=462000 width=1436) - predicate:i_category is not null - TableScan [TS_3] (rows=462000 width=1436) - default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] + Merge Join Operator [MERGEJOIN_108] (rows=633595212 width=88) + 
Conds:RS_37._col0=RS_38._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_100] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_101] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_month_seq is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]