diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java index 83b59f6c37..a44735e873 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HivePlannerContext.java @@ -35,14 +35,14 @@ public HivePlannerContext(HiveAlgorithmsConf algoConfig, HiveRulesRegistry registry, CalciteConnectionConfig calciteConfig, Set corrScalarRexSQWithAgg, - Set scalarAggNoGbyWindowing, HiveConfPlannerContext isCorrelatedColumns) { + HiveConfPlannerContext isCorrelatedColumns) { this.algoConfig = algoConfig; this.registry = registry; this.calciteConfig = calciteConfig; // this is to keep track if a subquery is correlated and contains aggregate // this is computed in CalcitePlanner while planning and is later required by subuery remove rule // hence this is passed using HivePlannerContext - this.subqueryConfig = new SubqueryConf(corrScalarRexSQWithAgg, scalarAggNoGbyWindowing); + this.subqueryConfig = new SubqueryConf(corrScalarRexSQWithAgg); this.isCorrelatedColumns = isCorrelatedColumns; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SubqueryConf.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SubqueryConf.java index e1ca2f5c56..9b26f4592a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SubqueryConf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/SubqueryConf.java @@ -24,19 +24,13 @@ public class SubqueryConf{ private Set corrScalarRexSQWithAgg; - private Set scalarAggWithoutGbyWindowing; - public SubqueryConf(Set corrScalarRexSQWithAgg, - Set scalarAggWithoutGbyWindowing) { + public SubqueryConf(Set corrScalarRexSQWithAgg){ this.corrScalarRexSQWithAgg = corrScalarRexSQWithAgg; - this.scalarAggWithoutGbyWindowing = scalarAggWithoutGbyWindowing; } public Set getCorrScalarRexSQWithAgg() { return corrScalarRexSQWithAgg; } - public Set getScalarAggWithoutGbyWindowing() { - return scalarAggWithoutGbyWindowing; - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index ea19302998..ec34e545c2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -27,6 +27,7 @@ import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rex.LogicVisitor; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.rex.RexNode; @@ -53,7 +54,6 @@ import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.SubqueryConf; @@ -106,11 +106,9 @@ public void onMatch(RelOptRuleCall call) { SubqueryConf subqueryConfig = filter.getCluster().getPlanner(). getContext().unwrap(SubqueryConf.class); boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel); - boolean hasNoWindowingAndNoGby = - subqueryConfig.getScalarAggWithoutGbyWindowing().contains(e.rel); - final RexNode target = apply(e, HiveFilter.getVariablesSet(e), logic, - builder, 1, fieldCount, isCorrScalarQuery, hasNoWindowingAndNoGby); + final RexNode target = apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), logic, + builder, 1, fieldCount, isCorrScalarQuery); final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); builder.filter(shuttle.apply(filter.getCondition())); builder.project(fields(builder, filter.getRowType().getFieldCount())); @@ -132,11 +130,9 @@ public void onMatch(RelOptRuleCall call) { SubqueryConf subqueryConfig = project.getCluster().getPlanner().getContext().unwrap(SubqueryConf.class); boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel); - boolean hasNoWindowingAndNoGby = - subqueryConfig.getScalarAggWithoutGbyWindowing().contains(e.rel); - final RexNode target = apply(e, HiveFilter.getVariablesSet(e), - logic, builder, 1, fieldCount, isCorrScalarQuery, hasNoWindowingAndNoGby); + final RexNode target = apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), + logic, builder, 1, fieldCount, isCorrScalarQuery); final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); builder.project(shuttle.apply(project.getProjects()), project.getRowType().getFieldNames()); @@ -166,23 +162,17 @@ private SqlTypeName getAggTypeForScalarSub(RexSubQuery e) { return relAgg.getAggCallList().get(0).getType().getSqlTypeName(); } - protected RexNode apply(RexSubQuery e, Set variablesSet, - RelOptUtil.Logic logic, - HiveSubQRemoveRelBuilder builder, int inputCount, int offset, - boolean isCorrScalarAgg, - boolean hasNoWindowingAndNoGby) { + protected RexNode apply(RelMetadataQuery mq, RexSubQuery e, Set variablesSet, + RelOptUtil.Logic logic, + HiveSubQRemoveRelBuilder builder, int inputCount, int offset, + boolean isCorrScalarAgg) { switch (e.getKind()) { case SCALAR_QUERY: // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check // since it is guaranteed to produce at most one row - if(!hasNoWindowingAndNoGby) { - final List parentQueryFields = new ArrayList<>(); - if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) { - // we want to have project after join since sq_count_check's count() expression wouldn't - // be needed further up - parentQueryFields.addAll(builder.fields()); - } - + Double maxRowCount = mq.getMaxRowCount(e.rel); + boolean shouldIntroSQCountCheck = maxRowCount== null || maxRowCount > 1.0; + if(shouldIntroSQCountCheck) { builder.push(e.rel); // returns single row/column builder.aggregate(builder.groupKey(), builder.count(false, "cnt")); @@ -201,12 +191,7 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, } else { builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - - if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) { - builder.project(parentQueryFields); - } else { - offset++; - } + offset++; } if(isCorrScalarAgg) { // Transformation : diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 38afaf425c..3333dbb888 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -400,11 +400,11 @@ public RelNode genLogicalPlan(ASTNode ast) throws SemanticException { } public static RelOptPlanner createPlanner(HiveConf conf) { - return createPlanner(conf, new HashSet(), new HashSet()); + return createPlanner(conf, new HashSet()); } private static RelOptPlanner createPlanner( - HiveConf conf, Set corrScalarRexSQWithAgg, Set scalarAggNoGbyNoWin) { + HiveConf conf, Set corrScalarRexSQWithAgg) { final Double maxSplitSize = (double) HiveConf.getLongVar( conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE); final Double maxMemory = (double) HiveConf.getLongVar( @@ -423,7 +423,7 @@ private static RelOptPlanner createPlanner( boolean heuristicMaterializationStrategy = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_SELECTION_STRATEGY).equals("heuristic"); HivePlannerContext confContext = new HivePlannerContext(algorithmsConf, registry, calciteConfig, - corrScalarRexSQWithAgg, scalarAggNoGbyNoWin, + corrScalarRexSQWithAgg, new HiveConfPlannerContext(isCorrelatedColumns, heuristicMaterializationStrategy)); return HiveVolcanoPlanner.createPlanner(confContext); } @@ -1727,7 +1727,6 @@ private RowResolver genRowResolver(Operator op, QB qb) { // this is to keep track if a subquery is correlated and contains aggregate // since this is special cased when it is rewritten in SubqueryRemoveRule Set corrScalarRexSQWithAgg = new HashSet(); - Set scalarAggNoGbyNoWin = new HashSet(); // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. @@ -1753,7 +1752,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu /* * recreate cluster, so that it picks up the additional traitDef */ - RelOptPlanner planner = createPlanner(conf, corrScalarRexSQWithAgg, scalarAggNoGbyNoWin); + RelOptPlanner planner = createPlanner(conf, corrScalarRexSQWithAgg); final RexBuilder rexBuilder = cluster.getRexBuilder(); final RelOptCluster optCluster = RelOptCluster.create(planner, rexBuilder); @@ -3141,8 +3140,8 @@ private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel, } private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel, - boolean forHavingClause, Set corrScalarQueries, - Set scalarQueriesWithAggNoWinNoGby) throws SemanticException { + boolean forHavingClause, Set corrScalarQueries) + throws SemanticException { List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); @@ -3180,9 +3179,6 @@ private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel, if(subqueryConfig[0]) { corrScalarQueries.add(originalSubQueryAST); } - if(subqueryConfig[1]) { - scalarQueriesWithAggNoWinNoGby.add(originalSubQueryAST); - } } } @@ -3341,12 +3337,10 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean Map subQueryToRelNode) throws CalciteSubquerySemanticException { Set corrScalarQueriesWithAgg = new HashSet(); - Set scalarQueriesWithAggNoWinNoGby = new HashSet(); boolean isSubQuery = false; try { //disallow subqueries which HIVE doesn't currently support - subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg, - scalarQueriesWithAggNoWinNoGby); + subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg); Deque stack = new ArrayDeque(); stack.push(node); @@ -3379,9 +3373,6 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean if (corrScalarQueriesWithAgg.contains(next)) { corrScalarRexSQWithAgg.add(subQueryRelNode); } - if (scalarQueriesWithAggNoWinNoGby.contains(next)) { - scalarAggNoGbyNoWin.add(subQueryRelNode); - } isSubQuery = true; break; default: diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java index e8dd57228f..a6da2647fe 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java @@ -62,7 +62,7 @@ public void testRuleFiredOnlyOnce() { // Create rules registry to not trigger a rule more than once HiveRulesRegistry registry = new HiveRulesRegistry(); HivePlannerContext context = new HivePlannerContext(null, registry, null, - null, null, null); + null, null); HepPlanner planner = new HepPlanner(programBuilder.build(), context); // Cluster diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index c4f8307f6d..d83aaa7f72 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -207,8 +207,8 @@ POSTHOOK: Input: default@part_null_n0 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -219,8 +219,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@tempty_n0 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -270,13 +270,13 @@ STAGE PLANS: alias: tempty_n0 Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c (type: char(2)) + expressions: UDFToDouble(c) (type: double) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: char(2)) + value expressions: _col0 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Map 6 @@ -285,13 +285,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -324,7 +324,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 95 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: char(2)) + value expressions: _col1 (type: double) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -334,16 +334,16 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - residual filter predicates: {(UDFToDouble(_col7) > UDFToDouble(_col1))} - Statistics: Num rows: 8 Data size: 5720 Basic stats: PARTIAL Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col11 > _col1)} + Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5720 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 5720 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -489,8 +489,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where (select i from tnull_n0 limit 1) is null PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -510,30 +509,26 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 8 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: tnull_n0 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 3 Map Operator Tree: TableScan alias: tnull_n0 @@ -552,54 +547,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -608,42 +556,23 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col9 is null (type: boolean) - Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: _col9 is null (type: boolean) + Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -664,8 +593,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where (select i from tnull_n0 limit 1) is null PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -1014,8 +942,7 @@ POSTHOOK: Input: default@part 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -1037,27 +964,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 6 <- Map 8 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 7 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 8 Map Operator Tree: TableScan alias: part @@ -1072,7 +984,42 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {(_col2 > _col3)} + Statistics: Num rows: 8 Data size: 1816 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -1108,108 +1055,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS LAST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: first_value_window_0 - arguments: _col5 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: first_value_window_0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col3, _col4 - residual filter predicates: {(_col4 > _col1)} - Statistics: Num rows: 8 Data size: 1816 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col4 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -1230,8 +1076,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[21][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -2890,8 +2735,7 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### 2346.3 3461.37 -Warning: Shuffle Join MERGEJOIN[78][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2909,20 +2753,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Union 9 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) - Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) - Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 15 <- Reducer 13 (SIMPLE_EDGE), Union 9 (CONTAINS) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 11 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 7 <- Map 16 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Union 9 (CONTAINS) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 7 <- Union 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToLong(p_size) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: part @@ -2943,7 +2797,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 12 + Map 8 Map Operator Tree: TableScan alias: part @@ -2964,115 +2818,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 16 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 10 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 = 2L) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 11 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 13 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 14 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3095,6 +2841,29 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + residual filter predicates: {(_col9 > _col10)} + Statistics: Num rows: 8 Data size: 5080 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3115,13 +2884,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3143,7 +2906,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3163,55 +2926,16 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - residual filter predicates: {(UDFToLong(_col7) > _col0)} - Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col1) + aggregations: count() keys: _col0 (type: bigint) minReductionHashAggr: 0.0 mode: hash @@ -3223,10 +2947,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Union 4 - Vertex: Union 4 - Union 9 - Vertex: Union 9 + Union 6 + Vertex: Union 6 Stage: Stage-0 Fetch Operator @@ -3234,8 +2956,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[78][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 0a01852b9f..c4dfc05fc6 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -2986,8 +2986,7 @@ POSTHOOK: Input: default@part 6 false 6 false 7 false -Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -3005,36 +3004,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 8 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_size (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 8 Map Operator Tree: TableScan alias: part @@ -3049,41 +3024,23 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.0 - mode: hash + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + LLAP IO: no inputs + Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -3092,33 +3049,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col1 (type: boolean) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) - Reducer 6 + value expressions: _col1 (type: boolean) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -3132,7 +3070,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -3157,8 +3095,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out index d01b5ac5d1..36bd0e08e9 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out @@ -1,5 +1,5 @@ -Warning: Map Join MAPJOIN[274][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[275][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[204][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[205][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -126,17 +126,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 18 (PARTITION-LEVEL SORT, 398) - Reducer 15 <- Map 19 (PARTITION-LEVEL SORT, 975), Reducer 14 (PARTITION-LEVEL SORT, 975) - Reducer 16 <- Reducer 15 (GROUP, 481) - Reducer 17 <- Reducer 16 (GROUP, 1) - Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398) - Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 21 (PARTITION-LEVEL SORT, 975) - Reducer 23 <- Reducer 22 (GROUP, 481) - Reducer 24 <- Reducer 23 (GROUP, 1) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 398), Map 22 (PARTITION-LEVEL SORT, 398) + Reducer 19 <- Map 23 (PARTITION-LEVEL SORT, 975), Reducer 18 (PARTITION-LEVEL SORT, 975) + Reducer 20 <- Reducer 19 (GROUP, 481) + Reducer 21 <- Reducer 20 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 13 + Map 17 Map Operator Tree: TableScan alias: store_sales @@ -156,7 +152,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(18,2)) Execution mode: vectorized - Map 18 + Map 22 Map Operator Tree: TableScan alias: date_dim @@ -175,7 +171,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 19 + Map 23 Map Operator Tree: TableScan alias: customer @@ -194,154 +190,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 20 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(18,2)) - Execution mode: vectorized - Map 25 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 26 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(18,2)) - Reducer 15 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(28,2)) - Reducer 16 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 17 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 - Reducer 21 + Reducer 18 Reduce Operator Tree: Join Operator condition map: @@ -357,7 +206,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(18,2)) - Reducer 22 + Reducer 19 Reduce Operator Tree: Join Operator condition map: @@ -380,7 +229,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(28,2)) - Reducer 23 + Reducer 20 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -403,7 +252,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(28,2)) - Reducer 24 + Reducer 21 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -413,26 +262,25 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 + Select Operator + expressions: (0.95 * _col0) (type: decimal(31,4)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Stage: Stage-3 Spark Edges: - Reducer 43 <- Map 42 (PARTITION-LEVEL SORT, 398), Map 47 (PARTITION-LEVEL SORT, 398) - Reducer 44 <- Map 48 (PARTITION-LEVEL SORT, 975), Reducer 43 (PARTITION-LEVEL SORT, 975) - Reducer 45 <- Reducer 44 (GROUP, 481) - Reducer 46 <- Reducer 45 (GROUP, 1) - Reducer 50 <- Map 49 (PARTITION-LEVEL SORT, 398), Map 54 (PARTITION-LEVEL SORT, 398) - Reducer 51 <- Map 55 (PARTITION-LEVEL SORT, 975), Reducer 50 (PARTITION-LEVEL SORT, 975) - Reducer 52 <- Reducer 51 (GROUP, 481) - Reducer 53 <- Reducer 52 (GROUP, 1) + Reducer 40 <- Map 39 (PARTITION-LEVEL SORT, 398), Map 44 (PARTITION-LEVEL SORT, 398) + Reducer 41 <- Map 45 (PARTITION-LEVEL SORT, 975), Reducer 40 (PARTITION-LEVEL SORT, 975) + Reducer 42 <- Reducer 41 (GROUP, 481) + Reducer 43 <- Reducer 42 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 42 + Map 39 Map Operator Tree: TableScan alias: store_sales @@ -452,7 +300,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(18,2)) Execution mode: vectorized - Map 47 + Map 44 Map Operator Tree: TableScan alias: date_dim @@ -471,7 +319,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 48 + Map 45 Map Operator Tree: TableScan alias: customer @@ -490,154 +338,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 49 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(18,2)) - Execution mode: vectorized - Map 54 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 55 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reducer 43 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(18,2)) - Reducer 44 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(28,2)) - Reducer 45 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 46 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 - Reducer 50 + Reducer 40 Reduce Operator Tree: Join Operator condition map: @@ -653,7 +354,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(18,2)) - Reducer 51 + Reducer 41 Reduce Operator Tree: Join Operator condition map: @@ -676,7 +377,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(28,2)) - Reducer 52 + Reducer 42 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -699,7 +400,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(28,2)) - Reducer 53 + Reducer 43 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -709,28 +410,31 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 + Select Operator + expressions: (0.95 * _col0) (type: decimal(31,4)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 38 (GROUP PARTITION-LEVEL SORT, 481) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 305), Map 35 (PARTITION-LEVEL SORT, 305) - Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 935), Map 30 (PARTITION-LEVEL SORT, 935) - Reducer 29 <- Reducer 28 (GROUP, 437) + Reducer 10 <- Reducer 31 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 935), Map 16 (PARTITION-LEVEL SORT, 935) + Reducer 15 <- Reducer 14 (GROUP, 437) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 305), Map 28 (PARTITION-LEVEL SORT, 305) + Reducer 25 <- Map 24 (PARTITION-LEVEL SORT, 154), Map 28 (PARTITION-LEVEL SORT, 154) + Reducer 26 <- Reducer 10 (PARTITION-LEVEL SORT, 209), Reducer 25 (PARTITION-LEVEL SORT, 209) + Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 444), Reducer 37 (PARTITION-LEVEL SORT, 444) Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 376), Reducer 2 (PARTITION-LEVEL SORT, 376) - Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 35 (PARTITION-LEVEL SORT, 154) - Reducer 33 <- Reducer 10 (PARTITION-LEVEL SORT, 209), Reducer 32 (PARTITION-LEVEL SORT, 209) - Reducer 34 <- Reducer 33 (PARTITION-LEVEL SORT, 543), Reducer 58 (PARTITION-LEVEL SORT, 543) - Reducer 37 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 36 (PARTITION-LEVEL SORT, 398) - Reducer 38 <- Map 12 (PARTITION-LEVEL SORT, 442), Reducer 37 (PARTITION-LEVEL SORT, 442) - Reducer 4 <- Reducer 29 (PARTITION-LEVEL SORT, 727), Reducer 3 (PARTITION-LEVEL SORT, 727) - Reducer 5 <- Reducer 34 (GROUP, 1), Reducer 4 (GROUP, 1) - Reducer 58 <- Reducer 28 (GROUP, 437) + Reducer 30 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 29 (PARTITION-LEVEL SORT, 398) + Reducer 31 <- Map 12 (PARTITION-LEVEL SORT, 442), Reducer 30 (PARTITION-LEVEL SORT, 442) + Reducer 37 <- Reducer 14 (GROUP, 437) + Reducer 4 <- Reducer 15 (PARTITION-LEVEL SORT, 628), Reducer 3 (PARTITION-LEVEL SORT, 628) + Reducer 5 <- Reducer 27 (GROUP, 1), Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -793,7 +497,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 27 + Map 13 Map Operator Tree: TableScan alias: store_sales @@ -813,7 +517,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(18,2)) Execution mode: vectorized - Map 30 + Map 16 Map Operator Tree: TableScan alias: customer @@ -832,7 +536,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 31 + Map 24 Map Operator Tree: TableScan alias: web_sales @@ -852,7 +556,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 35 + Map 28 Map Operator Tree: TableScan alias: date_dim @@ -871,7 +575,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 36 + Map 29 Map Operator Tree: TableScan alias: store_sales @@ -921,23 +625,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 58079562 Data size: 5123795819 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 28 + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -960,7 +648,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(28,2)) - Reducer 29 + Reducer 15 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -974,45 +662,48 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 input vertices: - 0 Reducer 17 - 1 Reducer 24 - Statistics: Num rows: 316797606 Data size: 137243150824 Basic stats: COMPLETE Column stats: NONE + 1 Reducer 21 + Statistics: Num rows: 316797606 Data size: 99227438104 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 > (0.95 * _col1)) (type: boolean) - Statistics: Num rows: 105599202 Data size: 45747716941 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > _col2) (type: boolean) + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int) + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 105599202 Data size: 45747716941 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 105599202 Data size: 45747716941 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 32 + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Reducer 25 Reduce Operator Tree: Join Operator condition map: @@ -1028,7 +719,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 33 + Reducer 26 Reduce Operator Tree: Join Operator condition map: @@ -1044,11 +735,11 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 34 + Reducer 27 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 outputColumnNames: _col3, _col4 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1065,7 +756,23 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(28,2)) - Reducer 37 + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) + Reducer 30 Reduce Operator Tree: Join Operator condition map: @@ -1081,7 +788,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) - Reducer 38 + Reducer 31 Reduce Operator Tree: Join Operator condition map: @@ -1104,11 +811,50 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) + Reducer 37 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 43 + Statistics: Num rows: 316797606 Data size: 99227438104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > _col2) (type: boolean) + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 outputColumnNames: _col3, _col4 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1140,42 +886,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 58 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 - 1 - 2 - outputColumnNames: _col1, _col2, _col3 - input vertices: - 0 Reducer 46 - 1 Reducer 53 - Statistics: Num rows: 316797606 Data size: 137243150824 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 > (0.95 * _col1)) (type: boolean) - Statistics: Num rows: 105599202 Data size: 45747716941 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 105599202 Data size: 45747716941 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 105599202 Data size: 45747716941 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query44.q.out b/ql/src/test/results/clientpositive/perf/spark/query44.q.out index 3a93a646b1..ccd240b397 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query44.q.out @@ -200,13 +200,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (_col1 / _col2) (type: decimal(37,22)) + expressions: (0.9 * (_col1 / _col2)) (type: decimal(38,22)) outputColumnNames: _col0 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(37,22)) + value expressions: _col0 (type: decimal(38,22)) Reducer 12 Reduce Operator Tree: Join Operator @@ -251,7 +251,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 10367842752596232 Data size: 1839676035841599918 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > (0.9 * _col2)) (type: boolean) + predicate: (_col1 > _col2) (type: boolean) Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), _col1 (type: decimal(37,22)) @@ -311,13 +311,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (_col1 / _col2) (type: decimal(37,22)) + expressions: (0.9 * (_col1 / _col2)) (type: decimal(38,22)) outputColumnNames: _col0 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(37,22)) + value expressions: _col0 (type: decimal(38,22)) Reducer 2 Reduce Operator Tree: Join Operator @@ -399,7 +399,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 10367842752596232 Data size: 1839676035841599918 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > (0.9 * _col2)) (type: boolean) + predicate: (_col1 > _col2) (type: boolean) Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), _col1 (type: decimal(37,22)) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out index 3ee913bbab..d41b197152 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out @@ -1,7 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[582][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 27' is a cross product -Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 28' is a cross product -Warning: Shuffle Join MERGEJOIN[585][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product -Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 32' is a cross product +Warning: Shuffle Join MERGEJOIN[448][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[450][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 30' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -120,57 +118,10 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveAggregate(group=[{}], agg#0=[sum($0)]) - HiveProject(sales=[$0]) + HiveProject($f0=[$0]) HiveUnion(all=[true]) - HiveProject(sales=[*(CAST($5):DECIMAL(10, 0), $6)]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0]) - HiveJoin(condition=[>($1, *(0.95:DECIMAL(3, 2), $3))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{2}], agg#0=[sum($1)]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(c_customer_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[count($0)]) - HiveProject(c_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($3)]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[max($1)]) - HiveProject(c_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($3)]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[*(CAST($4):DECIMAL(10, 0), $5)]) + HiveSemiJoin(condition=[=($2, $7)], joinType=[inner]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) HiveAggregate(group=[{1}]) @@ -195,10 +146,8 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(sales=[*(CAST($5):DECIMAL(10, 0), $6)]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0]) - HiveJoin(condition=[>($1, *(0.95:DECIMAL(3, 2), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0]) + HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], $f1=[$1]) HiveAggregate(group=[{2}], agg#0=[sum($1)]) HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -208,42 +157,23 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(c_customer_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[count($0)]) - HiveProject(c_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($3)]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[max($1)]) - HiveProject(c_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($3)]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(*=[*(0.95:DECIMAL(3, 2), $0)]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject(c_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[*(CAST($4):DECIMAL(10, 0), $5)]) + HiveSemiJoin(condition=[=($3, $7)], joinType=[inner]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0]) HiveAggregate(group=[{1}]) @@ -268,4 +198,30 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_customer_sk=[$0]) + HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{2}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(*=[*(0.95:DECIMAL(3, 2), $0)]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveProject(c_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out index e6d6c0c875..a348cc4c02 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -1,7 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[442][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product -Warning: Shuffle Join MERGEJOIN[443][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product -Warning: Shuffle Join MERGEJOIN[445][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[352][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 17' is a cross product +Warning: Shuffle Join MERGEJOIN[354][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 23' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -125,31 +123,15 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) HiveSemiJoin(condition=[=($3, $7)], joinType=[inner]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0]) - HiveJoin(condition=[>($1, *(0.95:DECIMAL(3, 2), $3))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[count($0)]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{1}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0]) + HiveProject(ss_customer_sk=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(*=[*(0.95:DECIMAL(3, 2), $0)]) HiveAggregate(group=[{}], agg#0=[max($1)]) HiveProject(ss_customer_sk=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($2)]) @@ -184,31 +166,15 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) HiveSemiJoin(condition=[=($2, $7)], joinType=[inner]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0]) - HiveJoin(condition=[>($1, *(0.95:DECIMAL(3, 2), $3))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[IS NOT NULL($3)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject - HiveProject($f0=[$0]) - HiveAggregate(group=[{}], agg#0=[count($0)]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveAggregate(group=[{1}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0]) + HiveProject(ss_customer_sk=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($3):INTEGER NOT NULL], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[IS NOT NULL($3)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(*=[*(0.95:DECIMAL(3, 2), $0)]) HiveAggregate(group=[{}], agg#0=[max($1)]) HiveProject(ss_customer_sk=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($2)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out index 7ba03a1644..c7bfb39524 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out @@ -159,7 +159,7 @@ Stage-0 SHUFFLE [RS_19] PartitionCols:0 Filter Operator [FIL_18] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) + predicate:(_col1 > _col2) Merge Join Operator [MERGEJOIN_103] (rows=62562 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out index 7ba6715664..e6cba09258 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out @@ -1,7 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[442][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 19' is a cross product -Warning: Shuffle Join MERGEJOIN[443][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 20' is a cross product -Warning: Shuffle Join MERGEJOIN[445][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 23' is a cross product -Warning: Shuffle Join MERGEJOIN[446][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[352][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 17' is a cross product +Warning: Shuffle Join MERGEJOIN[354][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 23' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -122,37 +120,34 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 15 <- Reducer 27 (BROADCAST_EDGE) -Map 28 <- Reducer 7 (BROADCAST_EDGE) -Map 30 <- Reducer 36 (BROADCAST_EDGE) -Map 38 <- Reducer 14 (BROADCAST_EDGE) -Map 39 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Map 38 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 15 <- Reducer 7 (BROADCAST_EDGE) +Map 19 <- Reducer 26 (BROADCAST_EDGE) +Map 27 <- Reducer 33 (BROADCAST_EDGE) +Map 35 <- Reducer 14 (BROADCAST_EDGE) +Map 36 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 35 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 24 (ONE_TO_ONE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 21 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 25 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (CUSTOM_SIMPLE_EDGE), Reducer 40 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) -Reducer 32 <- Map 37 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 33 <- Reducer 32 (SIMPLE_EDGE) -Reducer 34 <- Reducer 32 (SIMPLE_EDGE) -Reducer 36 <- Map 35 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 40 <- Map 39 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 37 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 29 <- Map 34 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Reducer 29 (SIMPLE_EDGE) +Reducer 33 <- Map 32 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 36 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -162,346 +157,312 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_535] - Group By Operator [GBY_534] (rows=1 width=112) + File Output Operator [FS_432] + Group By Operator [GBY_431] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_459] - Group By Operator [GBY_458] (rows=1 width=112) + Reduce Output Operator [RS_367] + Group By Operator [GBY_366] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_456] (rows=155 width=112) + Select Operator [SEL_364] (rows=155 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_455] (rows=155 width=0) - Conds:RS_198._col1=RS_562._col0(Left Semi),Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_363] (rows=155 width=0) + Conds:RS_146._col1=RS_453._col0(Left Semi),Output:["_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_198] + SHUFFLE [RS_146] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_447] (rows=3941101 width=118) - Conds:RS_193._col2=RS_194._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_355] (rows=3941102 width=118) + Conds:RS_141._col2=RS_447._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_193] + PARTITION_ONLY_SHUFFLE [RS_141] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_437] (rows=3941102 width=122) - Conds:RS_540._col0=RS_464._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_348] (rows=3941102 width=122) + Conds:RS_437._col0=RS_372._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_464] + PARTITION_ONLY_SHUFFLE [RS_372] PartitionCols:_col0 - Select Operator [SEL_461] (rows=50 width=4) + Select Operator [SEL_369] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_460] (rows=50 width=12) + Filter Operator [FIL_368] (rows=50 width=12) predicate:((d_moy = 1) and (d_year = 1999)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_540] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_437] PartitionCols:_col0 - Select Operator [SEL_539] (rows=143930993 width=127) + Select Operator [SEL_436] (rows=143930993 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_538] (rows=143930993 width=127) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_191_date_dim_d_date_sk_min) AND DynamicValue(RS_191_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_191_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_101] (rows=144002668 width=127) + Filter Operator [FIL_435] (rows=143930993 width=127) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_139_date_dim_d_date_sk_min) AND DynamicValue(RS_139_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_139_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_75] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_537] - Group By Operator [GBY_536] (rows=1 width=12) + BROADCAST [RS_434] + Group By Operator [GBY_433] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_469] - Group By Operator [GBY_467] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_377] + Group By Operator [GBY_375] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_465] (rows=50 width=4) + Select Operator [SEL_373] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_461] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_194] + Please refer to the previous Select Operator [SEL_369] + <-Reducer 24 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_447] PartitionCols:_col0 - Select Operator [SEL_168] (rows=471875 width=3) - Output:["_col0"] - Filter Operator [FIL_167] (rows=471875 width=227) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_446] (rows=1415626 width=227) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_164] - Merge Join Operator [MERGEJOIN_445] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_546] - Select Operator [SEL_545] (rows=1 width=8) - Filter Operator [FIL_544] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_543] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_542] (rows=1 width=8) - Group By Operator [GBY_541] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_497] - Group By Operator [GBY_493] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_489] (rows=50562 width=112) - Output:["_col0"] - Group By Operator [GBY_486] (rows=50562 width=112) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=455058 width=112) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col1 - Merge Join Operator [MERGEJOIN_433] (rows=18762463 width=112) - Conds:RS_485._col0=RS_477._col0(Inner),Output:["_col1","_col2"] - <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_477] - PartitionCols:_col0 - Select Operator [SEL_476] (rows=2609 width=4) - Output:["_col0"] - Filter Operator [FIL_475] (rows=2609 width=8) - predicate:(d_year) IN (1999, 2000, 2001, 2002) - TableScan [TS_9] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_485] - PartitionCols:_col0 - Select Operator [SEL_484] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_483] (rows=525327388 width=118) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_6] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_482] - Group By Operator [GBY_481] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_480] - Group By Operator [GBY_479] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_478] (rows=2609 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_476] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_548] - Group By Operator [GBY_547] (rows=1 width=112) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_498] - Group By Operator [GBY_494] (rows=1 width=112) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_490] (rows=50562 width=112) - Output:["_col1"] - Please refer to the previous Group By Operator [GBY_486] - <-Reducer 40 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_556] - Group By Operator [GBY_555] (rows=1415626 width=115) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_554] - PartitionCols:_col0 - Group By Operator [GBY_553] (rows=550080312 width=115) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_552] (rows=550080312 width=114) - Output:["_col0","_col1"] - Filter Operator [FIL_551] (rows=550080312 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_193_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_193_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_193_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) - TableScan [TS_153] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_550] - Group By Operator [GBY_549] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_411] - Group By Operator [GBY_410] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_409] (rows=3941102 width=7) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_437] - <-Reducer 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_562] + Group By Operator [GBY_446] (rows=235937 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_114] + PartitionCols:_col0 + Group By Operator [GBY_113] (rows=235937 width=3) + Output:["_col0"],keys:_col0 + Select Operator [SEL_112] (rows=471875 width=227) + Output:["_col0"] + Filter Operator [FIL_111] (rows=471875 width=227) + predicate:(_col1 > _col2) + Merge Join Operator [MERGEJOIN_354] (rows=1415626 width=227) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_409] + Select Operator [SEL_407] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_406] (rows=1 width=112) + Output:["_col0"],aggregations:["max(VALUE._col0)"] + <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_405] + Group By Operator [GBY_404] (rows=1 width=112) + Output:["_col0"],aggregations:["max(_col1)"] + Select Operator [SEL_403] (rows=50562 width=112) + Output:["_col1"] + Group By Operator [GBY_402] (rows=50562 width=112) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Group By Operator [GBY_24] (rows=455058 width=112) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col1 + Merge Join Operator [MERGEJOIN_345] (rows=18762463 width=112) + Conds:RS_401._col0=RS_393._col0(Inner),Output:["_col1","_col2"] + <-Map 25 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_393] + PartitionCols:_col0 + Select Operator [SEL_392] (rows=2609 width=4) + Output:["_col0"] + Filter Operator [FIL_391] (rows=2609 width=8) + predicate:(d_year) IN (1999, 2000, 2001, 2002) + TableScan [TS_17] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_401] + PartitionCols:_col0 + Select Operator [SEL_400] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_399] (rows=525327388 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_14] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_398] + Group By Operator [GBY_397] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_396] + Group By Operator [GBY_395] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_394] (rows=2609 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_392] + <-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_445] + Group By Operator [GBY_444] (rows=1415626 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_443] + PartitionCols:_col0 + Group By Operator [GBY_442] (rows=550080312 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_441] (rows=550080312 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_440] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_141_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_141_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_141_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_81] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_439] + Group By Operator [GBY_438] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_313] + Group By Operator [GBY_312] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_311] (rows=3941102 width=7) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_348] + <-Reducer 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_453] PartitionCols:_col0 - Group By Operator [GBY_561] (rows=2235 width=4) + Group By Operator [GBY_452] (rows=2235 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_560] (rows=1943705 width=4) + Select Operator [SEL_451] (rows=1943705 width=4) Output:["_col0"] - Filter Operator [FIL_559] (rows=1943705 width=106) + Filter Operator [FIL_450] (rows=1943705 width=106) predicate:(_col2 > 4L) - Select Operator [SEL_558] (rows=5831115 width=106) + Select Operator [SEL_449] (rows=5831115 width=106) Output:["_col0","_col2"] - Group By Operator [GBY_557] (rows=5831115 width=106) + Group By Operator [GBY_448] (rows=5831115 width=106) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_185] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_133] PartitionCols:_col0, _col1 - Group By Operator [GBY_83] (rows=19646398 width=106) + Group By Operator [GBY_57] (rows=19646398 width=106) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_436] (rows=19646398 width=98) - Conds:RS_79._col1=RS_527._col0(Inner),Output:["_col3","_col4"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_527] + Merge Join Operator [MERGEJOIN_347] (rows=19646398 width=98) + Conds:RS_53._col1=RS_424._col0(Inner),Output:["_col3","_col4"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_424] PartitionCols:_col0 - Select Operator [SEL_526] (rows=462000 width=188) + Select Operator [SEL_423] (rows=462000 width=188) Output:["_col0"] - TableScan [TS_74] (rows=462000 width=4) + TableScan [TS_48] (rows=462000 width=4) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_79] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_435] (rows=19646398 width=98) - Conds:RS_525._col0=RS_517._col0(Inner),Output:["_col1","_col3"] - <-Map 35 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_517] + Merge Join Operator [MERGEJOIN_346] (rows=19646398 width=98) + Conds:RS_422._col0=RS_414._col0(Inner),Output:["_col1","_col3"] + <-Map 32 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_414] PartitionCols:_col0 - Select Operator [SEL_516] (rows=2609 width=98) + Select Operator [SEL_413] (rows=2609 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_515] (rows=2609 width=102) + Filter Operator [FIL_412] (rows=2609 width=102) predicate:(d_year) IN (1999, 2000, 2001, 2002) - TableScan [TS_71] (rows=73049 width=102) + TableScan [TS_45] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_year"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_525] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_422] PartitionCols:_col0 - Select Operator [SEL_524] (rows=550076554 width=7) + Select Operator [SEL_421] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_523] (rows=550076554 width=7) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_77_date_dim_d_date_sk_min) AND DynamicValue(RS_77_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_77_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) - TableScan [TS_68] (rows=575995635 width=7) + Filter Operator [FIL_420] (rows=550076554 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) + TableScan [TS_42] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_522] - Group By Operator [GBY_521] (rows=1 width=12) + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_419] + Group By Operator [GBY_418] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_520] - Group By Operator [GBY_519] (rows=1 width=12) + <-Map 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_417] + Group By Operator [GBY_416] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_518] (rows=2609 width=4) + Select Operator [SEL_415] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_516] + Please refer to the previous Select Operator [SEL_413] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_454] - Group By Operator [GBY_453] (rows=1 width=112) + Reduce Output Operator [RS_362] + Group By Operator [GBY_361] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_451] (rows=304 width=112) + Select Operator [SEL_359] (rows=304 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_450] (rows=304 width=0) - Conds:RS_97._col2=RS_533._col0(Left Semi),Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_358] (rows=304 width=0) + Conds:RS_71._col2=RS_430._col0(Left Semi),Output:["_col3","_col4"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_97] + SHUFFLE [RS_71] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_444] (rows=7751875 width=98) - Conds:RS_92._col1=RS_93._col0(Inner),Output:["_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_353] (rows=7751875 width=98) + Conds:RS_66._col1=RS_411._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_92] + PARTITION_ONLY_SHUFFLE [RS_66] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_432] (rows=7751875 width=101) - Conds:RS_474._col0=RS_462._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_344] (rows=7751875 width=101) + Conds:RS_382._col0=RS_370._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_462] + PARTITION_ONLY_SHUFFLE [RS_370] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_461] + Please refer to the previous Select Operator [SEL_369] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_474] + SHUFFLE [RS_382] PartitionCols:_col0 - Select Operator [SEL_473] (rows=285117831 width=127) + Select Operator [SEL_381] (rows=285117831 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_472] (rows=285117831 width=127) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + Filter Operator [FIL_380] (rows=285117831 width=127) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_64_date_dim_d_date_sk_min) AND DynamicValue(RS_64_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_64_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_471] - Group By Operator [GBY_470] (rows=1 width=12) + BROADCAST [RS_379] + Group By Operator [GBY_378] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_468] - Group By Operator [GBY_466] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_376] + Group By Operator [GBY_374] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_463] (rows=50 width=4) + Select Operator [SEL_371] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_461] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_93] + Please refer to the previous Select Operator [SEL_369] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_411] PartitionCols:_col0 - Select Operator [SEL_67] (rows=471875 width=3) - Output:["_col0"] - Filter Operator [FIL_66] (rows=471875 width=227) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_443] (rows=1415626 width=227) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_63] - Merge Join Operator [MERGEJOIN_442] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_504] - Select Operator [SEL_503] (rows=1 width=8) - Filter Operator [FIL_502] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_501] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_500] (rows=1 width=8) - Group By Operator [GBY_499] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_495] - Group By Operator [GBY_491] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_487] (rows=50562 width=112) - Output:["_col0"] - Please refer to the previous Group By Operator [GBY_486] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_506] - Group By Operator [GBY_505] (rows=1 width=112) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_496] - Group By Operator [GBY_492] (rows=1 width=112) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_488] (rows=50562 width=112) - Output:["_col1"] - Please refer to the previous Group By Operator [GBY_486] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_514] - Group By Operator [GBY_513] (rows=1415626 width=115) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_512] - PartitionCols:_col0 - Group By Operator [GBY_511] (rows=550080312 width=115) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_510] (rows=550080312 width=114) - Output:["_col0","_col1"] - Filter Operator [FIL_509] (rows=550080312 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_92_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_92_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_92_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) - TableScan [TS_52] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_508] - Group By Operator [GBY_507] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_338] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_336] (rows=7751875 width=6) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_432] - <-Reducer 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_533] + Group By Operator [GBY_410] (rows=235937 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Group By Operator [GBY_38] (rows=235937 width=3) + Output:["_col0"],keys:_col0 + Select Operator [SEL_37] (rows=471875 width=227) + Output:["_col0"] + Filter Operator [FIL_36] (rows=471875 width=227) + predicate:(_col1 > _col2) + Merge Join Operator [MERGEJOIN_352] (rows=1415626 width=227) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_408] + Please refer to the previous Select Operator [SEL_407] + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_390] + Group By Operator [GBY_389] (rows=1415626 width=115) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_388] + PartitionCols:_col0 + Group By Operator [GBY_387] (rows=550080312 width=115) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_386] (rows=550080312 width=114) + Output:["_col0","_col1"] + Filter Operator [FIL_385] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_66_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_66_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_66_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_6] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_384] + Group By Operator [GBY_383] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_250] + Group By Operator [GBY_249] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_248] (rows=7751875 width=6) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_344] + <-Reducer 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_430] PartitionCols:_col0 - Group By Operator [GBY_532] (rows=2235 width=4) + Group By Operator [GBY_429] (rows=2235 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_531] (rows=1943705 width=4) + Select Operator [SEL_428] (rows=1943705 width=4) Output:["_col0"] - Filter Operator [FIL_530] (rows=1943705 width=106) + Filter Operator [FIL_427] (rows=1943705 width=106) predicate:(_col2 > 4L) - Select Operator [SEL_529] (rows=5831115 width=106) + Select Operator [SEL_426] (rows=5831115 width=106) Output:["_col0","_col2"] - Group By Operator [GBY_528] (rows=5831115 width=106) + Group By Operator [GBY_425] (rows=5831115 width=106) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_84] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_58] PartitionCols:_col0, _col1 - Please refer to the previous Group By Operator [GBY_83] + Please refer to the previous Group By Operator [GBY_57] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out index 4e6c083f5b..8b36371dba 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out @@ -138,7 +138,7 @@ Stage-0 SHUFFLE [RS_19] PartitionCols:0 Filter Operator [FIL_18] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) + predicate:(_col1 > _col2) Merge Join Operator [MERGEJOIN_103] (rows=62562 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 8062cf16c6..c305e15646 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,7 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[582][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 27' is a cross product -Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 28' is a cross product -Warning: Shuffle Join MERGEJOIN[585][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 31' is a cross product -Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 32' is a cross product +Warning: Shuffle Join MERGEJOIN[448][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[450][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 30' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -123,13 +121,13 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 9 (BROADCAST_EDGE) Map 15 <- Reducer 20 (BROADCAST_EDGE) -Map 22 <- Reducer 35 (BROADCAST_EDGE) -Map 36 <- Reducer 7 (BROADCAST_EDGE) -Map 42 <- Reducer 14 (BROADCAST_EDGE) -Map 43 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Map 42 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Map 22 <- Reducer 7 (BROADCAST_EDGE) +Map 33 <- Reducer 36 (BROADCAST_EDGE) +Map 37 <- Reducer 14 (BROADCAST_EDGE) +Map 38 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 37 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) @@ -137,24 +135,19 @@ Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) -Reducer 24 <- Map 39 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 25 <- Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 38 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Reducer 25 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 25 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 41 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Reducer 25 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 36 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) -Reducer 38 <- Reducer 37 (SIMPLE_EDGE) -Reducer 4 <- Reducer 28 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) -Reducer 41 <- Reducer 40 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 26 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) +Reducer 32 <- Reducer 31 (SIMPLE_EDGE) +Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 36 <- Map 35 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -164,367 +157,323 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_679] - Group By Operator [GBY_678] (rows=1 width=112) + File Output Operator [FS_530] + Group By Operator [GBY_529] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_597] - Group By Operator [GBY_596] (rows=1 width=112) + Reduce Output Operator [RS_461] + Group By Operator [GBY_460] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_594] (rows=7 width=112) + Select Operator [SEL_458] (rows=4 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_593] (rows=7 width=16) - Conds:RS_240._col2=RS_241._col0(Inner),Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_457] (rows=4 width=29) + Conds:RS_172._col2=RS_173._col0(Left Semi),Output:["_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_240] + PARTITION_ONLY_SHUFFLE [RS_172] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_581] (rows=155 width=0) - Conds:RS_237._col1=RS_633._col0(Inner),Output:["_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_447] (rows=155 width=0) + Conds:RS_167._col1=RS_497._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_633] + SHUFFLE [RS_497] PartitionCols:_col0 - Group By Operator [GBY_631] (rows=2235 width=4) + Group By Operator [GBY_495] (rows=2235 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_630] (rows=6548799 width=290) + Select Operator [SEL_494] (rows=6548799 width=290) Output:["_col1"] - Filter Operator [FIL_629] (rows=6548799 width=290) + Filter Operator [FIL_493] (rows=6548799 width=290) predicate:(_col3 > 4L) - Select Operator [SEL_628] (rows=19646398 width=290) + Select Operator [SEL_492] (rows=19646398 width=290) Output:["_col1","_col3"] - Group By Operator [GBY_627] (rows=19646398 width=290) + Group By Operator [GBY_491] (rows=19646398 width=290) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 Group By Operator [GBY_22] (rows=19646398 width=290) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col3, _col5 - Merge Join Operator [MERGEJOIN_566] (rows=19646398 width=282) - Conds:RS_18._col1=RS_626._col0(Inner),Output:["_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_436] (rows=19646398 width=282) + Conds:RS_18._col1=RS_490._col0(Inner),Output:["_col3","_col4","_col5"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_626] + SHUFFLE [RS_490] PartitionCols:_col0 - Select Operator [SEL_625] (rows=462000 width=188) + Select Operator [SEL_489] (rows=462000 width=188) Output:["_col0","_col1"] - Filter Operator [FIL_624] (rows=462000 width=188) + Filter Operator [FIL_488] (rows=462000 width=188) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=188) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_565] (rows=19646398 width=98) - Conds:RS_623._col0=RS_615._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_435] (rows=19646398 width=98) + Conds:RS_487._col0=RS_479._col0(Inner),Output:["_col1","_col3"] <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_615] + PARTITION_ONLY_SHUFFLE [RS_479] PartitionCols:_col0 - Select Operator [SEL_614] (rows=2609 width=98) + Select Operator [SEL_478] (rows=2609 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_613] (rows=2609 width=102) + Filter Operator [FIL_477] (rows=2609 width=102) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_year"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_623] + SHUFFLE [RS_487] PartitionCols:_col0 - Select Operator [SEL_622] (rows=550076554 width=7) + Select Operator [SEL_486] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_621] (rows=550076554 width=7) + Filter Operator [FIL_485] (rows=550076554 width=7) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) TableScan [TS_6] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_620] - Group By Operator [GBY_619] (rows=1 width=12) + BROADCAST [RS_484] + Group By Operator [GBY_483] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_618] - Group By Operator [GBY_617] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_482] + Group By Operator [GBY_481] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_616] (rows=2609 width=4) + Select Operator [SEL_480] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_614] + Please refer to the previous Select Operator [SEL_478] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_237] + SHUFFLE [RS_167] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_572] (rows=3941102 width=122) - Conds:RS_684._col0=RS_602._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_440] (rows=3941102 width=122) + Conds:RS_535._col0=RS_466._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_602] + PARTITION_ONLY_SHUFFLE [RS_466] PartitionCols:_col0 - Select Operator [SEL_599] (rows=50 width=4) + Select Operator [SEL_463] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_598] (rows=50 width=12) + Filter Operator [FIL_462] (rows=50 width=12) predicate:((d_moy = 1) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_684] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_535] PartitionCols:_col0 - Select Operator [SEL_683] (rows=143930993 width=127) + Select Operator [SEL_534] (rows=143930993 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_682] (rows=143930993 width=127) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_235_date_dim_d_date_sk_min) AND DynamicValue(RS_235_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_235_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_122] (rows=144002668 width=127) + Filter Operator [FIL_533] (rows=143930993 width=127) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_165_date_dim_d_date_sk_min) AND DynamicValue(RS_165_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_165_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_88] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_681] - Group By Operator [GBY_680] (rows=1 width=12) + BROADCAST [RS_532] + Group By Operator [GBY_531] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_607] - Group By Operator [GBY_605] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_471] + Group By Operator [GBY_469] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_603] (rows=50 width=4) + Select Operator [SEL_467] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_599] - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_241] + Please refer to the previous Select Operator [SEL_463] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_173] PartitionCols:_col0 - Select Operator [SEL_233] (rows=471875 width=4) - Output:["_col0"] - Filter Operator [FIL_232] (rows=471875 width=228) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_586] (rows=1415625 width=228) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_229] - Merge Join Operator [MERGEJOIN_585] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_690] - Select Operator [SEL_689] (rows=1 width=8) - Filter Operator [FIL_688] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_687] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_686] (rows=1 width=8) - Group By Operator [GBY_685] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_661] - Group By Operator [GBY_657] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_653] (rows=11859 width=116) - Output:["_col0"] - Group By Operator [GBY_650] (rows=11859 width=116) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Group By Operator [GBY_48] (rows=106731 width=116) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_568] (rows=18762463 width=116) - Conds:RS_44._col1=RS_648._col0(Inner),Output:["_col2","_col4"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_648] - PartitionCols:_col0 - Select Operator [SEL_646] (rows=80000000 width=4) - Output:["_col0"] - Filter Operator [FIL_645] (rows=80000000 width=4) - predicate:c_customer_sk is not null - TableScan [TS_93] (rows=80000000 width=4) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_567] (rows=18762463 width=112) - Conds:RS_644._col0=RS_636._col0(Inner),Output:["_col1","_col2"] - <-Map 34 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_636] - PartitionCols:_col0 - Select Operator [SEL_635] (rows=2609 width=4) - Output:["_col0"] - Filter Operator [FIL_634] (rows=2609 width=8) - predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_35] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_644] - PartitionCols:_col0 - Select Operator [SEL_643] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_642] (rows=525327388 width=118) - predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_32] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_641] - Group By Operator [GBY_640] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_639] - Group By Operator [GBY_638] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_637] (rows=2609 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_635] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_692] - Group By Operator [GBY_691] (rows=1 width=112) + Group By Operator [GBY_171] (rows=235937 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_163] (rows=471875 width=4) + Output:["_col0"] + Filter Operator [FIL_162] (rows=471875 width=228) + predicate:(_col1 > _col2) + Merge Join Operator [MERGEJOIN_450] (rows=1415625 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_528] + Select Operator [SEL_526] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_525] (rows=1 width=112) Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_662] - Group By Operator [GBY_658] (rows=1 width=112) + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_524] + Group By Operator [GBY_523] (rows=1 width=112) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_654] (rows=11859 width=116) + Select Operator [SEL_522] (rows=11859 width=116) Output:["_col1"] - Please refer to the previous Group By Operator [GBY_650] - <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_699] - Group By Operator [GBY_698] (rows=1415625 width=116) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 40 [SIMPLE_EDGE] - SHUFFLE [RS_223] - PartitionCols:_col0 - Group By Operator [GBY_222] (rows=80000000 width=116) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col2 - Merge Join Operator [MERGEJOIN_579] (rows=550080312 width=116) - Conds:RS_697._col0=RS_649._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_649] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_646] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_697] - PartitionCols:_col0 - Select Operator [SEL_696] (rows=550080312 width=115) - Output:["_col0","_col1"] - Filter Operator [FIL_695] (rows=550080312 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_240_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_240_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_240_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) - TableScan [TS_212] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_694] - Group By Operator [GBY_693] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_559] - Group By Operator [GBY_558] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_557] (rows=155 width=0) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_581] + Group By Operator [GBY_521] (rows=11859 width=116) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col0 + Group By Operator [GBY_62] (rows=106731 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 + Merge Join Operator [MERGEJOIN_439] (rows=18762463 width=116) + Conds:RS_58._col1=RS_506._col0(Inner),Output:["_col2","_col4"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_506] + PartitionCols:_col0 + Select Operator [SEL_504] (rows=80000000 width=4) + Output:["_col0"] + Filter Operator [FIL_503] (rows=80000000 width=4) + predicate:c_customer_sk is not null + TableScan [TS_35] (rows=80000000 width=4) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk"] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_438] (rows=18762463 width=112) + Conds:RS_520._col0=RS_512._col0(Inner),Output:["_col1","_col2"] + <-Map 35 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_512] + PartitionCols:_col0 + Select Operator [SEL_511] (rows=2609 width=4) + Output:["_col0"] + Filter Operator [FIL_510] (rows=2609 width=8) + predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) + TableScan [TS_49] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_520] + PartitionCols:_col0 + Select Operator [SEL_519] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_518] (rows=525327388 width=118) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_56_date_dim_d_date_sk_min) AND DynamicValue(RS_56_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_56_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_46] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 36 [BROADCAST_EDGE] vectorized + BROADCAST [RS_517] + Group By Operator [GBY_516] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_515] + Group By Operator [GBY_514] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_513] (rows=2609 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_511] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_542] + Group By Operator [GBY_541] (rows=1415625 width=116) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_131] + PartitionCols:_col0 + Group By Operator [GBY_130] (rows=80000000 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col2 + Merge Join Operator [MERGEJOIN_443] (rows=550080312 width=116) + Conds:RS_540._col0=RS_507._col0(Inner),Output:["_col1","_col2"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_507] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_504] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_540] + PartitionCols:_col0 + Select Operator [SEL_539] (rows=550080312 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_538] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_172_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_172_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_172_web_sales_ws_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_120] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_537] + Group By Operator [GBY_536] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_409] + Group By Operator [GBY_408] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_407] (rows=155 width=0) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_447] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_592] - Group By Operator [GBY_591] (rows=1 width=112) + Reduce Output Operator [RS_456] + Group By Operator [GBY_455] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_589] (rows=13 width=112) + Select Operator [SEL_453] (rows=7 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_588] (rows=13 width=8) - Conds:RS_118._col1=RS_119._col0(Inner),Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_452] (rows=7 width=16) + Conds:RS_84._col1=RS_85._col0(Left Semi),Output:["_col3","_col4"] <-Reducer 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_118] + PARTITION_ONLY_SHUFFLE [RS_84] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_580] (rows=304 width=0) - Conds:RS_115._col2=RS_632._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_446] (rows=304 width=0) + Conds:RS_79._col2=RS_496._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_632] + SHUFFLE [RS_496] PartitionCols:_col0 - Please refer to the previous Group By Operator [GBY_631] + Please refer to the previous Group By Operator [GBY_495] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_115] + SHUFFLE [RS_79] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_564] (rows=7751875 width=101) - Conds:RS_612._col0=RS_600._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_434] (rows=7751875 width=101) + Conds:RS_476._col0=RS_464._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_600] + PARTITION_ONLY_SHUFFLE [RS_464] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_599] + Please refer to the previous Select Operator [SEL_463] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_612] + SHUFFLE [RS_476] PartitionCols:_col0 - Select Operator [SEL_611] (rows=285117831 width=127) + Select Operator [SEL_475] (rows=285117831 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_610] (rows=285117831 width=127) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_113_date_dim_d_date_sk_min) AND DynamicValue(RS_113_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_113_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + Filter Operator [FIL_474] (rows=285117831 width=127) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_77_date_dim_d_date_sk_min) AND DynamicValue(RS_77_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_77_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_609] - Group By Operator [GBY_608] (rows=1 width=12) + BROADCAST [RS_473] + Group By Operator [GBY_472] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_606] - Group By Operator [GBY_604] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_470] + Group By Operator [GBY_468] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_601] (rows=50 width=4) + Select Operator [SEL_465] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_599] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_119] + Please refer to the previous Select Operator [SEL_463] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_85] PartitionCols:_col0 - Select Operator [SEL_111] (rows=471875 width=4) - Output:["_col0"] - Filter Operator [FIL_110] (rows=471875 width=228) - predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_583] (rows=1415625 width=228) - Conds:(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_107] - Merge Join Operator [MERGEJOIN_582] (rows=1 width=112) - Conds:(Inner),Output:["_col1"] - <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_668] - Select Operator [SEL_667] (rows=1 width=8) - Filter Operator [FIL_666] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_665] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_664] (rows=1 width=8) - Group By Operator [GBY_663] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_659] - Group By Operator [GBY_655] (rows=1 width=8) - Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_651] (rows=11859 width=116) - Output:["_col0"] - Please refer to the previous Group By Operator [GBY_650] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_670] - Group By Operator [GBY_669] (rows=1 width=112) - Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_660] - Group By Operator [GBY_656] (rows=1 width=112) - Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_652] (rows=11859 width=116) - Output:["_col1"] - Please refer to the previous Group By Operator [GBY_650] - <-Reducer 38 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_677] - Group By Operator [GBY_676] (rows=1415625 width=116) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 37 [SIMPLE_EDGE] - SHUFFLE [RS_101] - PartitionCols:_col0 - Group By Operator [GBY_100] (rows=80000000 width=116) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col2 - Merge Join Operator [MERGEJOIN_571] (rows=550080312 width=116) - Conds:RS_675._col0=RS_647._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_647] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_646] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_675] - PartitionCols:_col0 - Select Operator [SEL_674] (rows=550080312 width=115) - Output:["_col0","_col1"] - Filter Operator [FIL_673] (rows=550080312 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_118_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_118_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_118_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) - TableScan [TS_90] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_672] - Group By Operator [GBY_671] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_453] - Group By Operator [GBY_452] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_451] (rows=304 width=0) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_580] + Group By Operator [GBY_83] (rows=235937 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_75] (rows=471875 width=4) + Output:["_col0"] + Filter Operator [FIL_74] (rows=471875 width=228) + predicate:(_col1 > _col2) + Merge Join Operator [MERGEJOIN_448] (rows=1415625 width=228) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_527] + Please refer to the previous Select Operator [SEL_526] + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_509] + Group By Operator [GBY_508] (rows=1415625 width=116) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Group By Operator [GBY_42] (rows=80000000 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col2 + Merge Join Operator [MERGEJOIN_437] (rows=550080312 width=116) + Conds:RS_502._col0=RS_505._col0(Inner),Output:["_col1","_col2"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_505] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_504] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_502] + PartitionCols:_col0 + Select Operator [SEL_501] (rows=550080312 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_500] (rows=550080312 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_84_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_84_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_84_catalog_sales_cs_bill_customer_sk_bloom_filter))) and ss_customer_sk is not null) + TableScan [TS_32] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_499] + Group By Operator [GBY_498] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_323] + Group By Operator [GBY_322] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_321] (rows=304 width=0) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_446] diff --git a/ql/src/test/results/clientpositive/perf/tez/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/query44.q.out index 0ec015a3eb..0da0321ca4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query44.q.out @@ -131,7 +131,7 @@ Stage-0 SHUFFLE [RS_22] PartitionCols:0 Filter Operator [FIL_21] (rows=20854 width=228) - predicate:(_col1 > (0.9 * _col2)) + predicate:(_col1 > _col2) Merge Join Operator [MERGEJOIN_103] (rows=62562 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index 3d957668c6..7611bb49ed 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -263,13 +263,13 @@ STAGE PLANS: alias: tempty_n0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: c (type: char(2)) + expressions: UDFToDouble(c) (type: double) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: char(2)) + value expressions: _col0 (type: double) Execution mode: vectorized Map 5 Map Operator Tree: @@ -277,13 +277,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized Reducer 2 Execution mode: vectorized @@ -311,10 +311,10 @@ STAGE PLANS: 0 1 2 - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 26 Data size: 3381 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (UDFToDouble(_col7) > UDFToDouble(_col1)) (type: boolean) + predicate: (_col11 > _col1) (type: boolean) Statistics: Num rows: 8 Data size: 1040 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) @@ -459,8 +459,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### -Warning: Shuffle Join JOIN[25][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product -Warning: Shuffle Join JOIN[28][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where (select i from tnull_n0 limit 1) is null PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -479,29 +478,25 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 8 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: tnull_n0 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 + Map 3 Map Operator Tree: TableScan alias: tnull_n0 @@ -519,53 +514,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -573,41 +522,23 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 26 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col9 is null (type: boolean) - Statistics: Num rows: 13 Data size: 1768 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 1768 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 1768 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 3303 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col9 is null (type: boolean) + Statistics: Num rows: 13 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -628,8 +559,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[25][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product -Warning: Shuffle Join JOIN[28][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part where (select i from tnull_n0 limit 1) is null PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -965,7 +895,7 @@ POSTHOOK: Input: default@part 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join JOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -986,26 +916,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 9 Map Operator Tree: TableScan alias: part @@ -1019,103 +935,42 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) Execution mode: vectorized - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS LAST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: first_value_window_0 - arguments: _col5 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT + Map 3 + Map Operator Tree: + TableScan + alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: first_value_window_0 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 5 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 6527 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 6319 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col4 > _col1) (type: boolean) - Statistics: Num rows: 8 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + predicate: (_col2 > _col3) (type: boolean) + Statistics: Num rows: 8 Data size: 1944 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col4 (type: int) + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1944 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1944 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1151,7 +1006,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 8 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -1172,7 +1027,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[32][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -2805,7 +2660,7 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### 2346.3 3461.37 -Warning: Shuffle Join JOIN[72][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2822,17 +2677,29 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 1 (GROUP, 1) - Reducer 11 <- Reducer 10 (GROUP, 2) - Reducer 12 <- Reducer 11 (GROUP, 2), Reducer 16 (GROUP, 2) - Reducer 13 <- Reducer 12 (GROUP, 1) - Reducer 15 <- Map 14 (GROUP, 1) - Reducer 16 <- Reducer 15 (GROUP, 2) - Reducer 4 <- Reducer 11 (GROUP, 2), Reducer 16 (GROUP, 2) - Reducer 5 <- Map 17 (PARTITION-LEVEL SORT, 1), Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 9 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 1) + Reducer 9 <- Reducer 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToLong(p_size) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: part @@ -2852,7 +2719,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 14 + Map 7 Map Operator Tree: TableScan alias: part @@ -2872,21 +2739,31 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 17 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 26 Data size: 3381 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col9 > _col10) (type: boolean) + Statistics: Num rows: 8 Data size: 1040 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized - Reducer 10 + Statistics: Num rows: 8 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 1040 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -2907,7 +2784,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 11 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -2929,7 +2806,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 12 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -2938,42 +2815,18 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 = 2L) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 13 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 = 2L) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reducer 15 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -2994,7 +2847,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 16 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -3016,52 +2869,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 = 2L) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 - 1 - 2 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 3797 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToLong(_col7) > _col0) (type: boolean) - Statistics: Num rows: 8 Data size: 1168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 1168 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 1168 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -3069,7 +2876,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[72][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size > (select count(p_name) from part INTERSECT select count(p_brand) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out index 23f04add9e..e3ae2ecdaf 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -2930,8 +2930,7 @@ POSTHOOK: Input: default@part 6 false 6 false 7 false -Warning: Shuffle Join JOIN[23][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product -Warning: Shuffle Join JOIN[26][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2948,12 +2947,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 8 <- Map 1 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2962,64 +2958,30 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_size (type: int) + expressions: p_type (type: string) outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + + sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) Execution mode: vectorized - Map 9 + Map 4 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_type (type: string) + expressions: p_size (type: int) outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: int) + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -3027,46 +2989,28 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 130 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 6319 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 130 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 26 Data size: 6319 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2 - Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reducer 6 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6319 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 6553 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6319 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -3091,8 +3035,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[23][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product -Warning: Shuffle Join JOIN[26][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select p_type, (select p_size from part order by p_size limit 1) = 1 from part order by p_type PREHOOK: type: QUERY PREHOOK: Input: default@part